2 * BIRD -- The Border Gateway Protocol
4 * (c) 2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
13 #include "nest/route.h"
21 struct proto_config c
;
22 u32 local_as
, remote_as
;
24 ip_addr source_addr
; /* Source address to use */
25 struct iface
*iface
; /* Interface for link-local addresses */
26 u16 remote_port
; /* Neighbor destination port */
27 int multihop
; /* Number of hops if multihop */
28 int ttl_security
; /* Enable TTL security [RFC5082] */
29 int next_hop_self
; /* Always set next hop to local IP address */
30 int next_hop_keep
; /* Do not touch next hop attribute */
31 int missing_lladdr
; /* What we will do when we don't know link-local addr, see MLL_* */
32 int gw_mode
; /* How we compute route gateway from next_hop attr, see GW_* */
33 int compare_path_lengths
; /* Use path lengths when selecting best route */
34 int med_metric
; /* Compare MULTI_EXIT_DISC even between routes from different ASes */
35 int igp_metric
; /* Use IGP metrics when selecting best route */
36 int prefer_older
; /* Prefer older routes according to RFC 5004 */
37 int deterministic_med
; /* Use more complicated algo to have strict RFC 4271 MED comparison */
38 u32 default_local_pref
; /* Default value for LOCAL_PREF attribute */
39 u32 default_med
; /* Default value for MULTI_EXIT_DISC attribute */
40 int capabilities
; /* Enable capability handshake [RFC3392] */
41 int enable_refresh
; /* Enable local support for route refresh [RFC2918] */
42 int enable_as4
; /* Enable local support for 4B AS numbers [RFC4893] */
43 int enable_extended_messages
; /* Enable local support for extended messages [draft] */
44 u32 rr_cluster_id
; /* Route reflector cluster ID, if different from local ID */
45 int rr_client
; /* Whether neighbor is RR client of me */
46 int rs_client
; /* Whether neighbor is RS client of me */
47 int advertise_ipv4
; /* Whether we should add IPv4 capability advertisement to OPEN message */
48 int passive
; /* Do not initiate outgoing connection */
49 int interpret_communities
; /* Hardwired handling of well-known communities */
50 int secondary
; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
51 int add_path
; /* Use ADD-PATH extension [RFC7911] */
52 int allow_local_as
; /* Allow that number of local ASNs in incoming AS_PATHs */
53 int gr_mode
; /* Graceful restart mode (BGP_GR_*) */
54 int setkey
; /* Set MD5 password to system SA/SP database */
55 unsigned gr_time
; /* Graceful restart timeout */
56 unsigned connect_delay_time
; /* Minimum delay between connect attempts */
57 unsigned connect_retry_time
; /* Timeout for connect attempts */
58 unsigned hold_time
, initial_hold_time
;
59 unsigned keepalive_time
;
60 unsigned error_amnesia_time
; /* Errors are forgotten after */
61 unsigned error_delay_time_min
; /* Time to wait after an error is detected */
62 unsigned error_delay_time_max
;
63 unsigned disable_after_error
; /* Disable the protocol when error is detected */
65 char *password
; /* Password used for MD5 authentication */
66 struct rtable_config
*igp_table
; /* Table used for recursive next hop lookups */
67 int check_link
; /* Use iface link state for liveness detection */
68 int bfd
; /* Use BFD for liveness detection */
76 #define GW_RECURSIVE 2
80 #define ADD_PATH_FULL 3
83 #define BGP_GR_AWARE 2
85 /* For peer_gr_flags */
86 #define BGP_GRF_RESTART 0x80
88 /* For peer_gr_aflags */
89 #define BGP_GRF_FORWARDING 0x80
93 struct bgp_proto
*bgp
;
95 uint state
; /* State of connection state machine */
96 struct timer
*connect_retry_timer
;
97 struct timer
*hold_timer
;
98 struct timer
*keepalive_timer
;
100 int packets_to_send
; /* Bitmap of packet types to be sent */
101 int notify_code
, notify_subcode
, notify_size
;
103 u32 advertised_as
; /* Temporary value for AS number received */
104 int start_state
; /* protocol start_state snapshot when connection established */
105 u8 peer_refresh_support
; /* Peer supports route refresh [RFC2918] */
106 u8 peer_as4_support
; /* Peer supports 4B AS numbers [RFC4893] */
107 u8 peer_add_path
; /* Peer supports ADD-PATH [RFC7911] */
108 u8 peer_enhanced_refresh_support
; /* Peer supports enhanced refresh [RFC7313] */
114 u8 peer_ext_messages_support
; /* Peer supports extended message length [draft] */
115 unsigned hold_time
, keepalive_time
; /* Times calculated from my and neighbor's requirements */
120 struct bgp_config
*cf
; /* Shortcut to BGP configuration */
121 u32 local_as
, remote_as
;
122 int start_state
; /* Substates that partition BS_START */
123 u8 is_internal
; /* Internal BGP connection (local_as == remote_as) */
124 u8 as4_session
; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
125 u8 add_path_rx
; /* Session expects receive of ADD-PATH extended NLRI */
126 u8 add_path_tx
; /* Session expects transmit of ADD-PATH extended NLRI */
127 u8 ext_messages
; /* Session allows to use extended messages (both sides support it) */
128 u32 local_id
; /* BGP identifier of this router */
129 u32 remote_id
; /* BGP identifier of the neighbor */
130 u32 rr_cluster_id
; /* Route reflector cluster ID */
131 int rr_client
; /* Whether neighbor is RR client of me */
132 int rs_client
; /* Whether neighbor is RS client of me */
133 u8 gr_ready
; /* Neighbor could do graceful restart */
134 u8 gr_active
; /* Neighbor is doing graceful restart */
135 u8 feed_state
; /* Feed state (TX) for EoR, RR packets, see BFS_* */
136 u8 load_state
; /* Load state (RX) for EoR, RR packets, see BFS_* */
137 struct bgp_conn
*conn
; /* Connection we have established */
138 struct bgp_conn outgoing_conn
; /* Outgoing connection we're working with */
139 struct bgp_conn incoming_conn
; /* Incoming connection we have neither accepted nor rejected yet */
140 struct object_lock
*lock
; /* Lock for neighbor connection */
141 struct neighbor
*neigh
; /* Neighbor entry corresponding to remote ip, NULL if multihop */
142 struct bfd_request
*bfd_req
; /* BFD request, if BFD is used */
143 ip_addr source_addr
; /* Local address used as an advertised next hop */
144 rtable
*igp_table
; /* Table used for recursive next hop lookups */
145 struct event
*event
; /* Event for respawning and shutting process */
146 struct timer
*startup_timer
; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
147 struct timer
*gr_timer
; /* Timer waiting for reestablishment after graceful restart */
148 struct bgp_bucket
**bucket_hash
; /* Hash table of attribute buckets */
149 uint hash_size
, hash_count
, hash_limit
;
150 HASH(struct bgp_prefix
) prefix_hash
; /* Prefixes to be sent */
151 slab
*prefix_slab
; /* Slab holding prefix nodes */
152 list bucket_queue
; /* Queue of buckets to send */
153 struct bgp_bucket
*withdraw_bucket
; /* Withdrawn routes */
154 unsigned startup_delay
; /* Time to delay protocol startup by due to errors */
155 bird_clock_t last_proto_error
; /* Time of last error that leads to protocol stop */
156 u8 last_error_class
; /* Error class of last error */
157 u32 last_error_code
; /* Error code of last error. BGP protocol errors
158 are encoded as (bgp_err_code << 16 | bgp_err_subcode) */
160 byte
*mp_reach_start
, *mp_unreach_start
; /* Multiprotocol BGP attribute notes */
161 unsigned mp_reach_len
, mp_unreach_len
;
162 ip_addr local_link
; /* Link-level version of source_addr */
172 struct bgp_prefix
*next
;
173 node bucket_node
; /* Node in per-bucket list */
177 node send_node
; /* Node in send queue */
178 struct bgp_bucket
*hash_next
, *hash_prev
; /* Node in bucket hash table */
179 unsigned hash
; /* Hash over extended attributes */
180 list prefixes
; /* Prefixes in this bucket */
181 ea_list eattrs
[0]; /* Per-bucket extended attributes */
185 #define BGP_VERSION 4
186 #define BGP_HEADER_LENGTH 19
187 #define BGP_MAX_MESSAGE_LENGTH 4096
188 #define BGP_MAX_EXT_MSG_LENGTH 65535
189 #define BGP_RX_BUFFER_SIZE 4096
190 #define BGP_TX_BUFFER_SIZE 4096
191 #define BGP_RX_BUFFER_EXT_SIZE 65535
192 #define BGP_TX_BUFFER_EXT_SIZE 65535
194 static inline uint
bgp_max_packet_length(struct bgp_proto
*p
)
195 { return p
->ext_messages
? BGP_MAX_EXT_MSG_LENGTH
: BGP_MAX_MESSAGE_LENGTH
; }
197 extern struct linpool
*bgp_linpool
;
200 void bgp_start_timer(struct timer
*t
, int value
);
201 void bgp_check_config(struct bgp_config
*c
);
202 void bgp_error(struct bgp_conn
*c
, unsigned code
, unsigned subcode
, byte
*data
, int len
);
203 void bgp_close_conn(struct bgp_conn
*c
);
204 void bgp_update_startup_delay(struct bgp_proto
*p
);
205 void bgp_conn_enter_openconfirm_state(struct bgp_conn
*conn
);
206 void bgp_conn_enter_established_state(struct bgp_conn
*conn
);
207 void bgp_conn_enter_close_state(struct bgp_conn
*conn
);
208 void bgp_conn_enter_idle_state(struct bgp_conn
*conn
);
209 void bgp_handle_graceful_restart(struct bgp_proto
*p
);
210 void bgp_graceful_restart_done(struct bgp_proto
*p
);
211 void bgp_refresh_begin(struct bgp_proto
*p
);
212 void bgp_refresh_end(struct bgp_proto
*p
);
213 void bgp_store_error(struct bgp_proto
*p
, struct bgp_conn
*c
, u8
class, u32 code
);
214 void bgp_stop(struct bgp_proto
*p
, unsigned subcode
);
216 struct rte_source
*bgp_find_source(struct bgp_proto
*p
, u32 path_id
);
217 struct rte_source
*bgp_get_source(struct bgp_proto
*p
, u32 path_id
);
222 #define BGP_FORCE_DEBUG 1
224 #define BGP_FORCE_DEBUG 0
/*
 * Emit a trace log line prefixed with the protocol name when any bit of
 * @flags is set in p->p.debug, or unconditionally when BGP_FORCE_DEBUG is
 * non-zero. Expects a variable `p` with members p->p.debug and p->p.name to
 * be in scope at the expansion site. @msg must be a string literal (it is
 * concatenated with the "%s: " prefix).
 *
 * @flags is parenthesized so that a compound argument such as `A | B` is
 * masked as a whole instead of turning the condition into
 * `(debug & A) | B`, which would always be true.
 */
#define BGP_TRACE(flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
	log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
/*
 * Variant of the trace macro that logs through log_rl() with the caller's
 * @rl argument passed as its first parameter. The message is emitted when
 * any bit of @flags is set in p->p.debug, or unconditionally when
 * BGP_FORCE_DEBUG is non-zero. Expects a variable `p` with members
 * p->p.debug and p->p.name to be in scope at the expansion site. @msg must
 * be a string literal.
 *
 * @flags is parenthesized so that a compound argument such as `A | B` is
 * masked as a whole instead of turning the condition into
 * `(debug & A) | B`, which would always be true.
 */
#define BGP_TRACE_RL(rl, flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
	log_rl(rl, L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
235 /* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6
236 * we store two addresses in it - a global address and a link local address.
239 #define NEXT_HOP_LENGTH (2*sizeof(ip_addr))
240 static inline void set_next_hop(byte
*b
, ip_addr addr
) { ((ip_addr
*) b
)[0] = addr
; ((ip_addr
*) b
)[1] = IPA_NONE
; }
242 #define NEXT_HOP_LENGTH sizeof(ip_addr)
243 static inline void set_next_hop(byte
*b
, ip_addr addr
) { ((ip_addr
*) b
)[0] = addr
; }
246 void bgp_attach_attr(struct ea_list
**to
, struct linpool
*pool
, unsigned attr
, uintptr_t val
);
247 byte
*bgp_attach_attr_wa(struct ea_list
**to
, struct linpool
*pool
, unsigned attr
, unsigned len
);
248 struct rta
*bgp_decode_attrs(struct bgp_conn
*conn
, byte
*a
, uint len
, struct linpool
*pool
, int mandatory
);
249 int bgp_get_attr(struct eattr
*e
, byte
*buf
, int buflen
);
250 int bgp_rte_better(struct rte
*, struct rte
*);
251 int bgp_rte_mergable(rte
*pri
, rte
*sec
);
252 int bgp_rte_recalculate(rtable
*table
, net
*net
, rte
*new, rte
*old
, rte
*old_best
);
253 void bgp_rt_notify(struct proto
*P
, rtable
*tbl UNUSED
, net
*n
, rte
*new, rte
*old UNUSED
, ea_list
*attrs
);
254 int bgp_import_control(struct proto
*, struct rte
**, struct ea_list
**, struct linpool
*);
255 void bgp_init_bucket_table(struct bgp_proto
*);
256 void bgp_free_bucket_table(struct bgp_proto
*p
);
257 void bgp_free_bucket(struct bgp_proto
*p
, struct bgp_bucket
*buck
);
258 void bgp_init_prefix_table(struct bgp_proto
*p
, u32 order
);
259 void bgp_free_prefix_table(struct bgp_proto
*p
);
260 void bgp_free_prefix(struct bgp_proto
*p
, struct bgp_prefix
*bp
);
261 uint
bgp_encode_attrs(struct bgp_proto
*p
, byte
*w
, ea_list
*attrs
, int remains
);
262 void bgp_get_route_info(struct rte
*, byte
*buf
, struct ea_list
*attrs
);
264 inline static void bgp_attach_attr_ip(struct ea_list
**to
, struct linpool
*pool
, unsigned attr
, ip_addr a
)
265 { *(ip_addr
*) bgp_attach_attr_wa(to
, pool
, attr
, sizeof(ip_addr
)) = a
; }
269 void mrt_dump_bgp_state_change(struct bgp_conn
*conn
, unsigned old
, unsigned new);
270 void bgp_schedule_packet(struct bgp_conn
*conn
, int type
);
271 void bgp_kick_tx(void *vconn
);
272 void bgp_tx(struct birdsock
*sk
);
273 int bgp_rx(struct birdsock
*sk
, uint size
);
274 const char * bgp_error_dsc(unsigned code
, unsigned subcode
);
275 void bgp_log_error(struct bgp_proto
*p
, u8
class, char *msg
, unsigned code
, unsigned subcode
, byte
*data
, unsigned len
);
279 #define PKT_OPEN 0x01
280 #define PKT_UPDATE 0x02
281 #define PKT_NOTIFICATION 0x03
282 #define PKT_KEEPALIVE 0x04
283 #define PKT_ROUTE_REFRESH 0x05 /* [RFC2918] */
284 #define PKT_BEGIN_REFRESH 0x1e /* Dummy type for BoRR packet [RFC7313] */
285 #define PKT_SCHEDULE_CLOSE 0x1f /* Used internally to schedule socket close */
289 #define BAF_OPTIONAL 0x80
290 #define BAF_TRANSITIVE 0x40
291 #define BAF_PARTIAL 0x20
292 #define BAF_EXT_LEN 0x10
294 #define BA_ORIGIN 0x01 /* [RFC1771] */ /* WM */
295 #define BA_AS_PATH 0x02 /* WM */
296 #define BA_NEXT_HOP 0x03 /* WM */
297 #define BA_MULTI_EXIT_DISC 0x04 /* ON */
298 #define BA_LOCAL_PREF 0x05 /* WD */
299 #define BA_ATOMIC_AGGR 0x06 /* WD */
300 #define BA_AGGREGATOR 0x07 /* OT */
301 #define BA_COMMUNITY 0x08 /* [RFC1997] */ /* OT */
302 #define BA_ORIGINATOR_ID 0x09 /* [RFC1966] */ /* ON */
303 #define BA_CLUSTER_LIST 0x0a /* ON */
304 /* We don't support these: */
305 #define BA_DPA 0x0b /* ??? */
306 #define BA_ADVERTISER 0x0c /* [RFC1863] */
307 #define BA_RCID_PATH 0x0d
308 #define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */
309 #define BA_MP_UNREACH_NLRI 0x0f
310 #define BA_EXT_COMMUNITY 0x10 /* [RFC4360] */
311 #define BA_AS4_PATH 0x11 /* [RFC4893] */
312 #define BA_AS4_AGGREGATOR 0x12
313 #define BA_LARGE_COMMUNITY 0x20 /* [RFC8092] */
315 /* BGP connection states */
318 #define BS_CONNECT 1 /* Attempting to connect */
319 #define BS_ACTIVE 2 /* Waiting for connection retry & listening */
320 #define BS_OPENSENT 3
321 #define BS_OPENCONFIRM 4
322 #define BS_ESTABLISHED 5
323 #define BS_CLOSE 6 /* Used during transition to BS_IDLE */
329 * Used in PS_START for fine-grained specification of starting state.
331 * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP
332 * protocol has done what is necessary to start itself (like acquiring the lock),
333 * it goes to BSS_CONNECT. When some connection attempt failed because of
334 * option or capability error, it goes to BSS_CONNECT_NOCAP.
337 #define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. waiting for lock */
338 #define BSS_DELAY 1 /* Startup delay due to previous errors */
339 #define BSS_CONNECT 2 /* Ordinary BGP connecting */
340 #define BSS_CONNECT_NOCAP 3 /* Legacy BGP connecting (without capabilities) */
343 /* BGP feed states (TX)
345 * RFC 4724 specifies that an initial feed should end with End-of-RIB mark.
347 * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets.
349 * These states (stored in p->feed_state) are used to keep track of these
350 * requirements. When such feed is started, BFS_LOADING / BFS_REFRESHING is
351 * set. When it ended, BFS_LOADED / BFS_REFRESHED is set to schedule End-of-RIB
352 * or EoRR packet. When the packet is sent, the state returned to BFS_NONE.
354 * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load
355 * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR
356 * demarcation) is active, BFS_NONE is set.
358 * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX)
359 * with correspondent semantics (-, expecting End-of-RIB, expecting EoRR).
362 #define BFS_NONE 0 /* No feed or original non-demarcated feed */
363 #define BFS_LOADING 1 /* Initial feed active, End-of-RIB planned */
364 #define BFS_LOADED 2 /* Loading done, End-of-RIB marker scheduled */
365 #define BFS_REFRESHING 3 /* Route refresh (introduced by BoRR) active */
366 #define BFS_REFRESHED 4 /* Refresh done, EoRR packet scheduled */
372 #define BE_MISC 1 /* Miscellaneous error */
373 #define BE_SOCKET 2 /* Socket error */
374 #define BE_BGP_RX 3 /* BGP protocol error notification received */
375 #define BE_BGP_TX 4 /* BGP protocol error notification sent */
376 #define BE_AUTO_DOWN 5 /* Automatic shutdown */
377 #define BE_MAN_DOWN 6 /* Manual shutdown */
379 /* Misc error codes */
381 #define BEM_NEIGHBOR_LOST 1
382 #define BEM_INVALID_NEXT_HOP 2
383 #define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */
384 #define BEM_NO_SOCKET 4
385 #define BEM_LINK_DOWN 5
386 #define BEM_BFD_DOWN 6
387 #define BEM_GRACEFUL_RESTART 7
389 /* Automatic shutdown error codes */
391 #define BEA_ROUTE_LIMIT_EXCEEDED 1
393 /* Well-known communities */
395 #define BGP_COMM_NO_EXPORT 0xffffff01 /* Don't export outside local AS / confed. */
396 #define BGP_COMM_NO_ADVERTISE 0xffffff02 /* Don't export at all */
397 #define BGP_COMM_NO_EXPORT_SUBCONFED 0xffffff03 /* NO_EXPORT even in local confederation */
403 #define ORIGIN_INCOMPLETE 2
405 /* Address families */
407 #define BGP_AF_IPV4 1
408 #define BGP_AF_IPV6 2
411 #define BGP_AF BGP_AF_IPV6
413 #define BGP_AF BGP_AF_IPV4