2 * BIRD -- BGP Attributes
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
8 * Can be freely distributed and used under the terms of the GNU GPL.
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "conf/conf.h"
21 #include "lib/resource.h"
22 #include "lib/string.h"
23 #include "lib/unaligned.h"
28 * UPDATE message error handling
30 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32 * - Checks of some optional attribute values are missing.
33 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34 * are probably inadequate.
36 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37 * 4271 does not explicitly specify the behavior in that case.
39 * Loop detection related to route reflection (based on ORIGINATOR_ID
40 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41 * specifies that such updates should be ignored, but that is generally
44 * Error checking of optional transitive attributes is done according to
45 * draft-ietf-idr-optional-transitive-03, but errors are handled always
48 * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed,
49 * but unknown segments cause a session drop with Malformed AS_PATH
50 * error (see validate_path()). The behavior in such case is not
51 * explicitly specified by RFC 4271. RFC 5065 specifies that
52 * inconsistent AS_CONFED_* segments should cause a session drop, but
53 * implementations that pass invalid AS_CONFED_* segments are
56 * Error handling of AS4_* attributes is done as specified by
57 * draft-ietf-idr-rfc4893bis-03. There are several possible
58 * inconsistencies between AGGREGATOR and AS4_AGGREGATOR that are not
59 * handled by that draft, these are logged and ignored (see
60 * bgp_reconstruct_4b_attrs()).
62 * BGP attribute table has several hooks:
64 * export - Hook that validates and normalizes attribute during export phase.
65 * Receives eattr, may modify it (e.g., sort community lists for canonical
66 * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
67 * necessary. May assume that eattr has value valid w.r.t. its type, but may be
68 * invalid w.r.t. BGP constraints. Optional.
70 * encode - Hook that converts internal representation to external one during
71 * packet writing. Receives eattr and puts it in the buffer (including attribute
72 * header). Returns number of bytes, or -1 if not enough space. May assume that
73 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
74 * for all known attributes that exist internally after export phase (i.e., all
75 * except pseudoattributes MP_(UN)REACH_NLRI).
77 * decode - Hook that converts external representation to internal one during
78 * packet parsing. Receives attribute data in buffer, validates it and adds
79 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
80 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
82 * format - Optional hook that converts eattr to textual representation.
85 // XXXX review pool usage : c->c.proto->pool
88 struct bgp_attr_desc
{
92 void (*export
)(struct bgp_export_state
*s
, eattr
*a
);
93 int (*encode
)(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
);
94 void (*decode
)(struct bgp_parse_state
*s
, uint code
, uint flags
, byte
*data
, uint len
, ea_list
**to
);
95 void (*format
)(eattr
*ea
, byte
*buf
, uint size
);
98 static const struct bgp_attr_desc bgp_attr_table
[];
100 static inline int bgp_attr_known(uint code
);
103 bgp_set_attr(ea_list
**attrs
, struct linpool
*pool
, uint code
, uint flags
, uintptr_t val
)
105 ASSERT(bgp_attr_known(code
));
107 ea_list
*a
= lp_alloc(pool
, sizeof(ea_list
) + sizeof(eattr
));
108 eattr
*e
= &a
->attrs
[0];
110 a
->flags
= EALF_SORTED
;
115 e
->id
= EA_CODE(EAP_BGP
, code
);
116 e
->type
= bgp_attr_table
[code
].type
;
119 if (e
->type
& EAF_EMBEDDED
)
120 e
->u
.data
= (u32
) val
;
122 e
->u
.ptr
= (struct adata
*) val
;
129 #define REPORT(msg, args...) \
130 ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
132 #define DISCARD(msg, args...) \
133 ({ REPORT(msg, ## args); return; })
135 #define WITHDRAW(msg, args...) \
136 ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
139 ({ a->type = EAF_TYPE_UNDEF; return; })
141 #define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
142 #define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
143 #define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
144 #define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
145 #define NO_MANDATORY "Missing mandatory %s attribute"
149 bgp_put_attr_hdr3(byte
*buf
, uint code
, uint flags
, uint len
)
158 bgp_put_attr_hdr4(byte
*buf
, uint code
, uint flags
, uint len
)
160 *buf
++ = flags
| BAF_EXT_LEN
;
167 bgp_put_attr_hdr(byte
*buf
, uint code
, uint flags
, uint len
)
170 return bgp_put_attr_hdr3(buf
, code
, flags
, len
);
172 return bgp_put_attr_hdr4(buf
, code
, flags
, len
);
176 bgp_encode_u8(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size
)
181 bgp_put_attr_hdr3(buf
, EA_ID(a
->id
), a
->flags
, 1);
188 bgp_encode_u32(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size
)
193 bgp_put_attr_hdr3(buf
, EA_ID(a
->id
), a
->flags
, 4);
194 put_u32(buf
+3, a
->u
.data
);
200 bgp_encode_u32s(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size
)
202 uint len
= a
->u
.ptr
->length
;
207 uint hdr
= bgp_put_attr_hdr(buf
, EA_ID(a
->id
), a
->flags
, len
);
208 put_u32s(buf
+ hdr
, (u32
*) a
->u
.ptr
->data
, len
/ 4);
214 bgp_put_attr(byte
*buf
, uint size
, uint code
, uint flags
, byte
*data
, uint len
)
219 uint hdr
= bgp_put_attr_hdr(buf
, code
, flags
, len
);
220 memcpy(buf
+ hdr
, data
, len
);
226 bgp_encode_raw(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size
)
228 return bgp_put_attr(buf
, size
, EA_ID(a
->id
), a
->flags
, a
->u
.ptr
->data
, a
->u
.ptr
->length
);
237 bgp_export_origin(struct bgp_export_state
*s
, eattr
*a
)
240 WITHDRAW(BAD_VALUE
, "ORIGIN", a
->u
.data
);
244 bgp_decode_origin(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
247 WITHDRAW(BAD_LENGTH
, "ORIGIN", len
);
250 WITHDRAW(BAD_VALUE
, "ORIGIN", data
[0]);
252 bgp_set_attr_u32(to
, s
->pool
, BA_ORIGIN
, flags
, data
[0]);
256 bgp_format_origin(eattr
*a
, byte
*buf
, uint size UNUSED
)
258 static const char *bgp_origin_names
[] = { "IGP", "EGP", "Incomplete" };
260 bsprintf(buf
, (a
->u
.data
<= 2) ? bgp_origin_names
[a
->u
.data
] : "?");
265 bgp_encode_as_path(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
)
267 byte
*data
= a
->u
.ptr
->data
;
268 uint len
= a
->u
.ptr
->length
;
272 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
275 len
= as_path_32to16(data
, src
, len
);
278 return bgp_put_attr(buf
, size
, BA_AS_PATH
, a
->flags
, data
, len
);
282 bgp_decode_as_path(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
286 if (!as_path_valid(data
, len
, (s
->as4_session
? 4 : 2), err
, sizeof(err
)))
287 WITHDRAW("Malformed AS_PATH attribute - %s", err
);
291 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
293 data
= alloca(2*len
);
294 len
= as_path_16to32(data
, src
, len
);
297 bgp_set_attr_data(to
, s
->pool
, BA_AS_PATH
, flags
, data
, len
);
302 bgp_encode_next_hop(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
)
305 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
306 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
307 * store it and encode it later by AFI-specific hooks.
310 if (s
->channel
->afi
== BGP_AF_IPV4
)
312 ASSERT(a
->u
.ptr
->length
== sizeof(ip_addr
));
317 bgp_put_attr_hdr3(buf
, BA_NEXT_HOP
, a
->flags
, 4);
318 put_ip4(buf
+3, ipa_to_ip4( *(ip_addr
*) a
->u
.ptr
->data
));
330 bgp_decode_next_hop(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags UNUSED
, byte
*data
, uint len
, ea_list
**to UNUSED
)
333 WITHDRAW(BAD_LENGTH
, "NEXT_HOP", len
);
335 /* Semantic checks are done later */
336 s
->ip_next_hop_len
= len
;
337 s
->ip_next_hop_data
= data
;
340 /* TODO: This function should use AF-specific hook */
342 bgp_format_next_hop(eattr
*a
, byte
*buf
, uint size UNUSED
)
344 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
345 uint len
= a
->u
.ptr
->length
;
347 ASSERT((len
== 16) || (len
== 32));
349 /* in IPv6, we may have two addresses in NEXT HOP */
350 if ((len
== 16) || ipa_zero(nh
[1]))
351 bsprintf(buf
, "%I", nh
[0]);
353 bsprintf(buf
, "%I %I", nh
[0], nh
[1]);
358 bgp_decode_med(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
361 WITHDRAW(BAD_LENGTH
, "MULTI_EXIT_DISC", len
);
363 u32 val
= get_u32(data
);
364 bgp_set_attr_u32(to
, s
->pool
, BA_MULTI_EXIT_DISC
, flags
, val
);
369 bgp_export_local_pref(struct bgp_export_state
*s
, eattr
*a
)
371 if (!s
->proto
->is_interior
)
376 bgp_decode_local_pref(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
378 if (!s
->proto
->is_interior
)
379 DISCARD(BAD_EBGP
, "LOCAL_PREF");
382 WITHDRAW(BAD_LENGTH
, "LOCAL_PREF", len
);
384 u32 val
= get_u32(data
);
385 bgp_set_attr_u32(to
, s
->pool
, BA_LOCAL_PREF
, flags
, val
);
390 bgp_decode_atomic_aggr(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data UNUSED
, uint len
, ea_list
**to
)
393 DISCARD(BAD_LENGTH
, "ATOMIC_AGGR", len
);
395 bgp_set_attr_data(to
, s
->pool
, BA_ATOMIC_AGGR
, flags
, NULL
, 0);
399 bgp_encode_aggregator(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
)
401 byte
*data
= a
->u
.ptr
->data
;
402 uint len
= a
->u
.ptr
->length
;
406 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
409 len
= aggregator_32to16(data
, src
);
412 return bgp_put_attr(buf
, size
, BA_AGGREGATOR
, a
->flags
, data
, len
);
416 bgp_decode_aggregator(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
418 if (len
!= (s
->as4_session
? 8 : 6))
419 DISCARD(BAD_LENGTH
, "AGGREGATOR", len
);
423 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
426 len
= aggregator_16to32(data
, src
);
429 bgp_set_attr_data(to
, s
->pool
, BA_AGGREGATOR
, flags
, data
, len
);
433 bgp_format_aggregator(eattr
*a
, byte
*buf
, uint size UNUSED
)
435 byte
*data
= a
->u
.ptr
->data
;
437 bsprintf(buf
, "%I4 AS%u", get_ip4(data
+4), get_u32(data
+0));
442 bgp_export_community(struct bgp_export_state
*s
, eattr
*a
)
444 if (a
->u
.ptr
->length
== 0)
447 a
->u
.ptr
= int_set_sort(s
->pool
, a
->u
.ptr
);
451 bgp_decode_community(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
453 if (!len
|| (len
% 4))
454 WITHDRAW(BAD_LENGTH
, "COMMUNITY", len
);
456 struct adata
*ad
= lp_alloc_adata(s
->pool
, len
);
457 get_u32s(data
, (u32
*) ad
->data
, len
/ 4);
458 bgp_set_attr_ptr(to
, s
->pool
, BA_COMMUNITY
, flags
, ad
);
463 bgp_export_originator_id(struct bgp_export_state
*s
, eattr
*a
)
465 if (!s
->proto
->is_internal
)
470 bgp_decode_originator_id(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
472 if (!s
->proto
->is_internal
)
473 DISCARD(BAD_EBGP
, "ORIGINATOR_ID");
476 WITHDRAW(BAD_LENGTH
, "ORIGINATOR_ID", len
);
478 u32 val
= get_u32(data
);
479 bgp_set_attr_u32(to
, s
->pool
, BA_ORIGINATOR_ID
, flags
, val
);
484 bgp_export_cluster_list(struct bgp_export_state
*s UNUSED
, eattr
*a
)
486 if (!s
->proto
->is_internal
)
489 if (a
->u
.ptr
->length
== 0)
494 bgp_decode_cluster_list(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
496 if (!s
->proto
->is_internal
)
497 DISCARD(BAD_EBGP
, "CLUSTER_LIST");
499 if (!len
|| (len
% 4))
500 WITHDRAW(BAD_LENGTH
, "CLUSTER_LIST", len
);
502 struct adata
*ad
= lp_alloc_adata(s
->pool
, len
);
503 get_u32s(data
, (u32
*) ad
->data
, len
/ 4);
504 bgp_set_attr_ptr(to
, s
->pool
, BA_CLUSTER_LIST
, flags
, ad
);
508 bgp_format_cluster_list(eattr
*a
, byte
*buf
, uint size
)
510 /* Truncates cluster lists larger than buflen, probably not a problem */
511 int_set_format(a
->u
.ptr
, 0, -1, buf
, size
);
518 return (get_u16(buf
) << 16) | buf
[2];
522 bgp_decode_mp_reach_nlri(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags UNUSED
, byte
*data
, uint len
, ea_list
**to UNUSED
)
525 * 2 B MP_REACH_NLRI data - Address Family Identifier
526 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
527 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
528 * var MP_REACH_NLRI data - Network Address of Next Hop
529 * 1 B MP_REACH_NLRI data - Reserved (zero)
530 * var MP_REACH_NLRI data - Network Layer Reachability Information
533 if ((len
< 5) || (len
< (5 + (uint
) data
[3])))
534 bgp_parse_error(s
, 9);
536 s
->mp_reach_af
= get_af3(data
);
537 s
->mp_next_hop_len
= data
[3];
538 s
->mp_next_hop_data
= data
+ 4;
539 s
->mp_reach_len
= len
- 5 - s
->mp_next_hop_len
;
540 s
->mp_reach_nlri
= data
+ 5 + s
->mp_next_hop_len
;
545 bgp_decode_mp_unreach_nlri(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags UNUSED
, byte
*data
, uint len
, ea_list
**to UNUSED
)
548 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
549 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
550 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
554 bgp_parse_error(s
, 9);
556 s
->mp_unreach_af
= get_af3(data
);
557 s
->mp_unreach_len
= len
- 3;
558 s
->mp_unreach_nlri
= data
+ 3;
563 bgp_export_ext_community(struct bgp_export_state
*s
, eattr
*a
)
565 if (a
->u
.ptr
->length
== 0)
568 a
->u
.ptr
= ec_set_sort(s
->pool
, a
->u
.ptr
);
572 bgp_decode_ext_community(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
574 if (!len
|| (len
% 8))
575 WITHDRAW(BAD_LENGTH
, "EXT_COMMUNITY", len
);
577 struct adata
*ad
= lp_alloc_adata(s
->pool
, len
);
578 get_u32s(data
, (u32
*) ad
->data
, len
/ 4);
579 bgp_set_attr_ptr(to
, s
->pool
, BA_EXT_COMMUNITY
, flags
, ad
);
584 bgp_decode_as4_aggregator(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
587 DISCARD(NEW_BGP
, "AS4_AGGREGATOR");
590 DISCARD(BAD_LENGTH
, "AS4_AGGREGATOR", len
);
592 bgp_set_attr_data(to
, s
->pool
, BA_AS4_AGGREGATOR
, flags
, data
, len
);
596 bgp_decode_as4_path(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
601 DISCARD(NEW_BGP
, "AS4_PATH");
604 DISCARD(BAD_LENGTH
, "AS4_PATH", len
);
606 if (!as_path_valid(data
, len
, 4, err
, sizeof(err
)))
607 DISCARD("Malformed AS4_PATH attribute - %s", err
);
609 /* XXXX remove CONFED segments */
610 bgp_set_attr_data(to
, s
->pool
, BA_AS4_PATH
, flags
, data
, len
);
614 bgp_export_large_community(struct bgp_export_state
*s
, eattr
*a
)
616 if (a
->u
.ptr
->length
== 0)
619 a
->u
.ptr
= lc_set_sort(s
->pool
, a
->u
.ptr
);
623 bgp_decode_large_community(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
625 if (!len
|| (len
% 12))
626 WITHDRAW(BAD_LENGTH
, "LARGE_COMMUNITY", len
);
628 struct adata
*ad
= lp_alloc_adata(s
->pool
, len
);
629 get_u32s(data
, (u32
*) ad
->data
, len
/ 4);
630 bgp_set_attr_ptr(to
, s
->pool
, BA_LARGE_COMMUNITY
, flags
, ad
);
634 bgp_decode_unknown(struct bgp_parse_state
*s
, uint code
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
636 bgp_set_attr_data(to
, s
->pool
, code
, flags
, data
, len
);
644 static const struct bgp_attr_desc bgp_attr_table
[] = {
647 .type
= EAF_TYPE_INT
,
648 .flags
= BAF_TRANSITIVE
,
649 .export
= bgp_export_origin
,
650 .encode
= bgp_encode_u8
,
651 .decode
= bgp_decode_origin
,
652 .format
= bgp_format_origin
,
656 .type
= EAF_TYPE_AS_PATH
,
657 .flags
= BAF_TRANSITIVE
,
658 .encode
= bgp_encode_as_path
,
659 .decode
= bgp_decode_as_path
,
663 .type
= EAF_TYPE_IP_ADDRESS
,
664 .flags
= BAF_TRANSITIVE
,
665 .encode
= bgp_encode_next_hop
,
666 .decode
= bgp_decode_next_hop
,
667 .format
= bgp_format_next_hop
,
669 [BA_MULTI_EXIT_DISC
] = {
671 .type
= EAF_TYPE_INT
,
672 .flags
= BAF_OPTIONAL
,
673 .encode
= bgp_encode_u32
,
674 .decode
= bgp_decode_med
,
677 .name
= "local_pref",
678 .type
= EAF_TYPE_INT
,
679 .flags
= BAF_TRANSITIVE
,
680 .export
= bgp_export_local_pref
,
681 .encode
= bgp_encode_u32
,
682 .decode
= bgp_decode_local_pref
,
685 .name
= "atomic_aggr",
686 .type
= EAF_TYPE_OPAQUE
,
687 .flags
= BAF_TRANSITIVE
,
688 .encode
= bgp_encode_raw
,
689 .decode
= bgp_decode_atomic_aggr
,
692 .name
= "aggregator",
693 .type
= EAF_TYPE_OPAQUE
,
694 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
695 .encode
= bgp_encode_aggregator
,
696 .decode
= bgp_decode_aggregator
,
697 .format
= bgp_format_aggregator
,
701 .type
= EAF_TYPE_INT_SET
,
702 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
703 .export
= bgp_export_community
,
704 .encode
= bgp_encode_u32s
,
705 .decode
= bgp_decode_community
,
707 [BA_ORIGINATOR_ID
] = {
708 .name
= "originator_id",
709 .type
= EAF_TYPE_ROUTER_ID
,
710 .flags
= BAF_OPTIONAL
,
711 .export
= bgp_export_originator_id
,
712 .encode
= bgp_encode_u32
,
713 .decode
= bgp_decode_originator_id
,
715 [BA_CLUSTER_LIST
] = {
716 .name
= "cluster_list",
717 .type
= EAF_TYPE_INT_SET
,
718 .flags
= BAF_OPTIONAL
,
719 .export
= bgp_export_cluster_list
,
720 .encode
= bgp_encode_u32s
,
721 .decode
= bgp_decode_cluster_list
,
722 .format
= bgp_format_cluster_list
,
724 [BA_MP_REACH_NLRI
] = {
725 .name
= "mp_reach_nlri",
726 .type
= EAF_TYPE_OPAQUE
,
727 .flags
= BAF_OPTIONAL
,
728 .decode
= bgp_decode_mp_reach_nlri
,
730 [BA_MP_UNREACH_NLRI
] = {
731 .name
= "mp_unreach_nlri",
732 .type
= EAF_TYPE_OPAQUE
,
733 .flags
= BAF_OPTIONAL
,
734 .decode
= bgp_decode_mp_unreach_nlri
,
736 [BA_EXT_COMMUNITY
] = {
737 .name
= "ext_community",
738 .type
= EAF_TYPE_EC_SET
,
739 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
740 .export
= bgp_export_ext_community
,
741 .encode
= bgp_encode_u32s
,
742 .decode
= bgp_decode_ext_community
,
746 .type
= EAF_TYPE_AS_PATH
,
747 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
748 .encode
= bgp_encode_raw
,
749 .decode
= bgp_decode_as4_path
,
751 [BA_AS4_AGGREGATOR
] = {
752 .name
= "as4_aggregator",
753 .type
= EAF_TYPE_OPAQUE
,
754 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
755 .encode
= bgp_encode_raw
,
756 .decode
= bgp_decode_as4_aggregator
,
757 .format
= bgp_format_aggregator
,
759 [BA_LARGE_COMMUNITY
] = {
760 .name
= "large_community",
761 .type
= EAF_TYPE_LC_SET
,
762 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
763 .export
= bgp_export_large_community
,
764 .encode
= bgp_encode_u32s
,
765 .decode
= bgp_decode_large_community
,
770 bgp_attr_known(uint code
)
772 return (code
< ARRAY_SIZE(bgp_attr_table
)) && bgp_attr_table
[code
].name
;
781 bgp_export_attr(struct bgp_export_state
*s
, eattr
*a
, ea_list
*to
)
783 if (EA_PROTO(a
->id
) != EAP_BGP
)
786 uint code
= EA_ID(a
->id
);
788 if (bgp_attr_known(code
))
790 const struct bgp_attr_desc
*desc
= &bgp_attr_table
[code
];
792 /* The flags might have been zero if the attr was added by filters */
793 a
->flags
= (a
->flags
& BAF_PARTIAL
) | desc
->flags
;
795 /* Set partial bit if new opt-trans attribute is attached to non-local route */
796 if ((s
->src
!= NULL
) && (a
->type
& EAF_ORIGINATED
) &&
797 (a
->flags
& BAF_OPTIONAL
) && (a
->flags
& BAF_TRANSITIVE
))
798 a
->flags
|= BAF_PARTIAL
;
800 /* Call specific hook */
801 CALL(desc
->export
, s
, a
);
803 /* Attribute might become undefined in hook */
804 if ((a
->type
& EAF_TYPE_MASK
) == EAF_TYPE_UNDEF
)
809 /* Don't re-export unknown non-transitive attributes */
810 if (!(a
->flags
& BAF_TRANSITIVE
))
813 a
->flags
|= BAF_PARTIAL
;
816 /* Append updated attribute */
817 to
->attrs
[to
->count
++] = *a
;
821 * bgp_export_attrs - export BGP attributes
822 * @s: BGP export state
823 * @attrs: a list of extended attributes
825 * The bgp_export_attrs() function takes a list of attributes and merges it to
826 * one newly allocated and sorted segment. Attributes are validated and
827 * normalized by type-specific export hooks and attribute flags are updated.
828 * Some attributes may be eliminated (e.g. unknown non-transitive attributes, or
829 * empty community sets).
831 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
833 static inline ea_list
*
834 bgp_export_attrs(struct bgp_export_state
*s
, ea_list
*attrs
)
836 /* Merge the attribute list */
837 ea_list
*new = lp_alloc(s
->pool
, ea_scan(attrs
));
838 ea_merge(attrs
, new);
845 /* Export each attribute */
846 for (i
= 0; i
< count
; i
++)
847 bgp_export_attr(s
, &new->attrs
[i
], new);
862 bgp_encode_attr(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
)
864 ASSERT(EA_PROTO(a
->id
) == EAP_BGP
);
866 uint code
= EA_ID(a
->id
);
868 if (bgp_attr_known(code
))
869 return bgp_attr_table
[code
].encode(s
, a
, buf
, size
);
871 return bgp_encode_raw(s
, a
, buf
, size
);
875 * bgp_encode_attrs - encode BGP attributes
876 * @s: BGP write state
877 * @attrs: a list of extended attributes
881 * The bgp_encode_attrs() function takes a list of extended attributes
882 * and converts it to its BGP representation (a part of an Update message).
884 * Result: Length of the attribute block generated or -1 if not enough space.
887 bgp_encode_attrs(struct bgp_write_state
*s
, ea_list
*attrs
, byte
*buf
, byte
*end
)
892 for (i
= 0; i
< attrs
->count
; i
++)
894 len
= bgp_encode_attr(s
, &attrs
->attrs
[i
], pos
, end
- pos
);
910 static void bgp_process_as4_attrs(ea_list
**attrs
, struct linpool
*pool
);
913 bgp_as_path_loopy(struct bgp_proto
*p
, ea_list
*attrs
, u32 asn
)
915 eattr
*e
= bgp_find_attr(attrs
, BA_AS_PATH
);
916 int num
= p
->cf
->allow_local_as
+ 1;
917 return (e
&& (num
> 0) && as_path_contains(e
->u
.ptr
, asn
, num
));
921 bgp_originator_id_loopy(struct bgp_proto
*p
, ea_list
*attrs
)
923 eattr
*e
= bgp_find_attr(attrs
, BA_ORIGINATOR_ID
);
924 return (e
&& (e
->u
.data
== p
->local_id
));
928 bgp_cluster_list_loopy(struct bgp_proto
*p
, ea_list
*attrs
)
930 eattr
*e
= bgp_find_attr(attrs
, BA_CLUSTER_LIST
);
931 return (e
&& int_set_contains(e
->u
.ptr
, p
->rr_cluster_id
));
935 bgp_decode_attr(struct bgp_parse_state
*s
, uint code
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
937 /* Handle duplicate attributes; RFC 7606 3 (g) */
938 if (BIT32_TEST(s
->attrs_seen
, code
))
940 if ((code
== BA_MP_REACH_NLRI
) || (code
== BA_MP_UNREACH_NLRI
))
941 bgp_parse_error(s
, 1);
943 DISCARD("Discarding duplicate attribute (code %u)", code
);
945 BIT32_SET(s
->attrs_seen
, code
);
947 if (bgp_attr_known(code
))
949 const struct bgp_attr_desc
*desc
= &bgp_attr_table
[code
];
951 /* Handle conflicting flags; RFC 7606 3 (c) */
952 if ((flags
^ desc
->flags
) & (BAF_OPTIONAL
| BAF_TRANSITIVE
))
953 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc
->name
, flags
);
955 desc
->decode(s
, code
, flags
, data
, len
, to
);
957 else /* Unknown attribute */
959 if (!(flags
& BAF_OPTIONAL
))
960 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code
, flags
);
962 bgp_decode_unknown(s
, code
, flags
, data
, len
, to
);
967 * bgp_decode_attrs - check and decode BGP attributes
968 * @s: BGP parse state
969 * @data: start of attribute block
970 * @len: length of attribute block
972 * This function takes a BGP attribute block (a part of an Update message), checks
973 * its consistency and converts it to a list of BIRD route attributes represented
974 * by an (uncached) &rta.
977 bgp_decode_attrs(struct bgp_parse_state
*s
, byte
*data
, uint len
)
979 struct bgp_proto
*p
= s
->proto
;
980 ea_list
*attrs
= NULL
;
981 uint code
, flags
, alen
;
984 /* Parse the attributes */
989 /* Read attribute type */
994 ADVANCE(pos
, len
, 2);
996 /* Read attribute length */
997 if (flags
& BAF_EXT_LEN
)
1001 alen
= get_u16(pos
);
1002 ADVANCE(pos
, len
, 2);
1009 ADVANCE(pos
, len
, 1);
1015 DBG("Attr %02x %02x %u\n", code
, flags
, alen
);
1017 bgp_decode_attr(s
, code
, flags
, pos
, alen
, &attrs
);
1018 ADVANCE(pos
, len
, alen
);
1021 if (s
->err_withdraw
)
1024 /* If there is no reachability NLRI, we are finished */
1025 if (!s
->ip_reach_len
&& !s
->mp_reach_len
)
1029 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1030 if (!BIT32_TEST(s
->attrs_seen
, BA_ORIGIN
))
1031 { REPORT(NO_MANDATORY
, "ORIGIN"); goto withdraw
; }
1033 if (!BIT32_TEST(s
->attrs_seen
, BA_AS_PATH
))
1034 { REPORT(NO_MANDATORY
, "AS_PATH"); goto withdraw
; }
1036 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1037 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1038 if (!p
->as4_session
)
1039 bgp_process_as4_attrs(&attrs
, s
->pool
);
1041 /* Reject routes with our ASN in AS_PATH attribute */
1042 if (bgp_as_path_loopy(p
, attrs
, p
->local_as
))
1045 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4 */
1046 if ((p
->public_as
!= p
->local_as
) && bgp_as_path_loopy(p
, attrs
, p
->public_as
))
1049 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1050 if (p
->is_internal
&& bgp_originator_id_loopy(p
, attrs
))
1053 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1054 if (p
->rr_client
&& bgp_cluster_list_loopy(p
, attrs
))
1057 /* If there is no local preference, define one */
1058 if (!BIT32_TEST(s
->attrs_seen
, BA_LOCAL_PREF
))
1059 bgp_set_attr_u32(&attrs
, s
->pool
, BA_LOCAL_PREF
, 0, p
->cf
->default_local_pref
);
1065 /* RFC 7606 4 - handle attribute framing errors */
1066 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1067 alen
, len
, (int) (pos
- s
->attrs
));
1070 /* RFC 7606 5.2 - handle missing NLRI during errors */
1071 if (!s
->ip_reach_len
&& !s
->mp_reach_len
)
1072 bgp_parse_error(s
, 1);
1074 s
->err_withdraw
= 1;
1080 * Route bucket hash table
1083 #define RBH_KEY(b) b->eattrs, b->hash
1084 #define RBH_NEXT(b) b->next
1085 #define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
1086 #define RBH_FN(a,h) h
1088 #define RBH_REHASH bgp_rbh_rehash
1089 #define RBH_PARAMS /8, *2, 2, 2, 8, 20
1092 HASH_DEFINE_REHASH_FN(RBH
, struct bgp_bucket
)
1095 bgp_init_bucket_table(struct bgp_channel
*c
)
1097 HASH_INIT(c
->bucket_hash
, c
->pool
, 8);
1099 init_list(&c
->bucket_queue
);
1100 c
->withdraw_bucket
= NULL
;
1103 static struct bgp_bucket
*
1104 bgp_get_bucket(struct bgp_channel
*c
, ea_list
*new)
1106 /* Hash and lookup */
1107 u32 hash
= ea_hash(new);
1108 struct bgp_bucket
*b
= HASH_FIND(c
->bucket_hash
, RBH
, new, hash
);
1113 uint ea_size
= sizeof(ea_list
) + new->count
* sizeof(eattr
);
1114 uint ea_size_aligned
= BIRD_ALIGN(ea_size
, CPU_STRUCT_ALIGN
);
1115 uint size
= sizeof(struct bgp_bucket
) + ea_size_aligned
;
1119 /* Gather total size of non-inline attributes */
1120 for (i
= 0; i
< new->count
; i
++)
1122 eattr
*a
= &new->attrs
[i
];
1124 if (!(a
->type
& EAF_EMBEDDED
))
1125 size
+= BIRD_ALIGN(sizeof(struct adata
) + a
->u
.ptr
->length
, CPU_STRUCT_ALIGN
);
1128 /* Create the bucket */
1129 b
= mb_alloc(c
->pool
, size
);
1130 init_list(&b
->prefixes
);
1133 /* Copy list of extended attributes */
1134 memcpy(b
->eattrs
, new, ea_size
);
1135 dest
= ((byte
*) b
->eattrs
) + ea_size_aligned
;
1137 /* Copy values of non-inline attributes */
1138 for (i
= 0; i
< new->count
; i
++)
1140 eattr
*a
= &b
->eattrs
->attrs
[i
];
1142 if (!(a
->type
& EAF_EMBEDDED
))
1144 struct adata
*oa
= a
->u
.ptr
;
1145 struct adata
*na
= (struct adata
*) dest
;
1146 memcpy(na
, oa
, sizeof(struct adata
) + oa
->length
);
1148 dest
+= BIRD_ALIGN(sizeof(struct adata
) + na
->length
, CPU_STRUCT_ALIGN
);
1152 /* Insert the bucket to send queue and bucket hash */
1153 add_tail(&c
->bucket_queue
, &b
->send_node
);
1154 HASH_INSERT2(c
->bucket_hash
, RBH
, c
->pool
, b
);
1159 static struct bgp_bucket
*
1160 bgp_get_withdraw_bucket(struct bgp_channel
*c
)
1162 if (!c
->withdraw_bucket
)
1164 c
->withdraw_bucket
= mb_allocz(c
->pool
, sizeof(struct bgp_bucket
));
1165 init_list(&c
->withdraw_bucket
->prefixes
);
1168 return c
->withdraw_bucket
;
1172 bgp_free_bucket(struct bgp_channel
*c
, struct bgp_bucket
*b
)
1174 rem_node(&b
->send_node
);
1175 HASH_REMOVE2(c
->bucket_hash
, RBH
, c
->pool
, b
);
1180 bgp_defer_bucket(struct bgp_channel
*c
, struct bgp_bucket
*b
)
1182 rem_node(&b
->send_node
);
1183 add_tail(&c
->bucket_queue
, &b
->send_node
);
1187 bgp_withdraw_bucket(struct bgp_channel
*c
, struct bgp_bucket
*b
)
1189 struct bgp_proto
*p
= (void *) c
->c
.proto
;
1190 struct bgp_bucket
*wb
= bgp_get_withdraw_bucket(c
);
1192 log(L_ERR
"%s: Attribute list too long", p
->p
.name
);
1193 while (!EMPTY_LIST(b
->prefixes
))
1195 struct bgp_prefix
*px
= HEAD(b
->prefixes
);
1197 log(L_ERR
"%s: - withdrawing %N", p
->p
.name
, &px
->net
);
1198 rem_node(&px
->buck_node
);
1199 add_tail(&wb
->prefixes
, &px
->buck_node
);
1208 #define PXH_KEY(px) px->net, px->path_id, px->hash
1209 #define PXH_NEXT(px) px->next
1210 #define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
1211 #define PXH_FN(n,i,h) h
1213 #define PXH_REHASH bgp_pxh_rehash
1214 #define PXH_PARAMS /8, *2, 2, 2, 8, 20
1217 HASH_DEFINE_REHASH_FN(PXH
, struct bgp_prefix
)
1220 bgp_init_prefix_table(struct bgp_channel
*c
)
1222 HASH_INIT(c
->prefix_hash
, c
->pool
, 8);
1224 c
->prefix_slab
= sl_new(c
->pool
, sizeof(struct bgp_prefix
) +
1225 net_addr_length
[c
->c
.net_type
]);
1228 static struct bgp_prefix
*
1229 bgp_get_prefix(struct bgp_channel
*c
, net_addr
*net
, u32 path_id
)
1231 u32 hash
= net_hash(net
) ^ u32_hash(path_id
);
1232 struct bgp_prefix
*px
= HASH_FIND(c
->prefix_hash
, PXH
, net
, path_id
, hash
);
1236 rem_node(&px
->buck_node
);
1240 px
= sl_alloc(c
->prefix_slab
);
1241 px
->buck_node
.next
= NULL
;
1242 px
->buck_node
.prev
= NULL
;
1244 px
->path_id
= path_id
;
1245 net_copy(px
->net
, net
);
1247 HASH_INSERT2(c
->prefix_hash
, PXH
, c
->pool
, px
);
1253 bgp_free_prefix(struct bgp_channel
*c
, struct bgp_prefix
*px
)
1255 rem_node(&px
->buck_node
);
1256 HASH_REMOVE2(c
->prefix_hash
, PXH
, c
->pool
, px
);
1257 sl_free(c
->prefix_slab
, px
);
1266 bgp_import_control(struct proto
*P
, rte
**new, ea_list
**attrs UNUSED
, struct linpool
*pool UNUSED
)
1269 struct proto
*SRC
= e
->attrs
->src
->proto
;
1270 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1271 struct bgp_proto
*src
= (SRC
->proto
== &proto_bgp
) ? (struct bgp_proto
*) SRC
: NULL
;
1273 /* Reject our routes */
1277 /* Accept non-BGP routes */
1281 /* IBGP route reflection, RFC 4456 */
1282 if (p
->is_internal
&& src
->is_internal
&& (p
->local_as
== src
->local_as
))
1284 /* Rejected unless configured as route reflector */
1285 if (!p
->rr_client
&& !src
->rr_client
)
1288 /* Generally, this should be handled when path is received, but we check it
1289 also here as rr_cluster_id may be undefined or different in src. */
1290 if (p
->rr_cluster_id
&& bgp_cluster_list_loopy(p
, e
->attrs
->eattrs
))
1294 /* Handle well-known communities, RFC 1997 */
1296 if (p
->cf
->interpret_communities
&&
1297 (c
= ea_find(e
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_COMMUNITY
))))
1299 struct adata
*d
= c
->u
.ptr
;
1301 /* Do not export anywhere */
1302 if (int_set_contains(d
, BGP_COMM_NO_ADVERTISE
))
1305 /* Do not export outside of AS (or member-AS) */
1306 if (!p
->is_internal
&& int_set_contains(d
, BGP_COMM_NO_EXPORT_SUBCONFED
))
1309 /* Do not export outside of AS (or confederation) */
1310 if (!p
->is_interior
&& int_set_contains(d
, BGP_COMM_NO_EXPORT
))
/* Shared empty attribute data. It has static storage and no initializer, so
   it is zero-initialized; bgp_path_prepend() passes it to as_path_prepend2()
   when the attribute list has no AS_PATH. */
1317 static const adata null_adata
; /* adata of length 0 */
/*
 * bgp_path_prepend - prepend an AS number to the AS_PATH attribute
 * @attrs: attribute list that is searched and then updated
 * @pool: linpool handed to as_path_prepend2() and bgp_set_attr_ptr()
 * @seg: segment type passed to as_path_prepend2(); bgp_update_attrs() uses
 *   AS_PATH_SEQUENCE and AS_PATH_CONFED_SEQUENCE
 * @as: AS number to prepend
 * @strip: passed through to as_path_prepend2(); bgp_update_attrs() passes 1
 *   where its comment says CONFED_* segments are stripped and 0 where they
 *   are kept
 *
 * When the list has no AS_PATH, the empty null_adata is used as the
 * starting path. The result is stored back as BA_AS_PATH.
 */
1320 bgp_path_prepend(ea_list
**attrs
, struct linpool
*pool
, int seg
, u32 as
, int strip
)
1322 eattr
*a
= bgp_find_attr(*attrs
, BA_AS_PATH
);
1323 adata
*d
= as_path_prepend2(pool
, a
? a
->u
.ptr
: &null_adata
, seg
, as
, strip
);
1324 bgp_set_attr_ptr(attrs
, pool
, BA_AS_PATH
, 0, d
);
/*
 * bgp_cluster_list_prepend - add a cluster ID to the CLUSTER_LIST attribute
 * @attrs: attribute list that is searched and then updated
 * @pool: linpool handed to int_set_add() and bgp_set_attr_ptr()
 * @id: cluster ID to add
 *
 * The new list is built by int_set_add(); a missing CLUSTER_LIST is passed
 * to it as NULL. The result is stored back as BA_CLUSTER_LIST.
 */
1328 bgp_cluster_list_prepend(ea_list
**attrs
, struct linpool
*pool
, u32 id
)
1330 eattr
*a
= bgp_find_attr(*attrs
, BA_CLUSTER_LIST
);
1331 adata
*d
= int_set_add(pool
, a
? a
->u
.ptr
: NULL
, id
);
1332 bgp_set_attr_ptr(attrs
, pool
, BA_CLUSTER_LIST
, 0, d
);
/*
 * bgp_update_attrs - prepare a route's attribute list for export
 * @p: exporting BGP instance
 * @c: channel, recorded in the export state
 * @e: exported route; its source protocol determines src below
 * @attrs: attribute list to adjust
 * @pool: linpool used for all newly built attribute data
 *
 * Fills in or rewrites ORIGIN, AS_PATH, MULTI_EXIT_DISC, NEXT_HOP,
 * LOCAL_PREF, the route reflection attributes and the AS4_* transition
 * attributes, then returns the result of bgp_export_attrs().
 * bgp_rt_notify() treats a NULL result as invalid attributes.
 */
1336 bgp_update_attrs(struct bgp_proto
*p
, struct bgp_channel
*c
, rte
*e
, ea_list
*attrs
, struct linpool
*pool
)
1338 struct proto
*SRC
= e
->attrs
->src
->proto
;
/* src is non-NULL only when the route was learned from a BGP instance */
1339 struct bgp_proto
*src
= (SRC
->proto
== &proto_bgp
) ? (void *) SRC
: NULL
;
1340 struct bgp_export_state s
= { .proto
= p
, .channel
=c
, .pool
= pool
, .src
= src
, .route
= e
};
1343 /* ORIGIN attribute - mandatory, attach if missing */
/* Routes from BGP default to ORIGIN_INCOMPLETE, all others to ORIGIN_IGP */
1344 if (! bgp_find_attr(attrs
, BA_ORIGIN
))
1345 bgp_set_attr_u32(&attrs
, pool
, BA_ORIGIN
, 0, src
? ORIGIN_INCOMPLETE
: ORIGIN_IGP
);
1347 /* AS_PATH attribute - keep or prepend ASN */
1348 if (p
->is_internal
||
1349 (p
->rs_client
&& src
&& src
->rs_client
))
1351 /* IBGP or route server -> just ensure there is one */
1352 if (! bgp_find_attr(attrs
, BA_AS_PATH
))
1353 bgp_set_attr_ptr(&attrs
, pool
, BA_AS_PATH
, 0, lp_alloc_adata(pool
, 0));
1355 else if (p
->is_interior
)
1357 /* Confederation -> prepend ASN as CONFED_SEQUENCE, keep CONFED_* segments */
1358 bgp_path_prepend(&attrs
, pool
, AS_PATH_CONFED_SEQUENCE
, p
->public_as
, 0);
1360 else /* Regular EBGP (no RS, no confederation) */
1362 /* Regular EBGP -> prepend ASN as regular segment, strip CONFED_* segments */
1363 bgp_path_prepend(&attrs
, pool
, AS_PATH_SEQUENCE
, p
->public_as
, 1);
1365 /* MULTI_EXIT_DISC attribute - accept only if set in export filter */
/* An attribute without EAF_FRESH is removed */
1366 a
= bgp_find_attr(attrs
, BA_MULTI_EXIT_DISC
);
1367 if (a
&& !(a
->type
& EAF_FRESH
))
1368 bgp_unset_attr(&attrs
, pool
, BA_MULTI_EXIT_DISC
);
1371 /* NEXT_HOP attribute - delegated to AF-specific hook */
1372 a
= bgp_find_attr(attrs
, BA_NEXT_HOP
);
1373 bgp_update_next_hop(&s
, a
, &attrs
);
1375 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
/* The test uses is_interior; the value comes from cf->default_local_pref */
1376 if (p
->is_interior
&& ! bgp_find_attr(attrs
, BA_LOCAL_PREF
))
1377 bgp_set_attr_u32(&attrs
, pool
, BA_LOCAL_PREF
, 0, p
->cf
->default_local_pref
);
1379 /* IBGP route reflection, RFC 4456 */
/* src is tested first because it is NULL for non-BGP sources */
1380 if (src
&& src
->is_internal
&& p
->is_internal
&& (src
->local_as
== p
->local_as
))
1382 /* ORIGINATOR_ID attribute - attach if not already set */
1383 if (! bgp_find_attr(attrs
, BA_ORIGINATOR_ID
))
1384 bgp_set_attr_u32(&attrs
, pool
, BA_ORIGINATOR_ID
, 0, src
->remote_id
);
1386 /* CLUSTER_LIST attribute - prepend cluster ID */
1387 if (src
->rr_cluster_id
)
1388 bgp_cluster_list_prepend(&attrs
, pool
, src
->rr_cluster_id
);
1390 /* Handle different src and dst cluster ID - prepend both ones */
1391 if (p
->rr_cluster_id
&& (src
->rr_cluster_id
!= p
->rr_cluster_id
))
1392 bgp_cluster_list_prepend(&attrs
, pool
, p
->rr_cluster_id
);
1395 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1396 if (! p
->as4_session
)
1398 a
= bgp_find_attr(attrs
, BA_AS_PATH
);
1399 if (a
&& as_path_contains_as4(a
->u
.ptr
))
/* NOTE(review): a->u.ptr is read again after BA_AS_PATH has been set -
   presumably a still refers to the unconverted path; confirm against
   bgp_set_attr_ptr(). */
1401 bgp_set_attr_ptr(&attrs
, pool
, BA_AS_PATH
, 0, as_path_to_old(pool
, a
->u
.ptr
));
1402 bgp_set_attr_ptr(&attrs
, pool
, BA_AS4_PATH
, 0, as_path_strip_confed(pool
, a
->u
.ptr
));
1405 a
= bgp_find_attr(attrs
, BA_AGGREGATOR
);
1406 if (a
&& aggregator_contains_as4(a
->u
.ptr
))
1408 bgp_set_attr_ptr(&attrs
, pool
, BA_AGGREGATOR
, 0, aggregator_to_old(pool
, a
->u
.ptr
));
1409 bgp_set_attr_ptr(&attrs
, pool
, BA_AS4_AGGREGATOR
, 0, a
->u
.ptr
);
1413 /* Apply per-attribute export hooks for validation and normalization */
1414 return bgp_export_attrs(&s
, attrs
);
/*
 * bgp_rt_notify - queue an announcement or a withdrawal for a changed route
 * @P: protocol instance, used as struct bgp_proto
 * @C: channel, used as struct bgp_channel
 * @n: network; n->n.addr identifies the prefix entry
 * @new: new route, whose attributes go through bgp_update_attrs()
 * @old: old route, whose source global_id is used on the withdraw path
 * @attrs: attribute list belonging to @new
 *
 * Selects a bucket (by attributes, or the withdraw bucket), appends the
 * prefix entry to that bucket's prefixes list and schedules a PKT_UPDATE.
 * NOTE(review): the conditions that choose between the new and old
 * branches, and the declaration of path, are not visible in this excerpt.
 */
1418 bgp_rt_notify(struct proto
*P
, struct channel
*C
, net
*n
, rte
*new, rte
*old
, ea_list
*attrs
)
1420 struct bgp_proto
*p
= (void *) P
;
1421 struct bgp_channel
*c
= (void *) C
;
1422 struct bgp_bucket
*buck
;
1423 struct bgp_prefix
*px
;
1428 attrs
= bgp_update_attrs(p
, c
, new, attrs
, bgp_linpool
);
1430 /* If attributes are invalid, we fall back to withdraw */
1431 buck
= attrs
? bgp_get_bucket(c
, attrs
) : bgp_get_withdraw_bucket(c
);
1432 path
= new->attrs
->src
->global_id
;
/* bgp_linpool was handed to bgp_update_attrs() above and is flushed here,
   after the bucket has been looked up */
1434 lp_flush(bgp_linpool
);
1438 buck
= bgp_get_withdraw_bucket(c
);
1439 path
= old
->attrs
->src
->global_id
;
/* The path ID is passed on only when add_path_tx is set, otherwise 0 */
1442 px
= bgp_get_prefix(c
, n
->n
.addr
, c
->add_path_tx
? path
: 0);
1443 add_tail(&buck
->prefixes
, &px
->buck_node
);
1445 bgp_schedule_packet(p
->conn
, c
, PKT_UPDATE
);
/*
 * bgp_get_neighbor - determine the neighboring AS of a route
 * @r: route to inspect
 *
 * Looks up AS_PATH and asks as_path_get_first() for its first AS. The
 * fallback is remote_as of the route's source protocol, cast to
 * struct bgp_proto.
 * NOTE(review): the declaration of as and the return taken when
 * as_path_get_first() succeeds are not visible in this excerpt.
 */
1450 bgp_get_neighbor(rte
*r
)
1452 eattr
*e
= ea_find(r
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1455 if (e
&& as_path_get_first(e
->u
.ptr
, &as
))
1458 return ((struct bgp_proto
*) r
->attrs
->src
->proto
)->remote_as
;
/*
 * rte_resolvable - resolvability test used by the BGP route comparisons
 * @rt: route to test
 *
 * Returns non-zero exactly when the route's destination type is
 * RTD_UNICAST.
 */
1462 rte_resolvable(rte
*rt
)
1464 return rt
->attrs
->dest
== RTD_UNICAST
;
/*
 * bgp_rte_better - BGP route comparison
 * @new: candidate route
 * @old: route it is compared against
 *
 * Both source protocols are used as struct bgp_proto. The steps are, in
 * order: suppressed flag, resolvability, LOCAL_PREF, AS path length (only
 * if compare_path_lengths is set on either side), ORIGIN, MED (only if
 * med_metric is set on either side or both routes have the same neighbor
 * AS), is_interior of the peers, IGP metric, ORIGINATOR_ID or router ID,
 * the RFC 5004 prefer_older rule, CLUSTER_LIST length, and finally the
 * peer IP address.
 * NOTE(review): the declarations of x, y, n and o, and the comparisons of
 * n and o that follow each step, are not visible in this excerpt.
 */
1468 bgp_rte_better(rte
*new, rte
*old
)
1470 struct bgp_proto
*new_bgp
= (struct bgp_proto
*) new->attrs
->src
->proto
;
1471 struct bgp_proto
*old_bgp
= (struct bgp_proto
*) old
->attrs
->src
->proto
;
1475 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1476 n
= new->u
.bgp
.suppressed
;
1477 o
= old
->u
.bgp
.suppressed
;
1483 /* RFC 4271 9.1.2.1. Route resolvability test */
1484 n
= rte_resolvable(new);
1485 o
= rte_resolvable(old
);
1491 /* Start with local preferences */
/* A missing LOCAL_PREF is replaced by the protocol's default_local_pref */
1492 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_LOCAL_PREF
));
1493 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_LOCAL_PREF
));
1494 n
= x
? x
->u
.data
: new_bgp
->cf
->default_local_pref
;
1495 o
= y
? y
->u
.data
: old_bgp
->cf
->default_local_pref
;
1501 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
/* A missing AS_PATH counts as AS_PATH_MAXLEN */
1502 if (new_bgp
->cf
->compare_path_lengths
|| old_bgp
->cf
->compare_path_lengths
)
1504 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1505 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1506 n
= x
? as_path_getlen(x
->u
.ptr
) : AS_PATH_MAXLEN
;
1507 o
= y
? as_path_getlen(y
->u
.ptr
) : AS_PATH_MAXLEN
;
1514 /* RFC 4271 9.1.2.2. b) Use origins */
/* A missing ORIGIN counts as ORIGIN_INCOMPLETE */
1515 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGIN
));
1516 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGIN
));
1517 n
= x
? x
->u
.data
: ORIGIN_INCOMPLETE
;
1518 o
= y
? y
->u
.data
: ORIGIN_INCOMPLETE
;
1524 /* RFC 4271 9.1.2.2. c) Compare MED's */
1525 /* Proper RFC 4271 path selection cannot be interpreted as finding
1526 * the best path in some ordering. It is implemented partially in
1527 * bgp_rte_recalculate() when deterministic_med option is
1528 * active. Without that option, the behavior is just an
1529 * approximation, which in specific situations may lead to
1530 * persistent routing loops, because it is nondeterministic - it
1531 * depends on the order in which routes appeared. But it is also the
1532 * same behavior as used by default in Cisco routers, so it is
1533 * probably not a big issue.
1535 if (new_bgp
->cf
->med_metric
|| old_bgp
->cf
->med_metric
||
1536 (bgp_get_neighbor(new) == bgp_get_neighbor(old
)))
1538 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_MULTI_EXIT_DISC
));
1539 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_MULTI_EXIT_DISC
));
1540 n
= x
? x
->u
.data
: new_bgp
->cf
->default_med
;
1541 o
= y
? y
->u
.data
: old_bgp
->cf
->default_med
;
1548 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1549 if (new_bgp
->is_interior
> old_bgp
->is_interior
)
1551 if (new_bgp
->is_interior
< old_bgp
->is_interior
)
1554 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
/* A side without cf->igp_metric contributes 0 */
1555 n
= new_bgp
->cf
->igp_metric
? new->attrs
->igp_metric
: 0;
1556 o
= old_bgp
->cf
->igp_metric
? old
->attrs
->igp_metric
: 0;
1562 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1563 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1564 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGINATOR_ID
));
1565 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGINATOR_ID
));
1566 n
= x
? x
->u
.data
: new_bgp
->remote_id
;
1567 o
= y
? y
->u
.data
: old_bgp
->remote_id
;
1569 /* RFC 5004 - prefer older routes */
1570 /* (if both are external and from different peer) */
/* Only new_bgp->is_internal is tested here; n != o compares the
   identifiers computed just above */
1571 if ((new_bgp
->cf
->prefer_older
|| old_bgp
->cf
->prefer_older
) &&
1572 !new_bgp
->is_internal
&& n
!= o
)
1575 /* rest of RFC 4271 9.1.2.2. f) */
1581 /* RFC 4456 9. b) Compare cluster list lengths */
/* A missing CLUSTER_LIST counts as length 0 */
1582 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_CLUSTER_LIST
));
1583 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_CLUSTER_LIST
));
1584 n
= x
? int_set_get_size(x
->u
.ptr
) : 0;
1585 o
= y
? int_set_get_size(y
->u
.ptr
) : 0;
1591 /* RFC 4271 9.1.2.2. g) Compare peer IP addresses */
1592 return (ipa_compare(new_bgp
->cf
->remote_ip
, old_bgp
->cf
->remote_ip
) < 0);
/*
 * bgp_rte_mergable - test whether a secondary route may be merged with the
 * primary one
 * @pri: primary route
 * @sec: secondary route
 *
 * Follows the same steps as bgp_rte_better() up to the IGP metric
 * comparison; the remaining criteria are ignored, as the last comment in
 * the function states. Only @sec is put through the resolvability test.
 * NOTE(review): the declarations of x, y, p and s, and the comparisons
 * and returns that follow each step, are not visible in this excerpt.
 */
1597 bgp_rte_mergable(rte
*pri
, rte
*sec
)
1599 struct bgp_proto
*pri_bgp
= (struct bgp_proto
*) pri
->attrs
->src
->proto
;
1600 struct bgp_proto
*sec_bgp
= (struct bgp_proto
*) sec
->attrs
->src
->proto
;
1604 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1605 if (pri
->u
.bgp
.suppressed
!= sec
->u
.bgp
.suppressed
)
1608 /* RFC 4271 9.1.2.1. Route resolvability test */
1609 if (!rte_resolvable(sec
))
1612 /* Start with local preferences */
/* A missing LOCAL_PREF is replaced by the protocol's default_local_pref */
1613 x
= ea_find(pri
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_LOCAL_PREF
));
1614 y
= ea_find(sec
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_LOCAL_PREF
));
1615 p
= x
? x
->u
.data
: pri_bgp
->cf
->default_local_pref
;
1616 s
= y
? y
->u
.data
: sec_bgp
->cf
->default_local_pref
;
1620 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
/* A missing AS_PATH counts as AS_PATH_MAXLEN */
1621 if (pri_bgp
->cf
->compare_path_lengths
|| sec_bgp
->cf
->compare_path_lengths
)
1623 x
= ea_find(pri
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1624 y
= ea_find(sec
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1625 p
= x
? as_path_getlen(x
->u
.ptr
) : AS_PATH_MAXLEN
;
1626 s
= y
? as_path_getlen(y
->u
.ptr
) : AS_PATH_MAXLEN
;
1631 // if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
1635 /* RFC 4271 9.1.2.2. b) Use origins */
/* A missing ORIGIN counts as ORIGIN_INCOMPLETE */
1636 x
= ea_find(pri
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGIN
));
1637 y
= ea_find(sec
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGIN
));
1638 p
= x
? x
->u
.data
: ORIGIN_INCOMPLETE
;
1639 s
= y
? y
->u
.data
: ORIGIN_INCOMPLETE
;
1643 /* RFC 4271 9.1.2.2. c) Compare MED's */
1644 if (pri_bgp
->cf
->med_metric
|| sec_bgp
->cf
->med_metric
||
1645 (bgp_get_neighbor(pri
) == bgp_get_neighbor(sec
)))
1647 x
= ea_find(pri
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_MULTI_EXIT_DISC
));
1648 y
= ea_find(sec
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_MULTI_EXIT_DISC
));
1649 p
= x
? x
->u
.data
: pri_bgp
->cf
->default_med
;
1650 s
= y
? y
->u
.data
: sec_bgp
->cf
->default_med
;
1655 /* RFC 4271 9.1.2.2. d) Prefer external peers */
/* NOTE(review): this step compares is_internal, whereas the same step in
   bgp_rte_better() compares is_interior - confirm the difference is
   intended. */
1656 if (pri_bgp
->is_internal
!= sec_bgp
->is_internal
)
1659 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
/* A side without cf->igp_metric contributes 0 */
1660 p
= pri_bgp
->cf
->igp_metric
? pri
->attrs
->igp_metric
: 0;
1661 s
= sec_bgp
->cf
->igp_metric
? sec
->attrs
->igp_metric
: 0;
1665 /* Remaining criteria are ignored */
/*
 * same_group - test whether a route belongs to a given selection group
 * @r: route to test
 * @lpref: preference identifying the group
 * @lasn: neighbor AS identifying the group
 *
 * Returns non-zero when r->pref equals @lpref and bgp_get_neighbor(@r)
 * equals @lasn.
 */
1672 same_group(rte
*r
, u32 lpref
, u32 lasn
)
1674 return (r
->pref
== lpref
) && (bgp_get_neighbor(r
) == lasn
);
/*
 * use_deterministic_med - test whether a route takes part in deterministic
 * MED selection
 * @r: route to test
 *
 * Returns non-zero when the route's source protocol is BGP and that
 * instance has cf->deterministic_med set. The protocol type is checked
 * before the cast to struct bgp_proto.
 */
1678 use_deterministic_med(rte
*r
)
1680 struct proto
*P
= r
->attrs
->src
->proto
;
1681 return (P
->proto
== &proto_bgp
) && ((struct bgp_proto
*) P
)->cf
->deterministic_med
;
/*
 * bgp_rte_recalculate - group-level best route selection for
 * deterministic MED
 * @table: routing table, passed through on recursion
 * @net: network whose net->routes list is scanned
 * @new: added route, or NULL
 * @old: removed route, or NULL
 * @old_best: previous best route of @net
 *
 * The group is identified by the pref and neighbor AS of the changed
 * route (new if present, otherwise old). The function maintains the
 * u.bgp.suppressed flag so that only the best route of the group stays
 * unsuppressed. Per the discussion in the body, a result of 1 forces a
 * recalculation in the caller.
 * NOTE(review): several conditions, returns and the declarations of r, s,
 * i1 and i2 are not visible in this excerpt.
 */
1685 bgp_rte_recalculate(rtable
*table
, net
*net
, rte
*new, rte
*old
, rte
*old_best
)
1688 rte
*key
= new ? new : old
;
1689 u32 lpref
= key
->pref
;
1690 u32 lasn
= bgp_get_neighbor(key
);
1691 int old_is_group_best
= 0;
1694 * Proper RFC 4271 path selection is a bit complicated, it cannot be
1695 * implemented just by rte_better(), because it is not a linear
1696 * ordering. But it can be splitted to two levels, where the lower
1697 * level chooses the best routes in each group of routes from the
1698 * same neighboring AS and higher level chooses the best route (with
1699 * a slightly different ordering) between the best-in-group routes.
1701 * When deterministic_med is disabled, we just ignore this issue and
1702 * choose the best route by bgp_rte_better() alone. If enabled, the
1703 * lower level of the route selection is done here (for the group
1704 * to which the changed route belongs), all routes in group are
1705 * marked as suppressed, just chosen best-in-group is not.
1707 * Global best route selection then implements higher level by
1708 * choosing between non-suppressed routes (as they are always
1709 * preferred over suppressed routes). Routes from BGP protocols
1710 * that do not set deterministic_med are just never suppressed. As
1711 * they do not participate in the lower level selection, it is OK
1712 * that this fn is not called for them.
1714 * The idea is simple, the implementation is more problematic,
1715 * mostly because of optimizations in rte_recalculate() that
1716 * avoids full recalculation in most cases.
1718 * We can assume that at least one of new, old is non-NULL and both
1719 * are from the same protocol with enabled deterministic_med. We
1720 * group routes by both neighbor AS (lasn) and preference (lpref),
1721 * because bgp_rte_better() does not handle preference itself.
1724 /* If new and old are from different groups, we just process that
1725 as two independent events */
1726 if (new && old
&& !same_group(old
, lpref
, lasn
))
1729 i1
= bgp_rte_recalculate(table
, net
, NULL
, old
, old_best
);
1730 i2
= bgp_rte_recalculate(table
, net
, new, NULL
, old_best
);
1735 * We could find the best-in-group and then make some shortcuts like
1736 * in rte_recalculate, but as we would have to walk through all
1737 * net->routes just to find it, it is probably not worth. So we
1738 * just have two simpler fast cases that use just the old route.
1739 * We also set suppressed flag to avoid using it in bgp_rte_better().
1743 new->u
.bgp
.suppressed
= 1;
/* The old flag is read before it is overwritten: an unsuppressed old
   route was the best of its group */
1747 old_is_group_best
= !old
->u
.bgp
.suppressed
;
1748 old
->u
.bgp
.suppressed
= 1;
1749 int new_is_better
= new && bgp_rte_better(new, old
);
1751 /* The first case - replace not best with worse (or remove not best) */
1752 if (!old_is_group_best
&& !new_is_better
)
1755 /* The second case - replace the best with better */
1756 if (old_is_group_best
&& new_is_better
)
1758 /* new is best-in-group, see the discussion below - this is
1759 a special variant of NBG && OBG. From OBG we can deduce
1760 that same_group(old_best) iff (old == old_best) */
1761 new->u
.bgp
.suppressed
= 0;
1762 return (old
== old_best
);
1766 /* The default case - find a new best-in-group route */
1767 r
= new; /* new may not be in the list */
/* Every route of the group is marked suppressed during the scan; the
   winner is unsuppressed afterwards */
1768 for (s
=net
->routes
; rte_is_valid(s
); s
=s
->next
)
1769 if (use_deterministic_med(s
) && same_group(s
, lpref
, lasn
))
1771 s
->u
.bgp
.suppressed
= 1;
1772 if (!r
|| bgp_rte_better(s
, r
))
1776 /* Simple case - the last route in group disappears */
1780 /* Found best-in-group */
1781 r
->u
.bgp
.suppressed
= 0;
1784 * There are generally two reasons why we have to force
1785 * recalculation (return 1): First, the new route may be wrongfully
1786 * chosen to be the best in the first case check in
1787 * rte_recalculate(), this may happen only if old_best is from the
1788 * same group. Second, another (different than new route)
1789 * best-in-group is chosen and that may be the proper best (although
1790 * rte_recalculate() without ignore that possibility).
1792 * There are three possible cases according to whether the old route
1793 * was the best in group (OBG, stored in old_is_group_best) and
1794 * whether the new route is the best in group (NBG, tested by r == new).
1795 * These cases work even if old or new is NULL.
1797 * NBG -> new is a possible candidate for the best route, so we just
1798 * check for the first reason using same_group().
1800 * !NBG && OBG -> Second reason applies, return 1
1802 * !NBG && !OBG -> Best in group does not change, old != old_best,
1803 * rte_better(new, old_best) is false and therefore
1804 * the first reason does not apply, return 0
1808 return old_best
&& same_group(old_best
, lpref
, lasn
);
1810 return old_is_group_best
;
1815 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
/*
 * bgp_process_as4_attrs - fold AS4_* attributes into AS_PATH and AGGREGATOR
 * @attrs: attribute list that is searched and updated
 * @pool: linpool handed to bgp_unset_attr() and as_path_merge()
 *
 * AS4_PATH and AS4_AGGREGATOR are unset first. AGGREGATOR then takes over
 * the data of AS4_AGGREGATOR, and AS_PATH is cut to the length difference
 * and merged with AS4_PATH.
 * NOTE(review): the conditions guarding the AGGREGATOR and AS_PATH parts
 * and the give-up returns are not visible in this excerpt.
 */
1818 bgp_process_as4_attrs(ea_list
**attrs
, struct linpool
*pool
)
1820 eattr
*p2
= bgp_find_attr(*attrs
, BA_AS_PATH
);
1821 eattr
*p4
= bgp_find_attr(*attrs
, BA_AS4_PATH
);
1822 eattr
*a2
= bgp_find_attr(*attrs
, BA_AGGREGATOR
);
1823 eattr
*a4
= bgp_find_attr(*attrs
, BA_AS4_AGGREGATOR
);
1825 /* First, unset AS4_* attributes */
/* NOTE(review): p4 and a4 are still dereferenced further down -
   presumably bgp_unset_attr() leaves the looked-up eattr and its data
   intact; confirm. */
1826 if (p4
) bgp_unset_attr(attrs
, pool
, BA_AS4_PATH
);
1827 if (a4
) bgp_unset_attr(attrs
, pool
, BA_AS4_AGGREGATOR
);
1829 /* Handle AGGREGATOR attribute */
/* The AS number is read from the start of the AGGREGATOR data */
1832 u32 a2_asn
= get_u32(a2
->u
.ptr
->data
);
1834 /* If routes were aggregated by an old router, then AS4_PATH and
1835 AS4_AGGREGATOR are invalid. In that case we give up. */
1836 if (a2_asn
!= AS_TRANS
)
1839 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
1840 a2
->u
.ptr
= a4
->u
.ptr
;
1843 /* Handle AS_PATH attribute */
1846 int p2_len
= as_path_getlen(p2
->u
.ptr
);
1847 int p4_len
= as_path_getlen(p4
->u
.ptr
);
1849 /* AS_PATH is too short, give up */
1850 if (p2_len
< p4_len
)
1853 /* Merge AS_PATH and AS4_PATH */
/* as_path_cut() is given the length difference, then the result is
   merged with the AS4_PATH data */
1854 as_path_cut(p2
->u
.ptr
, p2_len
- p4_len
);
1855 p2
->u
.ptr
= as_path_merge(pool
, p2
->u
.ptr
, p4
->u
.ptr
);
/*
 * bgp_get_attr - format a BGP attribute for display
 * @a: attribute; EA_ID(a->id) selects its descriptor
 * @buf: output buffer
 * @buflen: buffer size, from which the space for the format hook is derived
 *
 * For a known attribute the descriptor name is printed and d->format() is
 * called with buflen - len - 2. Otherwise the attribute code is printed as
 * two hex digits, followed by " [t]" when BAF_TRANSITIVE is set.
 * NOTE(review): the declaration of len, the conditions around the format
 * call and the return values are not visible in this excerpt.
 */
1860 bgp_get_attr(eattr
*a
, byte
*buf
, int buflen
)
1862 uint i
= EA_ID(a
->id
);
1863 const struct bgp_attr_desc
*d
;
1866 if (bgp_attr_known(i
))
1868 d
= &bgp_attr_table
[i
];
1869 len
= bsprintf(buf
, "%s", d
->name
);
1875 d
->format(a
, buf
, buflen
- len
- 2);
1881 bsprintf(buf
, "%02x%s", i
, (a
->flags
& BAF_TRANSITIVE
) ? " [t]" : "");
1886 bgp_get_route_info(rte
*e
, byte
*buf
, ea_list
*attrs
)
1888 eattr
*p
= ea_find(attrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1889 eattr
*o
= ea_find(attrs
, EA_CODE(EAP_BGP
, BA_ORIGIN
));
1892 buf
+= bsprintf(buf
, " (%d", e
->pref
);
1894 if (e
->u
.bgp
.suppressed
)
1895 buf
+= bsprintf(buf
, "-");
1897 if (e
->attrs
->hostentry
)
1899 if (!rte_resolvable(e
))
1900 buf
+= bsprintf(buf
, "/-");
1901 else if (e
->attrs
->igp_metric
>= IGP_METRIC_UNKNOWN
)
1902 buf
+= bsprintf(buf
, "/?");
1904 buf
+= bsprintf(buf
, "/%d", e
->attrs
->igp_metric
);
1906 buf
+= bsprintf(buf
, ") [");
1908 if (p
&& as_path_get_last(p
->u
.ptr
, &origas
))
1909 buf
+= bsprintf(buf
, "AS%u", origas
);
1911 buf
+= bsprintf(buf
, "%c", "ie?"[o
->u
.data
]);