2 * BIRD -- BGP Attributes
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
8 * Can be freely distributed and used under the terms of the GNU GPL.
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "conf/conf.h"
21 #include "lib/resource.h"
22 #include "lib/string.h"
23 #include "lib/unaligned.h"
28 * UPDATE message error handling
30 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32 * - Checks of some optional attribute values are missing.
33 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34 * are probably inadequate.
36 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37 * 4271 does not explicitly specify the behavior in that case.
39 * Loop detection related to route reflection (based on ORIGINATOR_ID
40 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41 * specifies that such updates should be ignored, but that is generally
44 * BGP attribute table has several hooks:
46 * export - Hook that validates and normalizes attribute during export phase.
47 * Receives eattr, may modify it (e.g., sort community lists for canonical
48 * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
49 * necessary. May assume that the eattr value is valid w.r.t. its type, but it may be
50 * invalid w.r.t. BGP constraints. Optional.
52 * encode - Hook that converts internal representation to external one during
53 * packet writing. Receives eattr and puts it in the buffer (including attribute
54 * header). Returns number of bytes, or -1 if not enough space. May assume that
55 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
56 * for all known attributes that exist internally after export phase (i.e., all
57 * except pseudoattributes MP_(UN)REACH_NLRI).
59 * decode - Hook that converts external representation to internal one during
60 * packet parsing. Receives attribute data in buffer, validates it and adds
61 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
62 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
64 * format - Optional hook that converts eattr to textual representation.
68 struct bgp_attr_desc
{
72 void (*export
)(struct bgp_export_state
*s
, eattr
*a
);
73 int (*encode
)(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
);
74 void (*decode
)(struct bgp_parse_state
*s
, uint code
, uint flags
, byte
*data
, uint len
, ea_list
**to
);
75 void (*format
)(eattr
*ea
, byte
*buf
, uint size
);
78 static const struct bgp_attr_desc bgp_attr_table
[];
80 static inline int bgp_attr_known(uint code
);
83 bgp_set_attr(ea_list
**attrs
, struct linpool
*pool
, uint code
, uint flags
, uintptr_t val
)
85 ASSERT(bgp_attr_known(code
));
90 EA_CODE(PROTOCOL_BGP
, code
),
92 bgp_attr_table
[code
].type
,
/*
 * Error-reporting helpers for the attribute hooks. Each of them expands to code
 * that reads a variable named `s` from the enclosing scope, and DISCARD() and
 * WITHDRAW() execute a bare `return;`, so they are meant for void functions.
 */

/* REPORT: log the formatted message with class L_REMOTE, prefixed by s->proto->p.name */
99 #define REPORT(msg, args...) \
100 ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
/* DISCARD: report the message and return from the calling function */
102 #define DISCARD(msg, args...) \
103 ({ REPORT(msg, ## args); return; })
/* WITHDRAW: report the message, set s->err_withdraw and return from the calling function */
105 #define WITHDRAW(msg, args...) \
106 ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
109 ({ a->type = EAF_TYPE_UNDEF; return; })
/*
 * Message templates passed to the reporting macros by the attribute hooks.
 * The %s argument is the attribute name; BAD_LENGTH and BAD_VALUE additionally
 * take the offending length or value as an unsigned integer (%u).
 */
111 #define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
112 #define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
113 #define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
114 #define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
115 #define NO_MANDATORY "Missing mandatory %s attribute"
119 bgp_put_attr_hdr3(byte
*buf
, uint code
, uint flags
, uint len
)
128 bgp_put_attr_hdr4(byte
*buf
, uint code
, uint flags
, uint len
)
130 *buf
++ = flags
| BAF_EXT_LEN
;
137 bgp_put_attr_hdr(byte
*buf
, uint code
, uint flags
, uint len
)
140 return bgp_put_attr_hdr3(buf
, code
, flags
, len
);
142 return bgp_put_attr_hdr4(buf
, code
, flags
, len
);
146 bgp_encode_u8(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size
)
151 bgp_put_attr_hdr3(buf
, EA_ID(a
->id
), a
->flags
, 1);
158 bgp_encode_u32(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size
)
163 bgp_put_attr_hdr3(buf
, EA_ID(a
->id
), a
->flags
, 4);
164 put_u32(buf
+3, a
->u
.data
);
170 bgp_encode_u32s(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size
)
172 uint len
= a
->u
.ptr
->length
;
177 uint hdr
= bgp_put_attr_hdr(buf
, EA_ID(a
->id
), a
->flags
, len
);
178 put_u32s(buf
+ hdr
, (u32
*) a
->u
.ptr
->data
, len
/ 4);
184 bgp_put_attr(byte
*buf
, uint size
, uint code
, uint flags
, const byte
*data
, uint len
)
189 uint hdr
= bgp_put_attr_hdr(buf
, code
, flags
, len
);
190 memcpy(buf
+ hdr
, data
, len
);
196 bgp_encode_raw(struct bgp_write_state
*s UNUSED
, eattr
*a
, byte
*buf
, uint size
)
198 return bgp_put_attr(buf
, size
, EA_ID(a
->id
), a
->flags
, a
->u
.ptr
->data
, a
->u
.ptr
->length
);
207 bgp_aigp_valid(byte
*data
, uint len
, char *err
, uint elen
)
210 char *err_dsc
= NULL
;
213 #define BAD(DSC,VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; })
217 BAD("TLV framing error", len
);
219 /* Process one TLV */
221 uint plen
= get_u16(pos
+ 1);
224 BAD("TLV framing error", plen
);
227 BAD("Bad TLV length", plen
);
229 if ((ptype
== BGP_AIGP_METRIC
) && (plen
!= 11))
230 BAD("Bad AIGP TLV length", plen
);
232 ADVANCE(pos
, len
, plen
);
240 if (bsnprintf(err
, elen
, "%s (%u) at %d", err_dsc
, err_val
, (int) (pos
- data
)) < 0)
247 bgp_aigp_get_tlv(const struct adata
*ad
, uint type
)
252 uint len
= ad
->length
;
253 const byte
*pos
= ad
->data
;
258 uint plen
= get_u16(pos
+ 1);
263 ADVANCE(pos
, len
, plen
);
269 static const struct adata
*
270 bgp_aigp_set_tlv(struct linpool
*pool
, const struct adata
*ad
, uint type
, byte
*data
, uint dlen
)
272 uint len
= ad
? ad
->length
: 0;
273 const byte
*pos
= ad
? ad
->data
: NULL
;
274 struct adata
*res
= lp_alloc_adata(pool
, len
+ 3 + dlen
);
275 byte
*dst
= res
->data
;
282 uint plen
= get_u16(pos
+ 1);
284 /* Find position for new TLV */
285 if ((ptype
>= type
) && !tlv
)
291 /* Skip first matching TLV, copy others */
292 if ((ptype
== type
) && !del
)
296 memcpy(dst
, pos
, plen
);
300 ADVANCE(pos
, len
, plen
);
310 put_u8(tlv
+ 0, type
);
311 put_u16(tlv
+ 1, 3 + dlen
);
312 memcpy(tlv
+ 3, data
, dlen
);
315 res
->length
= dst
- res
->data
;
321 bgp_aigp_get_metric(const struct adata
*ad
, u64 def
)
323 const byte
*b
= bgp_aigp_get_tlv(ad
, BGP_AIGP_METRIC
);
324 return b
? get_u64(b
+ 3) : def
;
327 static const struct adata
*
328 bgp_aigp_set_metric(struct linpool
*pool
, const struct adata
*ad
, u64 metric
)
331 put_u64(data
, metric
);
332 return bgp_aigp_set_tlv(pool
, ad
, BGP_AIGP_METRIC
, data
, 8);
336 bgp_total_aigp_metric_(rte
*e
, u64
*metric
, const struct adata
**ad
)
338 eattr
*a
= ea_find(e
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_AIGP
));
342 const byte
*b
= bgp_aigp_get_tlv(a
->u
.ptr
, BGP_AIGP_METRIC
);
346 u64 aigp
= get_u64(b
+ 3);
347 u64 step
= e
->attrs
->igp_metric
;
349 if (!rte_resolvable(e
) || (step
>= IGP_METRIC_UNKNOWN
))
356 *metric
= aigp
+ step
;
358 *metric
= BGP_AIGP_MAX
;
364 bgp_init_aigp_metric(rte
*e
, u64
*metric
, const struct adata
**ad
)
366 if (e
->attrs
->source
== RTS_BGP
)
369 *metric
= rt_get_igp_metric(e
);
371 return *metric
< IGP_METRIC_UNKNOWN
;
380 bgp_export_origin(struct bgp_export_state
*s
, eattr
*a
)
383 WITHDRAW(BAD_VALUE
, "ORIGIN", a
->u
.data
);
387 bgp_decode_origin(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
390 WITHDRAW(BAD_LENGTH
, "ORIGIN", len
);
393 WITHDRAW(BAD_VALUE
, "ORIGIN", data
[0]);
395 bgp_set_attr_u32(to
, s
->pool
, BA_ORIGIN
, flags
, data
[0]);
399 bgp_format_origin(eattr
*a
, byte
*buf
, uint size UNUSED
)
401 static const char *bgp_origin_names
[] = { "IGP", "EGP", "Incomplete" };
403 bsprintf(buf
, (a
->u
.data
<= 2) ? bgp_origin_names
[a
->u
.data
] : "?");
408 bgp_as_path_first_as_equal(const byte
*data
, uint len
, u32 asn
)
411 ((data
[0] == AS_PATH_SEQUENCE
) || (data
[0] == AS_PATH_CONFED_SEQUENCE
)) &&
413 (get_u32(data
+2) == asn
);
417 bgp_encode_as_path(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
)
419 const byte
*data
= a
->u
.ptr
->data
;
420 uint len
= a
->u
.ptr
->length
;
424 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
425 byte
*dst
= alloca(len
);
426 len
= as_path_32to16(dst
, data
, len
);
430 return bgp_put_attr(buf
, size
, BA_AS_PATH
, a
->flags
, data
, len
);
434 bgp_decode_as_path(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
436 struct bgp_proto
*p
= s
->proto
;
437 int as_length
= s
->as4_session
? 4 : 2;
438 int as_sets
= p
->cf
->allow_as_sets
;
439 int as_confed
= p
->cf
->confederation
&& p
->is_interior
;
442 if (!as_path_valid(data
, len
, as_length
, as_sets
, as_confed
, err
, sizeof(err
)))
443 WITHDRAW("Malformed AS_PATH attribute - %s", err
);
447 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
449 data
= alloca(2*len
);
450 len
= as_path_16to32(data
, src
, len
);
453 /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
454 if (p
->is_interior
&& !p
->is_internal
&&
455 ((len
< 2) || (data
[0] != AS_PATH_CONFED_SEQUENCE
)))
456 WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
458 /* Reject routes with first AS in AS_PATH not matching neighbor AS; RFC 4271 6.3 */
459 if (!p
->is_internal
&& p
->cf
->enforce_first_as
&&
460 !bgp_as_path_first_as_equal(data
, len
, p
->remote_as
))
461 WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS");
463 bgp_set_attr_data(to
, s
->pool
, BA_AS_PATH
, flags
, data
, len
);
468 bgp_encode_next_hop(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
)
471 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
472 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
473 * store it and encode it later by AFI-specific hooks.
478 // ASSERT(a->u.ptr->length == sizeof(ip_addr));
480 /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
481 ip_addr
*addr
= (void *) a
->u
.ptr
->data
;
482 if ((a
->u
.ptr
->length
!= sizeof(ip_addr
)) || !ipa_is_ip4(*addr
))
488 bgp_put_attr_hdr3(buf
, BA_NEXT_HOP
, a
->flags
, 4);
489 put_ip4(buf
+3, ipa_to_ip4(*addr
));
501 bgp_decode_next_hop(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags UNUSED
, byte
*data
, uint len
, ea_list
**to UNUSED
)
504 WITHDRAW(BAD_LENGTH
, "NEXT_HOP", len
);
506 /* Semantic checks are done later */
507 s
->ip_next_hop_len
= len
;
508 s
->ip_next_hop_data
= data
;
511 /* TODO: This function should use AF-specific hook */
513 bgp_format_next_hop(eattr
*a
, byte
*buf
, uint size UNUSED
)
515 ip_addr
*nh
= (void *) a
->u
.ptr
->data
;
516 uint len
= a
->u
.ptr
->length
;
518 ASSERT((len
== 16) || (len
== 32));
520 /* in IPv6, we may have two addresses in NEXT HOP */
521 if ((len
== 16) || ipa_zero(nh
[1]))
522 bsprintf(buf
, "%I", nh
[0]);
524 bsprintf(buf
, "%I %I", nh
[0], nh
[1]);
529 bgp_decode_med(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
532 WITHDRAW(BAD_LENGTH
, "MULTI_EXIT_DISC", len
);
534 u32 val
= get_u32(data
);
535 bgp_set_attr_u32(to
, s
->pool
, BA_MULTI_EXIT_DISC
, flags
, val
);
540 bgp_export_local_pref(struct bgp_export_state
*s
, eattr
*a
)
542 if (!s
->proto
->is_interior
&& !s
->proto
->cf
->allow_local_pref
)
547 bgp_decode_local_pref(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
549 if (!s
->proto
->is_interior
&& !s
->proto
->cf
->allow_local_pref
)
550 DISCARD(BAD_EBGP
, "LOCAL_PREF");
553 WITHDRAW(BAD_LENGTH
, "LOCAL_PREF", len
);
555 u32 val
= get_u32(data
);
556 bgp_set_attr_u32(to
, s
->pool
, BA_LOCAL_PREF
, flags
, val
);
561 bgp_decode_atomic_aggr(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data UNUSED
, uint len
, ea_list
**to
)
564 DISCARD(BAD_LENGTH
, "ATOMIC_AGGR", len
);
566 bgp_set_attr_data(to
, s
->pool
, BA_ATOMIC_AGGR
, flags
, NULL
, 0);
570 bgp_encode_aggregator(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
)
572 const byte
*data
= a
->u
.ptr
->data
;
573 uint len
= a
->u
.ptr
->length
;
577 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
578 byte
*dst
= alloca(6);
579 len
= aggregator_32to16(dst
, data
);
582 return bgp_put_attr(buf
, size
, BA_AGGREGATOR
, a
->flags
, data
, len
);
586 bgp_decode_aggregator(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
588 if (len
!= (s
->as4_session
? 8 : 6))
589 DISCARD(BAD_LENGTH
, "AGGREGATOR", len
);
593 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
596 len
= aggregator_16to32(data
, src
);
599 bgp_set_attr_data(to
, s
->pool
, BA_AGGREGATOR
, flags
, data
, len
);
603 bgp_format_aggregator(eattr
*a
, byte
*buf
, uint size UNUSED
)
605 const byte
*data
= a
->u
.ptr
->data
;
607 bsprintf(buf
, "%I4 AS%u", get_ip4(data
+4), get_u32(data
+0));
612 bgp_export_community(struct bgp_export_state
*s
, eattr
*a
)
614 if (a
->u
.ptr
->length
== 0)
617 a
->u
.ptr
= int_set_sort(s
->pool
, a
->u
.ptr
);
621 bgp_decode_community(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
623 if (!len
|| (len
% 4))
624 WITHDRAW(BAD_LENGTH
, "COMMUNITY", len
);
626 struct adata
*ad
= lp_alloc_adata(s
->pool
, len
);
627 get_u32s(data
, (u32
*) ad
->data
, len
/ 4);
628 bgp_set_attr_ptr(to
, s
->pool
, BA_COMMUNITY
, flags
, ad
);
633 bgp_export_originator_id(struct bgp_export_state
*s
, eattr
*a
)
635 if (!s
->proto
->is_internal
)
640 bgp_decode_originator_id(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
642 if (!s
->proto
->is_internal
)
643 DISCARD(BAD_EBGP
, "ORIGINATOR_ID");
646 WITHDRAW(BAD_LENGTH
, "ORIGINATOR_ID", len
);
648 u32 val
= get_u32(data
);
649 bgp_set_attr_u32(to
, s
->pool
, BA_ORIGINATOR_ID
, flags
, val
);
654 bgp_export_cluster_list(struct bgp_export_state
*s UNUSED
, eattr
*a
)
656 if (!s
->proto
->is_internal
)
659 if (a
->u
.ptr
->length
== 0)
664 bgp_decode_cluster_list(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
666 if (!s
->proto
->is_internal
)
667 DISCARD(BAD_EBGP
, "CLUSTER_LIST");
669 if (!len
|| (len
% 4))
670 WITHDRAW(BAD_LENGTH
, "CLUSTER_LIST", len
);
672 struct adata
*ad
= lp_alloc_adata(s
->pool
, len
);
673 get_u32s(data
, (u32
*) ad
->data
, len
/ 4);
674 bgp_set_attr_ptr(to
, s
->pool
, BA_CLUSTER_LIST
, flags
, ad
);
678 bgp_format_cluster_list(eattr
*a
, byte
*buf
, uint size
)
680 /* Truncates cluster lists larger than buflen, probably not a problem */
681 int_set_format(a
->u
.ptr
, 0, -1, buf
, size
);
688 return (get_u16(buf
) << 16) | buf
[2];
692 bgp_decode_mp_reach_nlri(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags UNUSED
, byte
*data
, uint len
, ea_list
**to UNUSED
)
695 * 2 B MP_REACH_NLRI data - Address Family Identifier
696 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
697 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
698 * var MP_REACH_NLRI data - Network Address of Next Hop
699 * 1 B MP_REACH_NLRI data - Reserved (zero)
700 * var MP_REACH_NLRI data - Network Layer Reachability Information
703 if ((len
< 5) || (len
< (5 + (uint
) data
[3])))
704 bgp_parse_error(s
, 9);
706 s
->mp_reach_af
= get_af3(data
);
707 s
->mp_next_hop_len
= data
[3];
708 s
->mp_next_hop_data
= data
+ 4;
709 s
->mp_reach_len
= len
- 5 - s
->mp_next_hop_len
;
710 s
->mp_reach_nlri
= data
+ 5 + s
->mp_next_hop_len
;
715 bgp_decode_mp_unreach_nlri(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags UNUSED
, byte
*data
, uint len
, ea_list
**to UNUSED
)
718 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
719 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
720 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
724 bgp_parse_error(s
, 9);
726 s
->mp_unreach_af
= get_af3(data
);
727 s
->mp_unreach_len
= len
- 3;
728 s
->mp_unreach_nlri
= data
+ 3;
733 bgp_export_ext_community(struct bgp_export_state
*s
, eattr
*a
)
735 if (!s
->proto
->is_interior
)
737 struct adata
*ad
= ec_set_del_nontrans(s
->pool
, a
->u
.ptr
);
747 if (a
->u
.ptr
->length
== 0)
750 a
->u
.ptr
= ec_set_sort(s
->pool
, a
->u
.ptr
);
755 bgp_decode_ext_community(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
757 if (!len
|| (len
% 8))
758 WITHDRAW(BAD_LENGTH
, "EXT_COMMUNITY", len
);
760 struct adata
*ad
= lp_alloc_adata(s
->pool
, len
);
761 get_u32s(data
, (u32
*) ad
->data
, len
/ 4);
762 bgp_set_attr_ptr(to
, s
->pool
, BA_EXT_COMMUNITY
, flags
, ad
);
767 bgp_decode_as4_aggregator(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
770 DISCARD(NEW_BGP
, "AS4_AGGREGATOR");
773 DISCARD(BAD_LENGTH
, "AS4_AGGREGATOR", len
);
775 bgp_set_attr_data(to
, s
->pool
, BA_AS4_AGGREGATOR
, flags
, data
, len
);
779 bgp_decode_as4_path(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
781 struct bgp_proto
*p
= s
->proto
;
782 int sets
= p
->cf
->allow_as_sets
;
787 DISCARD(NEW_BGP
, "AS4_PATH");
790 DISCARD(BAD_LENGTH
, "AS4_PATH", len
);
792 if (!as_path_valid(data
, len
, 4, sets
, 1, err
, sizeof(err
)))
793 DISCARD("Malformed AS4_PATH attribute - %s", err
);
795 struct adata
*a
= lp_alloc_adata(s
->pool
, len
);
796 memcpy(a
->data
, data
, len
);
798 /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
799 if (as_path_contains_confed(a
))
801 REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
802 a
= as_path_strip_confed(s
->pool
, a
);
805 bgp_set_attr_ptr(to
, s
->pool
, BA_AS4_PATH
, flags
, a
);
810 bgp_export_aigp(struct bgp_export_state
*s
, eattr
*a
)
812 if (!s
->channel
->cf
->aigp
)
817 bgp_decode_aigp(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
821 /* Acceptability test postponed to bgp_finish_attrs() */
823 if ((flags
^ bgp_attr_table
[BA_AIGP
].flags
) & (BAF_OPTIONAL
| BAF_TRANSITIVE
))
824 DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags
);
826 if (!bgp_aigp_valid(data
, len
, err
, sizeof(err
)))
827 DISCARD("Malformed AIGP attribute - %s", err
);
829 bgp_set_attr_data(to
, s
->pool
, BA_AIGP
, flags
, data
, len
);
833 bgp_format_aigp(eattr
*a
, byte
*buf
, uint size UNUSED
)
835 const byte
*b
= bgp_aigp_get_tlv(a
->u
.ptr
, BGP_AIGP_METRIC
);
840 bsprintf(buf
, "%lu", get_u64(b
+ 3));
845 bgp_export_large_community(struct bgp_export_state
*s
, eattr
*a
)
847 if (a
->u
.ptr
->length
== 0)
850 a
->u
.ptr
= lc_set_sort(s
->pool
, a
->u
.ptr
);
854 bgp_decode_large_community(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
856 if (!len
|| (len
% 12))
857 WITHDRAW(BAD_LENGTH
, "LARGE_COMMUNITY", len
);
859 struct adata
*ad
= lp_alloc_adata(s
->pool
, len
);
860 get_u32s(data
, (u32
*) ad
->data
, len
/ 4);
861 bgp_set_attr_ptr(to
, s
->pool
, BA_LARGE_COMMUNITY
, flags
, ad
);
865 bgp_export_mpls_label_stack(struct bgp_export_state
*s
, eattr
*a
)
867 net_addr
*n
= s
->route
->net
->n
.addr
;
868 u32
*labels
= (u32
*) a
->u
.ptr
->data
;
869 uint lnum
= a
->u
.ptr
->length
/ 4;
871 /* Perhaps we should just ignore it? */
873 WITHDRAW("Unexpected MPLS stack");
875 /* Empty MPLS stack is not allowed */
877 WITHDRAW("Malformed MPLS stack - empty");
879 /* This is ugly, but we must ensure that labels fit into NLRI field */
880 if ((24*lnum
+ (net_is_vpn(n
) ? 64 : 0) + net_pxlen(n
)) > 255)
881 WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum
);
883 for (uint i
= 0; i
< lnum
; i
++)
885 if (labels
[i
] > 0xfffff)
886 WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels
[i
]);
888 /* TODO: Check for special-purpose label values? */
893 bgp_encode_mpls_label_stack(struct bgp_write_state
*s
, eattr
*a
, byte
*buf UNUSED
, uint size UNUSED
)
896 * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
897 * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
900 s
->mpls_labels
= a
->u
.ptr
;
905 bgp_decode_mpls_label_stack(struct bgp_parse_state
*s
, uint code UNUSED
, uint flags UNUSED
, byte
*data UNUSED
, uint len UNUSED
, ea_list
**to UNUSED
)
907 DISCARD("Discarding received attribute #0");
911 bgp_format_mpls_label_stack(eattr
*a
, byte
*buf
, uint size
)
913 u32
*labels
= (u32
*) a
->u
.ptr
->data
;
914 uint lnum
= a
->u
.ptr
->length
/ 4;
917 for (uint i
= 0; i
< lnum
; i
++)
921 bsprintf(pos
, "...");
925 uint l
= bsprintf(pos
, "%d/", labels
[i
]);
926 ADVANCE(pos
, size
, l
);
929 /* Clear last slash or terminate empty string */
930 pos
[lnum
? -1 : 0] = 0;
934 bgp_decode_unknown(struct bgp_parse_state
*s
, uint code
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
936 /* Cannot use bgp_set_attr_data() as it works on known attributes only */
937 ea_set_attr_data(to
, s
->pool
, EA_CODE(PROTOCOL_BGP
, code
), flags
, EAF_TYPE_OPAQUE
, data
, len
);
945 static const struct bgp_attr_desc bgp_attr_table
[] = {
948 .type
= EAF_TYPE_INT
,
949 .flags
= BAF_TRANSITIVE
,
950 .export
= bgp_export_origin
,
951 .encode
= bgp_encode_u8
,
952 .decode
= bgp_decode_origin
,
953 .format
= bgp_format_origin
,
957 .type
= EAF_TYPE_AS_PATH
,
958 .flags
= BAF_TRANSITIVE
,
959 .encode
= bgp_encode_as_path
,
960 .decode
= bgp_decode_as_path
,
964 .type
= EAF_TYPE_IP_ADDRESS
,
965 .flags
= BAF_TRANSITIVE
,
966 .encode
= bgp_encode_next_hop
,
967 .decode
= bgp_decode_next_hop
,
968 .format
= bgp_format_next_hop
,
970 [BA_MULTI_EXIT_DISC
] = {
972 .type
= EAF_TYPE_INT
,
973 .flags
= BAF_OPTIONAL
,
974 .encode
= bgp_encode_u32
,
975 .decode
= bgp_decode_med
,
978 .name
= "local_pref",
979 .type
= EAF_TYPE_INT
,
980 .flags
= BAF_TRANSITIVE
,
981 .export
= bgp_export_local_pref
,
982 .encode
= bgp_encode_u32
,
983 .decode
= bgp_decode_local_pref
,
986 .name
= "atomic_aggr",
987 .type
= EAF_TYPE_OPAQUE
,
988 .flags
= BAF_TRANSITIVE
,
989 .encode
= bgp_encode_raw
,
990 .decode
= bgp_decode_atomic_aggr
,
993 .name
= "aggregator",
994 .type
= EAF_TYPE_OPAQUE
,
995 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
996 .encode
= bgp_encode_aggregator
,
997 .decode
= bgp_decode_aggregator
,
998 .format
= bgp_format_aggregator
,
1001 .name
= "community",
1002 .type
= EAF_TYPE_INT_SET
,
1003 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
1004 .export
= bgp_export_community
,
1005 .encode
= bgp_encode_u32s
,
1006 .decode
= bgp_decode_community
,
1008 [BA_ORIGINATOR_ID
] = {
1009 .name
= "originator_id",
1010 .type
= EAF_TYPE_ROUTER_ID
,
1011 .flags
= BAF_OPTIONAL
,
1012 .export
= bgp_export_originator_id
,
1013 .encode
= bgp_encode_u32
,
1014 .decode
= bgp_decode_originator_id
,
1016 [BA_CLUSTER_LIST
] = {
1017 .name
= "cluster_list",
1018 .type
= EAF_TYPE_INT_SET
,
1019 .flags
= BAF_OPTIONAL
,
1020 .export
= bgp_export_cluster_list
,
1021 .encode
= bgp_encode_u32s
,
1022 .decode
= bgp_decode_cluster_list
,
1023 .format
= bgp_format_cluster_list
,
1025 [BA_MP_REACH_NLRI
] = {
1026 .name
= "mp_reach_nlri",
1027 .type
= EAF_TYPE_OPAQUE
,
1028 .flags
= BAF_OPTIONAL
,
1029 .decode
= bgp_decode_mp_reach_nlri
,
1031 [BA_MP_UNREACH_NLRI
] = {
1032 .name
= "mp_unreach_nlri",
1033 .type
= EAF_TYPE_OPAQUE
,
1034 .flags
= BAF_OPTIONAL
,
1035 .decode
= bgp_decode_mp_unreach_nlri
,
1037 [BA_EXT_COMMUNITY
] = {
1038 .name
= "ext_community",
1039 .type
= EAF_TYPE_EC_SET
,
1040 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
1041 .export
= bgp_export_ext_community
,
1042 .encode
= bgp_encode_u32s
,
1043 .decode
= bgp_decode_ext_community
,
1047 .type
= EAF_TYPE_AS_PATH
,
1048 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
1049 .encode
= bgp_encode_raw
,
1050 .decode
= bgp_decode_as4_path
,
1052 [BA_AS4_AGGREGATOR
] = {
1053 .name
= "as4_aggregator",
1054 .type
= EAF_TYPE_OPAQUE
,
1055 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
1056 .encode
= bgp_encode_raw
,
1057 .decode
= bgp_decode_as4_aggregator
,
1058 .format
= bgp_format_aggregator
,
1062 .type
= EAF_TYPE_OPAQUE
,
1063 .flags
= BAF_OPTIONAL
| BAF_DECODE_FLAGS
,
1064 .export
= bgp_export_aigp
,
1065 .encode
= bgp_encode_raw
,
1066 .decode
= bgp_decode_aigp
,
1067 .format
= bgp_format_aigp
,
1069 [BA_LARGE_COMMUNITY
] = {
1070 .name
= "large_community",
1071 .type
= EAF_TYPE_LC_SET
,
1072 .flags
= BAF_OPTIONAL
| BAF_TRANSITIVE
,
1073 .export
= bgp_export_large_community
,
1074 .encode
= bgp_encode_u32s
,
1075 .decode
= bgp_decode_large_community
,
1077 [BA_MPLS_LABEL_STACK
] = {
1078 .name
= "mpls_label_stack",
1079 .type
= EAF_TYPE_INT_SET
,
1080 .export
= bgp_export_mpls_label_stack
,
1081 .encode
= bgp_encode_mpls_label_stack
,
1082 .decode
= bgp_decode_mpls_label_stack
,
1083 .format
= bgp_format_mpls_label_stack
,
1088 bgp_attr_known(uint code
)
1090 return (code
< ARRAY_SIZE(bgp_attr_table
)) && bgp_attr_table
[code
].name
;
1099 bgp_export_attr(struct bgp_export_state
*s
, eattr
*a
, ea_list
*to
)
1101 if (EA_PROTO(a
->id
) != PROTOCOL_BGP
)
1104 uint code
= EA_ID(a
->id
);
1106 if (bgp_attr_known(code
))
1108 const struct bgp_attr_desc
*desc
= &bgp_attr_table
[code
];
1110 /* The flags might have been zero if the attr was added by filters */
1111 a
->flags
= (a
->flags
& BAF_PARTIAL
) | desc
->flags
;
1113 /* Set partial bit if new opt-trans attribute is attached to non-local route */
1114 if ((s
->src
!= NULL
) && (a
->type
& EAF_ORIGINATED
) &&
1115 (a
->flags
& BAF_OPTIONAL
) && (a
->flags
& BAF_TRANSITIVE
))
1116 a
->flags
|= BAF_PARTIAL
;
1118 /* Call specific hook */
1119 CALL(desc
->export
, s
, a
);
1121 /* Attribute might become undefined in hook */
1122 if ((a
->type
& EAF_TYPE_MASK
) == EAF_TYPE_UNDEF
)
1127 /* Don't re-export unknown non-transitive attributes */
1128 if (!(a
->flags
& BAF_TRANSITIVE
))
1131 a
->flags
|= BAF_PARTIAL
;
1134 /* Append updated attribute */
1135 to
->attrs
[to
->count
++] = *a
;
1139 * bgp_export_attrs - export BGP attributes
1140 * @s: BGP export state
1141 * @attrs: a list of extended attributes
1143 * The bgp_export_attrs() function takes a list of attributes and merges it to
1144 * one newly allocated and sorted segment. Attributes are validated and
1145 * normalized by type-specific export hooks and attribute flags are updated.
1146 * Some attributes may be eliminated (e.g. unknown non-transitive attributes, or
1147 * empty community sets).
1149 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
1151 static inline ea_list
*
1152 bgp_export_attrs(struct bgp_export_state
*s
, ea_list
*attrs
)
1154 /* Merge the attribute list */
1155 ea_list
*new = lp_alloc(s
->pool
, ea_scan(attrs
));
1156 ea_merge(attrs
, new);
1163 /* Export each attribute */
1164 for (i
= 0; i
< count
; i
++)
1165 bgp_export_attr(s
, &new->attrs
[i
], new);
1167 if (s
->err_withdraw
)
1175 * Attribute encoding
1179 bgp_encode_attr(struct bgp_write_state
*s
, eattr
*a
, byte
*buf
, uint size
)
1181 ASSERT(EA_PROTO(a
->id
) == PROTOCOL_BGP
);
1183 uint code
= EA_ID(a
->id
);
1185 if (bgp_attr_known(code
))
1186 return bgp_attr_table
[code
].encode(s
, a
, buf
, size
);
1188 return bgp_encode_raw(s
, a
, buf
, size
);
1192 * bgp_encode_attrs - encode BGP attributes
1193 * @s: BGP write state
1194 * @attrs: a list of extended attributes
1198 * The bgp_encode_attrs() function takes a list of extended attributes
1199 * and converts it to its BGP representation (a part of an Update message).
1200 * BGP write state may be fake when called from MRT protocol.
1202 * Result: Length of the attribute block generated or -1 if not enough space.
1205 bgp_encode_attrs(struct bgp_write_state
*s
, ea_list
*attrs
, byte
*buf
, byte
*end
)
1210 for (i
= 0; i
< attrs
->count
; i
++)
1212 len
= bgp_encode_attr(s
, &attrs
->attrs
[i
], pos
, end
- pos
);
1225 * Attribute decoding
1228 static void bgp_process_as4_attrs(ea_list
**attrs
, struct linpool
*pool
);
1231 bgp_as_path_loopy(struct bgp_proto
*p
, ea_list
*attrs
, u32 asn
)
1233 eattr
*e
= bgp_find_attr(attrs
, BA_AS_PATH
);
1234 int num
= p
->cf
->allow_local_as
+ 1;
1235 return (e
&& (num
> 0) && as_path_contains(e
->u
.ptr
, asn
, num
));
1239 bgp_originator_id_loopy(struct bgp_proto
*p
, ea_list
*attrs
)
1241 eattr
*e
= bgp_find_attr(attrs
, BA_ORIGINATOR_ID
);
1242 return (e
&& (e
->u
.data
== p
->local_id
));
1246 bgp_cluster_list_loopy(struct bgp_proto
*p
, ea_list
*attrs
)
1248 eattr
*e
= bgp_find_attr(attrs
, BA_CLUSTER_LIST
);
1249 return (e
&& int_set_contains(e
->u
.ptr
, p
->rr_cluster_id
));
1253 bgp_decode_attr(struct bgp_parse_state
*s
, uint code
, uint flags
, byte
*data
, uint len
, ea_list
**to
)
1255 /* Handle duplicate attributes; RFC 7606 3 (g) */
1256 if (BIT32_TEST(s
->attrs_seen
, code
))
1258 if ((code
== BA_MP_REACH_NLRI
) || (code
== BA_MP_UNREACH_NLRI
))
1259 bgp_parse_error(s
, 1);
1261 DISCARD("Discarding duplicate attribute (code %u)", code
);
1263 BIT32_SET(s
->attrs_seen
, code
);
1265 if (bgp_attr_known(code
))
1267 const struct bgp_attr_desc
*desc
= &bgp_attr_table
[code
];
1269 /* Handle conflicting flags; RFC 7606 3 (c) */
1270 if (((flags
^ desc
->flags
) & (BAF_OPTIONAL
| BAF_TRANSITIVE
)) &&
1271 !(desc
->flags
& BAF_DECODE_FLAGS
))
1272 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc
->name
, flags
);
1274 desc
->decode(s
, code
, flags
, data
, len
, to
);
1276 else /* Unknown attribute */
1278 if (!(flags
& BAF_OPTIONAL
))
1279 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code
, flags
);
1281 bgp_decode_unknown(s
, code
, flags
, data
, len
, to
);
1286 * bgp_decode_attrs - check and decode BGP attributes
1287 * @s: BGP parse state
1288 * @data: start of attribute block
1289 * @len: length of attribute block
1291 * This function takes a BGP attribute block (a part of an Update message), checks
1292 * its consistency and converts it to a list of BIRD route attributes represented
1293 * by an (uncached) &rta.
1296 bgp_decode_attrs(struct bgp_parse_state
*s
, byte
*data
, uint len
)
1298 struct bgp_proto
*p
= s
->proto
;
1299 ea_list
*attrs
= NULL
;
1300 uint code
, flags
, alen
;
1303 /* Parse the attributes */
1308 /* Read attribute type */
1313 ADVANCE(pos
, len
, 2);
1315 /* Read attribute length */
1316 if (flags
& BAF_EXT_LEN
)
1320 alen
= get_u16(pos
);
1321 ADVANCE(pos
, len
, 2);
1328 ADVANCE(pos
, len
, 1);
1334 DBG("Attr %02x %02x %u\n", code
, flags
, alen
);
1336 bgp_decode_attr(s
, code
, flags
, pos
, alen
, &attrs
);
1337 ADVANCE(pos
, len
, alen
);
1340 if (s
->err_withdraw
)
1343 /* If there is no reachability NLRI, we are finished */
1344 if (!s
->ip_reach_len
&& !s
->mp_reach_len
)
1348 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1349 if (!BIT32_TEST(s
->attrs_seen
, BA_ORIGIN
))
1350 { REPORT(NO_MANDATORY
, "ORIGIN"); goto withdraw
; }
1352 if (!BIT32_TEST(s
->attrs_seen
, BA_AS_PATH
))
1353 { REPORT(NO_MANDATORY
, "AS_PATH"); goto withdraw
; }
1355 if (s
->ip_reach_len
&& !BIT32_TEST(s
->attrs_seen
, BA_NEXT_HOP
))
1356 { REPORT(NO_MANDATORY
, "NEXT_HOP"); goto withdraw
; }
1358 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1359 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1360 if (!p
->as4_session
)
1361 bgp_process_as4_attrs(&attrs
, s
->pool
);
1363 /* Reject routes with our ASN in AS_PATH attribute */
1364 if (bgp_as_path_loopy(p
, attrs
, p
->local_as
))
1367 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
1368 if ((p
->public_as
!= p
->local_as
) && bgp_as_path_loopy(p
, attrs
, p
->public_as
))
1371 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1372 if (p
->is_internal
&& bgp_originator_id_loopy(p
, attrs
))
1375 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1376 if (p
->rr_client
&& bgp_cluster_list_loopy(p
, attrs
))
1379 /* If there is no local preference, define one */
1380 if (!BIT32_TEST(s
->attrs_seen
, BA_LOCAL_PREF
))
1381 bgp_set_attr_u32(&attrs
, s
->pool
, BA_LOCAL_PREF
, 0, p
->cf
->default_local_pref
);
1387 /* RFC 7606 4 - handle attribute framing errors */
1388 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1389 alen
, len
, (int) (pos
- s
->attrs
));
1392 /* RFC 7606 5.2 - handle missing NLRI during errors */
1393 if (!s
->ip_reach_len
&& !s
->mp_reach_len
)
1394 bgp_parse_error(s
, 1);
1396 s
->err_withdraw
= 1;
/*
 * bgp_finish_attrs - channel-dependent post-processing of decoded attributes
 * @s: parse state; supplies the set of seen attributes, the channel and the pool
 * @a: route attributes whose extended attribute list may be adjusted
 *
 * If an AIGP attribute was seen in the update but the channel configuration
 * does not have AIGP enabled, the event is reported and the AIGP attribute is
 * removed from @a->eattrs.
 *
 * NOTE(review): this listing omits short lines (return type, braces), so only
 * the statements shown below are documented here.
 */
1401 bgp_finish_attrs(struct bgp_parse_state
*s
, rta
*a
)
1403 /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
1404 if (BIT32_TEST(s
->attrs_seen
, BA_AIGP
) && !s
->channel
->cf
->aigp
)
1406 REPORT("Discarding AIGP attribute received on non-AIGP session");
1407 bgp_unset_attr(&a
->eattrs
, s
->pool
, BA_AIGP
);
1413 * Route bucket hash table
1416 #define RBH_KEY(b) b->eattrs, b->hash
1417 #define RBH_NEXT(b) b->next
1418 #define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
1419 #define RBH_FN(a,h) h
1421 #define RBH_REHASH bgp_rbh_rehash
1422 #define RBH_PARAMS /8, *2, 2, 2, 8, 20
1425 HASH_DEFINE_REHASH_FN(RBH
, struct bgp_bucket
)
/*
 * bgp_init_bucket_table - set up the per-channel route bucket state
 * @c: BGP channel
 *
 * Initializes the bucket hash in the channel pool, makes the send queue of
 * buckets empty and records that no withdraw bucket has been allocated yet.
 */
1428 bgp_init_bucket_table(struct bgp_channel
*c
)
1430 HASH_INIT(c
->bucket_hash
, c
->pool
, 8);
1432 init_list(&c
->bucket_queue
);
1433 c
->withdraw_bucket
= NULL
;
/*
 * bgp_free_bucket_table - tear down the per-channel route bucket state
 * @c: BGP channel
 *
 * Releases the bucket hash, unlinks every bucket that is still on the send
 * queue, frees the withdraw bucket and clears its pointer.
 *
 * NOTE(review): this listing omits short lines; presumably each bucket
 * unlinked in the loop is also freed there - confirm against the full file.
 */
1437 bgp_free_bucket_table(struct bgp_channel
*c
)
1439 HASH_FREE(c
->bucket_hash
);
1441 struct bgp_bucket
*b
;
1442 WALK_LIST_FIRST(b
, c
->bucket_queue
)
1444 rem_node(&b
->send_node
);
1448 mb_free(c
->withdraw_bucket
);
1449 c
->withdraw_bucket
= NULL
;
1452 static struct bgp_bucket
*
1453 bgp_get_bucket(struct bgp_channel
*c
, ea_list
*new)
1455 /* Hash and lookup */
1456 u32 hash
= ea_hash(new);
1457 struct bgp_bucket
*b
= HASH_FIND(c
->bucket_hash
, RBH
, new, hash
);
1462 uint ea_size
= sizeof(ea_list
) + new->count
* sizeof(eattr
);
1463 uint ea_size_aligned
= BIRD_ALIGN(ea_size
, CPU_STRUCT_ALIGN
);
1464 uint size
= sizeof(struct bgp_bucket
) + ea_size_aligned
;
1468 /* Gather total size of non-inline attributes */
1469 for (i
= 0; i
< new->count
; i
++)
1471 eattr
*a
= &new->attrs
[i
];
1473 if (!(a
->type
& EAF_EMBEDDED
))
1474 size
+= BIRD_ALIGN(sizeof(struct adata
) + a
->u
.ptr
->length
, CPU_STRUCT_ALIGN
);
1477 /* Create the bucket */
1478 b
= mb_alloc(c
->pool
, size
);
1479 init_list(&b
->prefixes
);
1482 /* Copy list of extended attributes */
1483 memcpy(b
->eattrs
, new, ea_size
);
1484 dest
= ((byte
*) b
->eattrs
) + ea_size_aligned
;
1486 /* Copy values of non-inline attributes */
1487 for (i
= 0; i
< new->count
; i
++)
1489 eattr
*a
= &b
->eattrs
->attrs
[i
];
1491 if (!(a
->type
& EAF_EMBEDDED
))
1493 const struct adata
*oa
= a
->u
.ptr
;
1494 struct adata
*na
= (struct adata
*) dest
;
1495 memcpy(na
, oa
, sizeof(struct adata
) + oa
->length
);
1497 dest
+= BIRD_ALIGN(sizeof(struct adata
) + na
->length
, CPU_STRUCT_ALIGN
);
1501 /* Insert the bucket to send queue and bucket hash */
1502 add_tail(&c
->bucket_queue
, &b
->send_node
);
1503 HASH_INSERT2(c
->bucket_hash
, RBH
, c
->pool
, b
);
1508 static struct bgp_bucket
*
1509 bgp_get_withdraw_bucket(struct bgp_channel
*c
)
1511 if (!c
->withdraw_bucket
)
1513 c
->withdraw_bucket
= mb_allocz(c
->pool
, sizeof(struct bgp_bucket
));
1514 init_list(&c
->withdraw_bucket
->prefixes
);
1517 return c
->withdraw_bucket
;
/*
 * bgp_free_bucket - detach a bucket from the channel's bookkeeping
 * @c: BGP channel owning the bucket
 * @b: bucket to dispose of
 *
 * Unlinks @b from the send queue and removes it from the bucket hash.
 *
 * NOTE(review): this listing omits short lines; the release of the bucket's
 * memory is presumably on one of them - confirm against the full file.
 */
1521 bgp_free_bucket(struct bgp_channel
*c
, struct bgp_bucket
*b
)
1523 rem_node(&b
->send_node
);
1524 HASH_REMOVE2(c
->bucket_hash
, RBH
, c
->pool
, b
);
/*
 * bgp_defer_bucket - postpone sending of a bucket
 * @c: BGP channel owning the bucket
 * @b: bucket to defer
 *
 * Moves @b to the tail of the channel's send queue by unlinking its send
 * node and appending it again.
 */
1529 bgp_defer_bucket(struct bgp_channel
*c
, struct bgp_bucket
*b
)
1531 rem_node(&b
->send_node
);
1532 add_tail(&c
->bucket_queue
, &b
->send_node
);
/*
 * bgp_withdraw_bucket - turn all prefixes of a bucket into withdrawals
 * @c: BGP channel owning the bucket
 * @b: bucket whose attribute list could not be sent
 *
 * Logs an "Attribute list too long" error for the protocol and then moves
 * every prefix from @b to the channel's withdraw bucket (obtained, and
 * created if needed, via bgp_get_withdraw_bucket()), logging each prefix as
 * it is moved. The loop runs until @b has no prefixes left.
 */
1536 bgp_withdraw_bucket(struct bgp_channel
*c
, struct bgp_bucket
*b
)
1538 struct bgp_proto
*p
= (void *) c
->c
.proto
;
1539 struct bgp_bucket
*wb
= bgp_get_withdraw_bucket(c
);
1541 log(L_ERR
"%s: Attribute list too long", p
->p
.name
);
1542 while (!EMPTY_LIST(b
->prefixes
))
1544 struct bgp_prefix
*px
= HEAD(b
->prefixes
);
1546 log(L_ERR
"%s: - withdrawing %N", p
->p
.name
, &px
->net
);
1547 rem_node(&px
->buck_node
);
1548 add_tail(&wb
->prefixes
, &px
->buck_node
);
1557 #define PXH_KEY(px) px->net, px->path_id, px->hash
1558 #define PXH_NEXT(px) px->next
1559 #define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
1560 #define PXH_FN(n,i,h) h
1562 #define PXH_REHASH bgp_pxh_rehash
1563 #define PXH_PARAMS /8, *2, 2, 2, 8, 24
1566 HASH_DEFINE_REHASH_FN(PXH
, struct bgp_prefix
)
/*
 * bgp_init_prefix_table - set up the per-channel prefix state
 * @c: BGP channel
 *
 * Initializes the prefix hash in the channel pool. If net_addr_length[]
 * gives a non-zero length for the channel's net type, a slab is created for
 * objects of size struct bgp_prefix plus that address length; otherwise
 * @c->prefix_slab is left NULL.
 */
1569 bgp_init_prefix_table(struct bgp_channel
*c
)
1571 HASH_INIT(c
->prefix_hash
, c
->pool
, 8);
1573 uint alen
= net_addr_length
[c
->c
.net_type
];
1574 c
->prefix_slab
= alen
? sl_new(c
->pool
, sizeof(struct bgp_prefix
) + alen
) : NULL
;
/*
 * bgp_free_prefix_table - tear down the per-channel prefix state
 * @c: BGP channel
 *
 * Releases the prefix hash, frees the prefix slab resource and clears the
 * slab pointer.
 */
1578 bgp_free_prefix_table(struct bgp_channel
*c
)
1580 HASH_FREE(c
->prefix_hash
);
1582 rfree(c
->prefix_slab
);
1583 c
->prefix_slab
= NULL
;
1586 static struct bgp_prefix
*
1587 bgp_get_prefix(struct bgp_channel
*c
, net_addr
*net
, u32 path_id
)
1589 u32 hash
= net_hash(net
) ^ u32_hash(path_id
);
1590 struct bgp_prefix
*px
= HASH_FIND(c
->prefix_hash
, PXH
, net
, path_id
, hash
);
1594 rem_node(&px
->buck_node
);
1599 px
= sl_alloc(c
->prefix_slab
);
1601 px
= mb_alloc(c
->pool
, sizeof(struct bgp_prefix
) + net
->length
);
1603 px
->buck_node
.next
= NULL
;
1604 px
->buck_node
.prev
= NULL
;
1606 px
->path_id
= path_id
;
1607 net_copy(px
->net
, net
);
1609 HASH_INSERT2(c
->prefix_hash
, PXH
, c
->pool
, px
);
1615 bgp_free_prefix(struct bgp_channel
*c
, struct bgp_prefix
*px
)
1617 rem_node(&px
->buck_node
);
1618 HASH_REMOVE2(c
->prefix_hash
, PXH
, c
->pool
, px
);
1621 sl_free(c
->prefix_slab
, px
);
1632 bgp_preexport(struct proto
*P
, rte
**new, struct linpool
*pool UNUSED
)
1635 struct proto
*SRC
= e
->attrs
->src
->proto
;
1636 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
1637 struct bgp_proto
*src
= (SRC
->proto
== &proto_bgp
) ? (struct bgp_proto
*) SRC
: NULL
;
1639 /* Reject our routes */
1643 /* Accept non-BGP routes */
1647 /* IBGP route reflection, RFC 4456 */
1648 if (p
->is_internal
&& src
->is_internal
&& (p
->local_as
== src
->local_as
))
1650 /* Rejected unless configured as route reflector */
1651 if (!p
->rr_client
&& !src
->rr_client
)
1654 /* Generally, this should be handled when path is received, but we check it
1655 also here as rr_cluster_id may be undefined or different in src. */
1656 if (p
->rr_cluster_id
&& bgp_cluster_list_loopy(p
, e
->attrs
->eattrs
))
1660 /* Handle well-known communities, RFC 1997 */
1662 if (p
->cf
->interpret_communities
&&
1663 (c
= ea_find(e
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_COMMUNITY
))))
1665 const struct adata
*d
= c
->u
.ptr
;
1667 /* Do not export anywhere */
1668 if (int_set_contains(d
, BGP_COMM_NO_ADVERTISE
))
1671 /* Do not export outside of AS (or member-AS) */
1672 if (!p
->is_internal
&& int_set_contains(d
, BGP_COMM_NO_EXPORT_SUBCONFED
))
1675 /* Do not export outside of AS (or confederation) */
1676 if (!p
->is_interior
&& int_set_contains(d
, BGP_COMM_NO_EXPORT
))
1679 /* Do not export LLGR_STALE routes to LLGR-ignorant peers */
1680 if (!p
->conn
->remote_caps
->llgr_aware
&& int_set_contains(d
, BGP_COMM_LLGR_STALE
))
1688 bgp_update_attrs(struct bgp_proto
*p
, struct bgp_channel
*c
, rte
*e
, ea_list
*attrs0
, struct linpool
*pool
)
1690 struct proto
*SRC
= e
->attrs
->src
->proto
;
1691 struct bgp_proto
*src
= (SRC
->proto
== &proto_bgp
) ? (void *) SRC
: NULL
;
1692 struct bgp_export_state s
= { .proto
= p
, .channel
= c
, .pool
= pool
, .src
= src
, .route
= e
, .mpls
= c
->desc
->mpls
};
1693 ea_list
*attrs
= attrs0
;
1697 /* ORIGIN attribute - mandatory, attach if missing */
1698 if (! bgp_find_attr(attrs0
, BA_ORIGIN
))
1699 bgp_set_attr_u32(&attrs
, pool
, BA_ORIGIN
, 0, src
? ORIGIN_INCOMPLETE
: ORIGIN_IGP
);
1701 /* AS_PATH attribute - mandatory */
1702 a
= bgp_find_attr(attrs0
, BA_AS_PATH
);
1703 ad
= a
? a
->u
.ptr
: &null_adata
;
1705 /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1706 if ((!p
->cf
->confederation
|| !p
->is_interior
) && as_path_contains_confed(ad
))
1707 ad
= as_path_strip_confed(pool
, ad
);
1709 /* AS_PATH attribute - keep or prepend ASN */
1710 if (p
->is_internal
|| p
->rs_client
)
1712 /* IBGP or route server -> just ensure there is one */
1714 bgp_set_attr_ptr(&attrs
, pool
, BA_AS_PATH
, 0, &null_adata
);
1716 else if (p
->is_interior
)
1718 /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1719 ad
= as_path_prepend2(pool
, ad
, AS_PATH_CONFED_SEQUENCE
, p
->public_as
);
1720 bgp_set_attr_ptr(&attrs
, pool
, BA_AS_PATH
, 0, ad
);
1722 else /* Regular EBGP (no RS, no confederation) */
1724 /* Regular EBGP -> prepend ASN as regular sequence */
1725 ad
= as_path_prepend2(pool
, ad
, AS_PATH_SEQUENCE
, p
->public_as
);
1726 bgp_set_attr_ptr(&attrs
, pool
, BA_AS_PATH
, 0, ad
);
1728 /* MULTI_EXIT_DISC attribute - accept only if set in export filter */
1729 a
= bgp_find_attr(attrs0
, BA_MULTI_EXIT_DISC
);
1730 if (a
&& !(a
->type
& EAF_FRESH
))
1731 bgp_unset_attr(&attrs
, pool
, BA_MULTI_EXIT_DISC
);
1734 /* NEXT_HOP attribute - delegated to AF-specific hook */
1735 a
= bgp_find_attr(attrs0
, BA_NEXT_HOP
);
1736 bgp_update_next_hop(&s
, a
, &attrs
);
1738 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
1739 if (p
->is_interior
&& ! bgp_find_attr(attrs0
, BA_LOCAL_PREF
))
1740 bgp_set_attr_u32(&attrs
, pool
, BA_LOCAL_PREF
, 0, p
->cf
->default_local_pref
);
1742 /* AIGP attribute - accumulate local metric or originate new one */
1744 if (s
.local_next_hop
&&
1745 (bgp_total_aigp_metric_(e
, &metric
, &ad
) ||
1746 (c
->cf
->aigp_originate
&& bgp_init_aigp_metric(e
, &metric
, &ad
))))
1748 ad
= bgp_aigp_set_metric(pool
, ad
, metric
);
1749 bgp_set_attr_ptr(&attrs
, pool
, BA_AIGP
, 0, ad
);
1752 /* IBGP route reflection, RFC 4456 */
1753 if (src
&& src
->is_internal
&& p
->is_internal
&& (src
->local_as
== p
->local_as
))
1755 /* ORIGINATOR_ID attribute - attach if not already set */
1756 if (! bgp_find_attr(attrs0
, BA_ORIGINATOR_ID
))
1757 bgp_set_attr_u32(&attrs
, pool
, BA_ORIGINATOR_ID
, 0, src
->remote_id
);
1759 /* CLUSTER_LIST attribute - prepend cluster ID */
1760 a
= bgp_find_attr(attrs0
, BA_CLUSTER_LIST
);
1761 ad
= a
? a
->u
.ptr
: NULL
;
1763 /* Prepend src cluster ID */
1764 if (src
->rr_cluster_id
)
1765 ad
= int_set_prepend(pool
, ad
, src
->rr_cluster_id
);
1767 /* Prepend dst cluster ID if src and dst clusters are different */
1768 if (p
->rr_cluster_id
&& (src
->rr_cluster_id
!= p
->rr_cluster_id
))
1769 ad
= int_set_prepend(pool
, ad
, p
->rr_cluster_id
);
1771 /* Should be at least one prepended cluster ID */
1772 bgp_set_attr_ptr(&attrs
, pool
, BA_CLUSTER_LIST
, 0, ad
);
1775 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1776 if (! p
->as4_session
)
1778 a
= bgp_find_attr(attrs
, BA_AS_PATH
);
1779 if (a
&& as_path_contains_as4(a
->u
.ptr
))
1781 bgp_set_attr_ptr(&attrs
, pool
, BA_AS_PATH
, 0, as_path_to_old(pool
, a
->u
.ptr
));
1782 bgp_set_attr_ptr(&attrs
, pool
, BA_AS4_PATH
, 0, as_path_strip_confed(pool
, a
->u
.ptr
));
1785 a
= bgp_find_attr(attrs
, BA_AGGREGATOR
);
1786 if (a
&& aggregator_contains_as4(a
->u
.ptr
))
1788 bgp_set_attr_ptr(&attrs
, pool
, BA_AGGREGATOR
, 0, aggregator_to_old(pool
, a
->u
.ptr
));
1789 bgp_set_attr_ptr(&attrs
, pool
, BA_AS4_AGGREGATOR
, 0, a
->u
.ptr
);
1794 * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1795 * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1796 * should be checked in AF-specific hooks.
1799 /* Apply per-attribute export hooks for validation and normalization */
1800 return bgp_export_attrs(&s
, attrs
);
1804 bgp_rt_notify(struct proto
*P
, struct channel
*C
, net
*n
, rte
*new, rte
*old
)
1806 struct bgp_proto
*p
= (void *) P
;
1807 struct bgp_channel
*c
= (void *) C
;
1808 struct bgp_bucket
*buck
;
1809 struct bgp_prefix
*px
;
1814 struct ea_list
*attrs
= bgp_update_attrs(p
, c
, new, new->attrs
->eattrs
, bgp_linpool2
);
1816 /* If attributes are invalid, we fall back to withdraw */
1817 buck
= attrs
? bgp_get_bucket(c
, attrs
) : bgp_get_withdraw_bucket(c
);
1818 path
= new->attrs
->src
->global_id
;
1820 lp_flush(bgp_linpool2
);
1824 buck
= bgp_get_withdraw_bucket(c
);
1825 path
= old
->attrs
->src
->global_id
;
1828 px
= bgp_get_prefix(c
, n
->n
.addr
, c
->add_path_tx
? path
: 0);
1829 add_tail(&buck
->prefixes
, &px
->buck_node
);
1831 bgp_schedule_packet(p
->conn
, c
, PKT_UPDATE
);
1836 bgp_get_neighbor(rte
*r
)
1838 eattr
*e
= ea_find(r
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_AS_PATH
));
1841 if (e
&& as_path_get_first_regular(e
->u
.ptr
, &as
))
1844 /* If AS_PATH is not defined, we treat rte as locally originated */
1845 struct bgp_proto
*p
= (void *) r
->attrs
->src
->proto
;
1846 return p
->cf
->confederation
?: p
->local_as
;
1852 if (r
->u
.bgp
.stale
< 0)
1854 /* If staleness is unknown, compute and cache it */
1855 eattr
*a
= ea_find(r
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_COMMUNITY
));
1856 r
->u
.bgp
.stale
= a
&& int_set_contains(a
->u
.ptr
, BGP_COMM_LLGR_STALE
);
1859 return r
->u
.bgp
.stale
;
1863 bgp_rte_better(rte
*new, rte
*old
)
1865 struct bgp_proto
*new_bgp
= (struct bgp_proto
*) new->attrs
->src
->proto
;
1866 struct bgp_proto
*old_bgp
= (struct bgp_proto
*) old
->attrs
->src
->proto
;
1870 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1871 n
= new->u
.bgp
.suppressed
;
1872 o
= old
->u
.bgp
.suppressed
;
1878 /* RFC 4271 9.1.2.1. Route resolvability test */
1879 n
= rte_resolvable(new);
1880 o
= rte_resolvable(old
);
1886 /* LLGR draft - depreference stale routes */
1894 /* Start with local preferences */
1895 x
= ea_find(new->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_LOCAL_PREF
));
1896 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_LOCAL_PREF
));
1897 n
= x
? x
->u
.data
: new_bgp
->cf
->default_local_pref
;
1898 o
= y
? y
->u
.data
: old_bgp
->cf
->default_local_pref
;
1904 /* RFC 7311 4.1 - Apply AIGP metric */
1905 u64 n2
= bgp_total_aigp_metric(new);
1906 u64 o2
= bgp_total_aigp_metric(old
);
1912 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1913 if (new_bgp
->cf
->compare_path_lengths
|| old_bgp
->cf
->compare_path_lengths
)
1915 x
= ea_find(new->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_AS_PATH
));
1916 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_AS_PATH
));
1917 n
= x
? as_path_getlen(x
->u
.ptr
) : AS_PATH_MAXLEN
;
1918 o
= y
? as_path_getlen(y
->u
.ptr
) : AS_PATH_MAXLEN
;
1925 /* RFC 4271 9.1.2.2. b) Use origins */
1926 x
= ea_find(new->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_ORIGIN
));
1927 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_ORIGIN
));
1928 n
= x
? x
->u
.data
: ORIGIN_INCOMPLETE
;
1929 o
= y
? y
->u
.data
: ORIGIN_INCOMPLETE
;
1935 /* RFC 4271 9.1.2.2. c) Compare MED's */
1936 /* Proper RFC 4271 path selection cannot be interpreted as finding
1937 * the best path in some ordering. It is implemented partially in
1938 * bgp_rte_recalculate() when deterministic_med option is
1939 * active. Without that option, the behavior is just an
1940 * approximation, which in specific situations may lead to
1941 * persistent routing loops, because it is nondeterministic - it
1942 * depends on the order in which routes appeared. But it is also the
1943 * same behavior as used by default in Cisco routers, so it is
1944 * probably not a big issue.
1946 if (new_bgp
->cf
->med_metric
|| old_bgp
->cf
->med_metric
||
1947 (bgp_get_neighbor(new) == bgp_get_neighbor(old
)))
1949 x
= ea_find(new->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_MULTI_EXIT_DISC
));
1950 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_MULTI_EXIT_DISC
));
1951 n
= x
? x
->u
.data
: new_bgp
->cf
->default_med
;
1952 o
= y
? y
->u
.data
: old_bgp
->cf
->default_med
;
1959 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1960 if (new_bgp
->is_interior
> old_bgp
->is_interior
)
1962 if (new_bgp
->is_interior
< old_bgp
->is_interior
)
1965 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1966 n
= new_bgp
->cf
->igp_metric
? new->attrs
->igp_metric
: 0;
1967 o
= old_bgp
->cf
->igp_metric
? old
->attrs
->igp_metric
: 0;
1973 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1974 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1975 x
= ea_find(new->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_ORIGINATOR_ID
));
1976 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_ORIGINATOR_ID
));
1977 n
= x
? x
->u
.data
: new_bgp
->remote_id
;
1978 o
= y
? y
->u
.data
: old_bgp
->remote_id
;
1980 /* RFC 5004 - prefer older routes */
1981 /* (if both are external and from different peer) */
1982 if ((new_bgp
->cf
->prefer_older
|| old_bgp
->cf
->prefer_older
) &&
1983 !new_bgp
->is_internal
&& n
!= o
)
1986 /* rest of RFC 4271 9.1.2.2. f) */
1992 /* RFC 4456 9. b) Compare cluster list lengths */
1993 x
= ea_find(new->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_CLUSTER_LIST
));
1994 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_CLUSTER_LIST
));
1995 n
= x
? int_set_get_size(x
->u
.ptr
) : 0;
1996 o
= y
? int_set_get_size(y
->u
.ptr
) : 0;
2002 /* RFC 4271 9.1.2.2. g) Compare peer IP addresses */
2003 return ipa_compare(new_bgp
->remote_ip
, old_bgp
->remote_ip
) < 0;
2008 bgp_rte_mergable(rte
*pri
, rte
*sec
)
2010 struct bgp_proto
*pri_bgp
= (struct bgp_proto
*) pri
->attrs
->src
->proto
;
2011 struct bgp_proto
*sec_bgp
= (struct bgp_proto
*) sec
->attrs
->src
->proto
;
2015 /* Skip suppressed routes (see bgp_rte_recalculate()) */
2016 if (pri
->u
.bgp
.suppressed
!= sec
->u
.bgp
.suppressed
)
2019 /* RFC 4271 9.1.2.1. Route resolvability test */
2020 if (rte_resolvable(pri
) != rte_resolvable(sec
))
2023 /* LLGR draft - depreference stale routes */
2024 if (rte_stale(pri
) != rte_stale(sec
))
2027 /* Start with local preferences */
2028 x
= ea_find(pri
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_LOCAL_PREF
));
2029 y
= ea_find(sec
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_LOCAL_PREF
));
2030 p
= x
? x
->u
.data
: pri_bgp
->cf
->default_local_pref
;
2031 s
= y
? y
->u
.data
: sec_bgp
->cf
->default_local_pref
;
2035 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
2036 if (pri_bgp
->cf
->compare_path_lengths
|| sec_bgp
->cf
->compare_path_lengths
)
2038 x
= ea_find(pri
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_AS_PATH
));
2039 y
= ea_find(sec
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_AS_PATH
));
2040 p
= x
? as_path_getlen(x
->u
.ptr
) : AS_PATH_MAXLEN
;
2041 s
= y
? as_path_getlen(y
->u
.ptr
) : AS_PATH_MAXLEN
;
2046 // if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
2050 /* RFC 4271 9.1.2.2. b) Use origins */
2051 x
= ea_find(pri
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_ORIGIN
));
2052 y
= ea_find(sec
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_ORIGIN
));
2053 p
= x
? x
->u
.data
: ORIGIN_INCOMPLETE
;
2054 s
= y
? y
->u
.data
: ORIGIN_INCOMPLETE
;
2058 /* RFC 4271 9.1.2.2. c) Compare MED's */
2059 if (pri_bgp
->cf
->med_metric
|| sec_bgp
->cf
->med_metric
||
2060 (bgp_get_neighbor(pri
) == bgp_get_neighbor(sec
)))
2062 x
= ea_find(pri
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_MULTI_EXIT_DISC
));
2063 y
= ea_find(sec
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_MULTI_EXIT_DISC
));
2064 p
= x
? x
->u
.data
: pri_bgp
->cf
->default_med
;
2065 s
= y
? y
->u
.data
: sec_bgp
->cf
->default_med
;
2070 /* RFC 4271 9.1.2.2. d) Prefer external peers */
2071 if (pri_bgp
->is_interior
!= sec_bgp
->is_interior
)
2074 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
2075 p
= pri_bgp
->cf
->igp_metric
? pri
->attrs
->igp_metric
: 0;
2076 s
= sec_bgp
->cf
->igp_metric
? sec
->attrs
->igp_metric
: 0;
2080 /* Remaining criteria are ignored */
/*
 * same_group - test whether a route belongs to a given selection group
 * @r: route to test
 * @lpref: preference identifying the group
 * @lasn: neighbor AS identifying the group
 *
 * A route is in the group when its preference equals @lpref and the value
 * returned by bgp_get_neighbor() for it equals @lasn.
 */
2087 same_group(rte
*r
, u32 lpref
, u32 lasn
)
2089 return (r
->pref
== lpref
) && (bgp_get_neighbor(r
) == lasn
);
/*
 * use_deterministic_med - does the route's source use deterministic MED?
 * @r: route to test
 *
 * Returns non-zero only if the protocol that is the source of @r is a BGP
 * protocol and its configuration has deterministic_med set. The protocol
 * type is checked first, before the instance is treated as struct bgp_proto.
 */
2093 use_deterministic_med(rte
*r
)
2095 struct proto
*P
= r
->attrs
->src
->proto
;
2096 return (P
->proto
== &proto_bgp
) && ((struct bgp_proto
*) P
)->cf
->deterministic_med
;
2100 bgp_rte_recalculate(rtable
*table
, net
*net
, rte
*new, rte
*old
, rte
*old_best
)
2103 rte
*key
= new ? new : old
;
2104 u32 lpref
= key
->pref
;
2105 u32 lasn
= bgp_get_neighbor(key
);
2106 int old_suppressed
= old
? old
->u
.bgp
.suppressed
: 0;
2109 * Proper RFC 4271 path selection is a bit complicated, it cannot be
2110 * implemented just by rte_better(), because it is not a linear
2111 * ordering. But it can be split into two levels, where the lower
2112 * level chooses the best routes in each group of routes from the
2113 * same neighboring AS and higher level chooses the best route (with
2114 * a slightly different ordering) between the best-in-group routes.
2116 * When deterministic_med is disabled, we just ignore this issue and
2117 * choose the best route by bgp_rte_better() alone. If enabled, the
2118 * lower level of the route selection is done here (for the group
2119 * to which the changed route belongs), all routes in group are
2120 * marked as suppressed, just chosen best-in-group is not.
2122 * Global best route selection then implements higher level by
2123 * choosing between non-suppressed routes (as they are always
2124 * preferred over suppressed routes). Routes from BGP protocols
2125 * that do not set deterministic_med are just never suppressed. As
2126 * they do not participate in the lower level selection, it is OK
2127 * that this fn is not called for them.
2129 * The idea is simple, the implementation is more problematic,
2130 * mostly because of optimizations in rte_recalculate() that
2131 * avoid full recalculation in most cases.
2133 * We can assume that at least one of new, old is non-NULL and both
2134 * are from the same protocol with enabled deterministic_med. We
2135 * group routes by both neighbor AS (lasn) and preference (lpref),
2136 * because bgp_rte_better() does not handle preference itself.
2139 /* If new and old are from different groups, we just process that
2140 as two independent events */
2141 if (new && old
&& !same_group(old
, lpref
, lasn
))
2144 i1
= bgp_rte_recalculate(table
, net
, NULL
, old
, old_best
);
2145 i2
= bgp_rte_recalculate(table
, net
, new, NULL
, old_best
);
2150 * We could find the best-in-group and then make some shortcuts like
2151 * in rte_recalculate, but as we would have to walk through all
2152 * net->routes just to find it, it is probably not worth it. So we
2153 * just have one simple fast case that uses just the old route.
2154 * We also set suppressed flag to avoid using it in bgp_rte_better().
2158 new->u
.bgp
.suppressed
= 1;
2162 old
->u
.bgp
.suppressed
= 1;
2164 /* The fast case - replace not best with worse (or remove not best) */
2165 if (old_suppressed
&& !(new && bgp_rte_better(new, old
)))
2169 /* The default case - find a new best-in-group route */
2170 r
= new; /* new may not be in the list */
2171 for (s
=net
->routes
; rte_is_valid(s
); s
=s
->next
)
2172 if (use_deterministic_med(s
) && same_group(s
, lpref
, lasn
))
2174 s
->u
.bgp
.suppressed
= 1;
2175 if (!r
|| bgp_rte_better(s
, r
))
2179 /* Simple case - the last route in group disappears */
2183 /* Found if new is mergable with best-in-group */
2184 if (new && (new != r
) && bgp_rte_mergable(r
, new))
2185 new->u
.bgp
.suppressed
= 0;
2187 /* Found all existing routes mergable with best-in-group */
2188 for (s
=net
->routes
; rte_is_valid(s
); s
=s
->next
)
2189 if (use_deterministic_med(s
) && same_group(s
, lpref
, lasn
))
2190 if ((s
!= r
) && bgp_rte_mergable(r
, s
))
2191 s
->u
.bgp
.suppressed
= 0;
2193 /* Found best-in-group */
2194 r
->u
.bgp
.suppressed
= 0;
2197 * There are generally two reasons why we have to force
2198 * recalculation (return 1): First, the new route may be wrongfully
2199 * chosen to be the best in the first case check in
2200 * rte_recalculate(), this may happen only if old_best is from the
2201 * same group. Second, another (different than new route)
2202 * best-in-group is chosen and that may be the proper best (although
2203 * rte_recalculate() would otherwise ignore that possibility).
2205 * There are three possible cases according to whether the old route
2206 * was the best in group (OBG, i.e. !old_suppressed) and whether the
2207 * new route is the best in group (NBG, tested by r == new). These
2208 * cases work even if old or new is NULL.
2210 * NBG -> new is a possible candidate for the best route, so we just
2211 * check for the first reason using same_group().
2213 * !NBG && OBG -> Second reason applies, return 1
2215 * !NBG && !OBG -> Best in group does not change, old != old_best,
2216 * rte_better(new, old_best) is false and therefore
2217 * the first reason does not apply, return 0
2221 return old_best
&& same_group(old_best
, lpref
, lasn
);
2223 return !old_suppressed
;
2227 bgp_rte_modify_stale(struct rte
*r
, struct linpool
*pool
)
2229 eattr
*a
= ea_find(r
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_COMMUNITY
));
2230 const struct adata
*ad
= a
? a
->u
.ptr
: NULL
;
2231 uint flags
= a
? a
->flags
: BAF_PARTIAL
;
2233 if (ad
&& int_set_contains(ad
, BGP_COMM_NO_LLGR
))
2236 if (ad
&& int_set_contains(ad
, BGP_COMM_LLGR_STALE
))
2239 r
= rte_cow_rta(r
, pool
);
2240 bgp_set_attr_ptr(&(r
->attrs
->eattrs
), pool
, BA_COMMUNITY
, flags
,
2241 int_set_add(pool
, ad
, BGP_COMM_LLGR_STALE
));
2249 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
2252 bgp_process_as4_attrs(ea_list
**attrs
, struct linpool
*pool
)
2254 eattr
*p2
= bgp_find_attr(*attrs
, BA_AS_PATH
);
2255 eattr
*p4
= bgp_find_attr(*attrs
, BA_AS4_PATH
);
2256 eattr
*a2
= bgp_find_attr(*attrs
, BA_AGGREGATOR
);
2257 eattr
*a4
= bgp_find_attr(*attrs
, BA_AS4_AGGREGATOR
);
2259 /* First, unset AS4_* attributes */
2260 if (p4
) bgp_unset_attr(attrs
, pool
, BA_AS4_PATH
);
2261 if (a4
) bgp_unset_attr(attrs
, pool
, BA_AS4_AGGREGATOR
);
2263 /* Handle AGGREGATOR attribute */
2266 u32 a2_asn
= get_u32(a2
->u
.ptr
->data
);
2268 /* If routes were aggregated by an old router, then AS4_PATH and
2269 AS4_AGGREGATOR are invalid. In that case we give up. */
2270 if (a2_asn
!= AS_TRANS
)
2273 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
2274 a2
->u
.ptr
= a4
->u
.ptr
;
2277 /* Handle AS_PATH attribute */
2280 /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
2281 int p2_len
= as_path_getlen(p2
->u
.ptr
);
2282 int p4_len
= as_path_getlen(p4
->u
.ptr
);
2284 /* AS_PATH is too short, give up */
2285 if (p2_len
< p4_len
)
2288 /* Merge AS_PATH and AS4_PATH */
2289 struct adata
*apc
= as_path_cut(pool
, p2
->u
.ptr
, p2_len
- p4_len
);
2290 p2
->u
.ptr
= as_path_merge(pool
, apc
, p4
->u
.ptr
);
2295 bgp_get_attr(eattr
*a
, byte
*buf
, int buflen
)
2297 uint i
= EA_ID(a
->id
);
2298 const struct bgp_attr_desc
*d
;
2301 if (bgp_attr_known(i
))
2303 d
= &bgp_attr_table
[i
];
2304 len
= bsprintf(buf
, "%s", d
->name
);
2310 d
->format(a
, buf
, buflen
- len
- 2);
2316 bsprintf(buf
, "%02x%s", i
, (a
->flags
& BAF_TRANSITIVE
) ? " [t]" : "");
2321 bgp_get_route_info(rte
*e
, byte
*buf
)
2323 eattr
*p
= ea_find(e
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_AS_PATH
));
2324 eattr
*o
= ea_find(e
->attrs
->eattrs
, EA_CODE(PROTOCOL_BGP
, BA_ORIGIN
));
2327 buf
+= bsprintf(buf
, " (%d", e
->pref
);
2329 if (e
->u
.bgp
.suppressed
)
2330 buf
+= bsprintf(buf
, "-");
2333 buf
+= bsprintf(buf
, "s");
2335 u64 metric
= bgp_total_aigp_metric(e
);
2336 if (metric
< BGP_AIGP_MAX
)
2338 buf
+= bsprintf(buf
, "/%lu", metric
);
2340 else if (e
->attrs
->igp_metric
)
2342 if (!rte_resolvable(e
))
2343 buf
+= bsprintf(buf
, "/-");
2344 else if (e
->attrs
->igp_metric
>= IGP_METRIC_UNKNOWN
)
2345 buf
+= bsprintf(buf
, "/?");
2347 buf
+= bsprintf(buf
, "/%d", e
->attrs
->igp_metric
);
2349 buf
+= bsprintf(buf
, ") [");
2351 if (p
&& as_path_get_last(p
->u
.ptr
, &origas
))
2352 buf
+= bsprintf(buf
, "AS%u", origas
);
2354 buf
+= bsprintf(buf
, "%c", "ie?"[o
->u
.data
]);