2 * BIRD -- BGP Attributes
4 * (c) 2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
13 #include "nest/bird.h"
14 #include "nest/iface.h"
15 #include "nest/protocol.h"
16 #include "nest/route.h"
17 #include "nest/attrs.h"
18 #include "conf/conf.h"
19 #include "lib/resource.h"
20 #include "lib/string.h"
21 #include "lib/unaligned.h"
25 static byte bgp_mandatory_attrs
[] = { BA_ORIGIN
, BA_AS_PATH
37 int (*validate
)(struct bgp_proto
*p
, byte
*attr
, int len
);
38 void (*format
)(eattr
*ea
, byte
*buf
, int buflen
);
42 bgp_check_origin(struct bgp_proto
*p UNUSED
, byte
*a
, int len UNUSED
)
50 bgp_format_origin(eattr
*a
, byte
*buf
, int buflen
)
52 static char *bgp_origin_names
[] = { "IGP", "EGP", "Incomplete" };
54 bsprintf(buf
, bgp_origin_names
[a
->u
.data
]);
58 path_segment_contains(byte
*p
, int bs
, u32 asn
)
66 u32 asn2
= (bs
== 4) ? get_u32(p
) : get_u16(p
);
75 /* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */
77 validate_path(struct bgp_proto
*p
, int as_path
, int bs
, byte
*idata
, unsigned int *ilength
)
102 case AS_PATH_SEQUENCE
:
107 case AS_PATH_CONFED_SEQUENCE
:
108 case AS_PATH_CONFED_SET
:
109 if (as_path
&& path_segment_contains(a
, bs
, p
->remote_as
))
111 log(L_WARN
"%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p
->p
.name
);
115 log(L_WARN
"%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment",
116 p
->p
.name
, as_path
? "AS" : "AS4");
127 memmove(dst
, a
, plen
);
135 *ilength
= dst
- idata
;
140 validate_as_path(struct bgp_proto
*p
, byte
*a
, int *len
)
142 return validate_path(p
, 1, p
->as4_session
? 4 : 2, a
, len
);
146 validate_as4_path(struct bgp_proto
*p
, struct adata
*path
)
148 return validate_path(p
, 0, 4, path
->data
, &path
->length
);
152 bgp_check_next_hop(struct bgp_proto
*p UNUSED
, byte
*a
, int len
)
159 memcpy(&addr
, a
, len
);
161 if (ipa_classify(addr
) & IADDR_HOST
)
169 bgp_format_next_hop(eattr
*a
, byte
*buf
, int buflen UNUSED
)
171 ip_addr
*ipp
= (ip_addr
*) a
->u
.ptr
->data
;
173 /* in IPv6, we might have two addresses in NEXT HOP */
174 if ((a
->u
.ptr
->length
== NEXT_HOP_LENGTH
) && ipa_nonzero(ipp
[1]))
176 bsprintf(buf
, "%I %I", ipp
[0], ipp
[1]);
181 bsprintf(buf
, "%I", ipp
[0]);
185 bgp_check_aggregator(struct bgp_proto
*p
, byte
*a UNUSED
, int len
)
187 int exp_len
= p
->as4_session
? 8 : 6;
189 return (len
== exp_len
) ? 0 : 5;
193 bgp_format_aggregator(eattr
*a
, byte
*buf
, int buflen UNUSED
)
195 struct adata
*ad
= a
->u
.ptr
;
196 byte
*data
= ad
->data
;
210 bsprintf(buf
, "%d.%d.%d.%d AS%d", data
[0], data
[1], data
[2], data
[3], as
);
214 bgp_check_cluster_list(struct bgp_proto
*p UNUSED
, byte
*a UNUSED
, int len
)
216 return ((len
% 4) == 0) ? 0 : 5;
220 bgp_format_cluster_list(eattr
*a
, byte
*buf
, int buflen UNUSED
)
222 int_set_format(a
->u
.ptr
, 0, buf
, buflen
);
226 bgp_check_reach_nlri(struct bgp_proto
*p UNUSED
, byte
*a UNUSED
, int len UNUSED
)
229 p
->mp_reach_start
= a
;
230 p
->mp_reach_len
= len
;
236 bgp_check_unreach_nlri(struct bgp_proto
*p UNUSED
, byte
*a UNUSED
, int len UNUSED
)
239 p
->mp_unreach_start
= a
;
240 p
->mp_unreach_len
= len
;
245 static struct attr_desc bgp_attr_table
[] = {
246 { NULL
, -1, 0, 0, 0, /* Undefined */
248 { "origin", 1, BAF_TRANSITIVE
, EAF_TYPE_INT
, 1, /* BA_ORIGIN */
249 bgp_check_origin
, bgp_format_origin
},
250 { "as_path", -1, BAF_TRANSITIVE
, EAF_TYPE_AS_PATH
, 1, /* BA_AS_PATH */
251 NULL
, NULL
}, /* is checked by validate_as_path() as a special case */
252 { "next_hop", 4, BAF_TRANSITIVE
, EAF_TYPE_IP_ADDRESS
, 1, /* BA_NEXT_HOP */
253 bgp_check_next_hop
, bgp_format_next_hop
},
254 { "med", 4, BAF_OPTIONAL
, EAF_TYPE_INT
, 1, /* BA_MULTI_EXIT_DISC */
256 { "local_pref", 4, BAF_TRANSITIVE
, EAF_TYPE_INT
, 0, /* BA_LOCAL_PREF */
258 { "atomic_aggr", 0, BAF_TRANSITIVE
, EAF_TYPE_OPAQUE
, 1, /* BA_ATOMIC_AGGR */
260 { "aggregator", -1, BAF_OPTIONAL
| BAF_TRANSITIVE
, EAF_TYPE_OPAQUE
, 1, /* BA_AGGREGATOR */
261 bgp_check_aggregator
, bgp_format_aggregator
},
262 { "community", -1, BAF_OPTIONAL
| BAF_TRANSITIVE
, EAF_TYPE_INT_SET
, 1, /* BA_COMMUNITY */
264 { "originator_id", 4, BAF_OPTIONAL
, EAF_TYPE_ROUTER_ID
, 0, /* BA_ORIGINATOR_ID */
266 { "cluster_list", -1, BAF_OPTIONAL
, EAF_TYPE_INT_SET
, 0, /* BA_CLUSTER_LIST */
267 bgp_check_cluster_list
, bgp_format_cluster_list
},
268 { NULL
, }, /* BA_DPA */
269 { NULL
, }, /* BA_ADVERTISER */
270 { NULL
, }, /* BA_RCID_PATH */
271 { "mp_reach_nlri", -1, BAF_OPTIONAL
, EAF_TYPE_OPAQUE
, 1, /* BA_MP_REACH_NLRI */
272 bgp_check_reach_nlri
, NULL
},
273 { "mp_unreach_nlri", -1, BAF_OPTIONAL
, EAF_TYPE_OPAQUE
, 1, /* BA_MP_UNREACH_NLRI */
274 bgp_check_unreach_nlri
, NULL
},
275 { NULL
, }, /* BA_EXTENDED_COMM */
276 { "as4_path", -1, BAF_OPTIONAL
| BAF_TRANSITIVE
, EAF_TYPE_OPAQUE
, 1, /* BA_AS4_PATH */
278 { "as4_aggregator", -1, BAF_OPTIONAL
| BAF_TRANSITIVE
, EAF_TYPE_OPAQUE
, 1, /* BA_AS4_AGGREGATOR */
282 /* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH because
283 * EAF_TYPE_AS_PATH is supposed to have different format (2 or 4 B for each ASN)
284 * depending on bgp_as4_support variable.
287 #define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name)
289 static inline struct adata
*
290 bgp_alloc_adata(struct linpool
*pool
, unsigned len
)
292 struct adata
*ad
= lp_alloc(pool
, sizeof(struct adata
) + len
);
298 bgp_set_attr(eattr
*e
, unsigned attr
, uintptr_t val
)
300 ASSERT(ATTR_KNOWN(attr
));
301 e
->id
= EA_CODE(EAP_BGP
, attr
);
302 e
->type
= bgp_attr_table
[attr
].type
;
303 e
->flags
= bgp_attr_table
[attr
].expected_flags
;
304 if (e
->type
& EAF_EMBEDDED
)
307 e
->u
.ptr
= (struct adata
*) val
;
311 bgp_set_attr_wa(eattr
*e
, struct linpool
*pool
, unsigned attr
, unsigned len
)
313 struct adata
*ad
= bgp_alloc_adata(pool
, len
);
314 bgp_set_attr(e
, attr
, (uintptr_t) ad
);
319 bgp_attach_attr(ea_list
**to
, struct linpool
*pool
, unsigned attr
, uintptr_t val
)
321 ea_list
*a
= lp_alloc(pool
, sizeof(ea_list
) + sizeof(eattr
));
324 a
->flags
= EALF_SORTED
;
326 bgp_set_attr(a
->attrs
, attr
, val
);
330 bgp_attach_attr_wa(ea_list
**to
, struct linpool
*pool
, unsigned attr
, unsigned len
)
332 struct adata
*ad
= bgp_alloc_adata(pool
, len
);
333 bgp_attach_attr(to
, pool
, attr
, (uintptr_t) ad
);
338 bgp_encode_attr_hdr(byte
*dst
, unsigned int flags
, unsigned code
, int len
)
342 DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code
, len
, flags
);
353 *dst
++ = flags
| BAF_EXT_LEN
;
363 aggregator_convert_to_old(struct adata
*aggr
, byte
*dst
, int *new_used
)
365 byte
*src
= aggr
->data
;
368 u32 as
= get_u32(src
);
376 /* Copy IPv4 address */
377 memcpy(dst
+ 2, src
+ 4, 4);
381 aggregator_convert_to_new(struct adata
*aggr
, byte
*dst
)
383 byte
*src
= aggr
->data
;
385 u32 as
= get_u16(src
);
388 /* Copy IPv4 address */
389 memcpy(dst
+ 4, src
+ 2, 4);
393 bgp_get_attr_len(eattr
*a
)
396 if (ATTR_KNOWN(EA_ID(a
->id
)))
398 int code
= EA_ID(a
->id
);
399 struct attr_desc
*desc
= &bgp_attr_table
[code
];
400 len
= desc
->expected_length
;
403 ASSERT(!(a
->type
& EAF_EMBEDDED
));
404 len
= a
->u
.ptr
->length
;
409 ASSERT((a
->type
& EAF_TYPE_MASK
) == EAF_TYPE_OPAQUE
);
410 len
= a
->u
.ptr
->length
;
416 #define ADVANCE(w, r, l) do { r -= l; w += l; } while (0)
419 * bgp_encode_attrs - encode BGP attributes
422 * @attrs: a list of extended attributes
423 * @remains: remaining space in the buffer
425 * The bgp_encode_attrs() function takes a list of extended attributes
426 * and converts it to its BGP representation (a part of an Update message).
428 * Result: Length of the attribute block generated or -1 if not enough space.
431 bgp_encode_attrs(struct bgp_proto
*p
, byte
*w
, ea_list
*attrs
, int remains
)
433 unsigned int i
, code
, flags
;
437 for(i
=0; i
<attrs
->count
; i
++)
439 eattr
*a
= &attrs
->attrs
[i
];
440 ASSERT(EA_PROTO(a
->id
) == EAP_BGP
);
443 /* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */
444 if (code
== BA_NEXT_HOP
)
448 /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
449 * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH
450 * as optional AS4_PATH attribute.
452 if ((code
== BA_AS_PATH
) && bgp_as4_support
&& (! p
->as4_session
))
454 len
= a
->u
.ptr
->length
;
456 if (remains
< (len
+ 4))
459 /* Using a temporary buffer because we don't know the length of the created
460 * attribute, and therefore the length of its header, in advance. Perhaps it
461 * would be better to always use BAF_EXT_LEN. */
465 int nl
= as_path_convert_to_old(a
->u
.ptr
, buf
, &new_used
);
467 DBG("BGP: Encoding old AS_PATH\n");
468 rv
= bgp_encode_attr_hdr(w
, BAF_TRANSITIVE
, BA_AS_PATH
, nl
);
469 ADVANCE(w
, remains
, rv
);
471 ADVANCE(w
, remains
, nl
);
476 if (remains
< (len
+ 4))
479 /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments
480 * here, but we don't support confederations and we have already
481 * discarded such paths in bgp_check_as_path().
484 DBG("BGP: Encoding AS4_PATH\n");
485 rv
= bgp_encode_attr_hdr(w
, BAF_OPTIONAL
| BAF_TRANSITIVE
, BA_AS4_PATH
, len
);
486 ADVANCE(w
, remains
, rv
);
487 memcpy(w
, a
->u
.ptr
->data
, len
);
488 ADVANCE(w
, remains
, len
);
493 /* The same issue with AGGREGATOR attribute */
494 if ((code
== BA_AGGREGATOR
) && bgp_as4_support
&& (! p
->as4_session
))
499 if (remains
< (len
+ 3))
502 rv
= bgp_encode_attr_hdr(w
, BAF_OPTIONAL
| BAF_TRANSITIVE
, BA_AGGREGATOR
, len
);
503 ADVANCE(w
, remains
, rv
);
504 aggregator_convert_to_old(a
->u
.ptr
, w
, &new_used
);
505 ADVANCE(w
, remains
, len
);
511 if (remains
< (len
+ 3))
514 rv
= bgp_encode_attr_hdr(w
, BAF_OPTIONAL
| BAF_TRANSITIVE
, BA_AS4_AGGREGATOR
, len
);
515 ADVANCE(w
, remains
, rv
);
516 memcpy(w
, a
->u
.ptr
->data
, len
);
517 ADVANCE(w
, remains
, len
);
522 /* Standard path continues here ... */
524 flags
= a
->flags
& (BAF_OPTIONAL
| BAF_TRANSITIVE
| BAF_PARTIAL
);
525 len
= bgp_get_attr_len(a
);
527 if (remains
< len
+ 4)
530 rv
= bgp_encode_attr_hdr(w
, flags
, code
, len
);
531 ADVANCE(w
, remains
, rv
);
533 switch (a
->type
& EAF_TYPE_MASK
)
536 case EAF_TYPE_ROUTER_ID
:
538 put_u32(w
, a
->u
.data
);
542 case EAF_TYPE_IP_ADDRESS
:
544 ip_addr ip
= *(ip_addr
*)a
->u
.ptr
->data
;
549 case EAF_TYPE_INT_SET
:
551 u32
*z
= (u32
*)a
->u
.ptr
->data
;
553 for(i
=0; i
<len
; i
+=4)
557 case EAF_TYPE_OPAQUE
:
558 case EAF_TYPE_AS_PATH
:
559 memcpy(w
, a
->u
.ptr
->data
, len
);
562 bug("bgp_encode_attrs: unknown attribute type %02x", a
->type
);
564 ADVANCE(w
, remains
, len
);
573 bgp_init_prefix(struct fib_node
*N
)
575 struct bgp_prefix
*p
= (struct bgp_prefix
*) N
;
576 p
->bucket_node
.next
= NULL
;
580 bgp_compare_u32(const u32
*x
, const u32
*y
)
582 return (*x
< *y
) ? -1 : (*x
> *y
) ? 1 : 0;
586 bgp_normalize_set(u32
*dest
, u32
*src
, unsigned cnt
)
588 memcpy(dest
, src
, sizeof(u32
) * cnt
);
589 qsort(dest
, cnt
, sizeof(u32
), (int(*)(const void *, const void *)) bgp_compare_u32
);
593 bgp_rehash_buckets(struct bgp_proto
*p
)
595 struct bgp_bucket
**old
= p
->bucket_hash
;
596 struct bgp_bucket
**new;
597 unsigned oldn
= p
->hash_size
;
599 struct bgp_bucket
*b
;
601 p
->hash_size
= p
->hash_limit
;
602 DBG("BGP: Rehashing bucket table from %d to %d\n", oldn
, p
->hash_size
);
604 if (p
->hash_limit
>= 65536)
606 new = p
->bucket_hash
= mb_allocz(p
->p
.pool
, p
->hash_size
* sizeof(struct bgp_bucket
*));
607 mask
= p
->hash_size
- 1;
608 for (i
=0; i
<oldn
; i
++)
611 old
[i
] = b
->hash_next
;
613 b
->hash_next
= new[e
];
615 b
->hash_next
->hash_prev
= b
;
622 static struct bgp_bucket
*
623 bgp_new_bucket(struct bgp_proto
*p
, ea_list
*new, unsigned hash
)
625 struct bgp_bucket
*b
;
626 unsigned ea_size
= sizeof(ea_list
) + new->count
* sizeof(eattr
);
627 unsigned ea_size_aligned
= BIRD_ALIGN(ea_size
, CPU_STRUCT_ALIGN
);
628 unsigned size
= sizeof(struct bgp_bucket
) + ea_size
;
631 unsigned index
= hash
& (p
->hash_size
- 1);
633 /* Gather total size of non-inline attributes */
634 for (i
=0; i
<new->count
; i
++)
636 eattr
*a
= &new->attrs
[i
];
637 if (!(a
->type
& EAF_EMBEDDED
))
638 size
+= BIRD_ALIGN(sizeof(struct adata
) + a
->u
.ptr
->length
, CPU_STRUCT_ALIGN
);
641 /* Create the bucket and hash it */
642 b
= mb_alloc(p
->p
.pool
, size
);
643 b
->hash_next
= p
->bucket_hash
[index
];
645 b
->hash_next
->hash_prev
= b
;
646 p
->bucket_hash
[index
] = b
;
649 add_tail(&p
->bucket_queue
, &b
->send_node
);
650 init_list(&b
->prefixes
);
651 memcpy(b
->eattrs
, new, ea_size
);
652 dest
= ((byte
*)b
->eattrs
) + ea_size_aligned
;
654 /* Copy values of non-inline attributes */
655 for (i
=0; i
<new->count
; i
++)
657 eattr
*a
= &b
->eattrs
->attrs
[i
];
658 if (!(a
->type
& EAF_EMBEDDED
))
660 struct adata
*oa
= a
->u
.ptr
;
661 struct adata
*na
= (struct adata
*) dest
;
662 memcpy(na
, oa
, sizeof(struct adata
) + oa
->length
);
664 dest
+= BIRD_ALIGN(sizeof(struct adata
) + na
->length
, CPU_STRUCT_ALIGN
);
668 /* If needed, rehash */
670 if (p
->hash_count
> p
->hash_limit
)
671 bgp_rehash_buckets(p
);
677 bgp_export_check(struct bgp_proto
*p
, ea_list
*new)
682 /* Check if next hop is valid */
683 a
= ea_find(new, EA_CODE(EAP_BGP
, BA_NEXT_HOP
));
684 if (!a
|| ipa_equal(p
->next_hop
, *(ip_addr
*)a
->u
.ptr
))
686 DBG("\tInvalid NEXT_HOP\n");
690 /* Check if we aren't forbidden to export the route by communities */
691 a
= ea_find(new, EA_CODE(EAP_BGP
, BA_COMMUNITY
));
695 if (int_set_contains(d
, BGP_COMM_NO_ADVERTISE
))
697 DBG("\tNO_ADVERTISE\n");
700 if (!p
->is_internal
&&
701 (int_set_contains(d
, BGP_COMM_NO_EXPORT
) ||
702 int_set_contains(d
, BGP_COMM_NO_EXPORT_SUBCONFED
)))
704 DBG("\tNO_EXPORT\n");
712 static struct bgp_bucket
*
713 bgp_get_bucket(struct bgp_proto
*p
, ea_list
*attrs
, int originate
)
716 unsigned i
, cnt
, hash
, code
;
719 struct bgp_bucket
*b
;
721 /* Merge the attribute list */
722 new = alloca(ea_scan(attrs
));
723 ea_merge(attrs
, new);
726 /* Normalize attributes */
735 byte buf
[EA_FORMAT_BUF_SIZE
];
740 if (EA_PROTO(a
->id
) != EAP_BGP
)
743 if (ATTR_KNOWN(code
))
745 if (!bgp_attr_table
[code
].allow_in_ebgp
&& !p
->is_internal
)
747 /* The flags might have been zero if the attr was added by filters */
748 a
->flags
= (a
->flags
& BAF_PARTIAL
) | bgp_attr_table
[code
].expected_flags
;
754 /* Don't re-export unknown non-transitive attributes */
755 if (!(a
->flags
& BAF_TRANSITIVE
))
759 if ((d
->type
& EAF_ORIGINATED
) && !originate
&& (d
->flags
& BAF_TRANSITIVE
) && (d
->flags
& BAF_OPTIONAL
))
760 d
->flags
|= BAF_PARTIAL
;
761 switch (d
->type
& EAF_TYPE_MASK
)
763 case EAF_TYPE_INT_SET
:
765 struct adata
*z
= alloca(sizeof(struct adata
) + d
->u
.ptr
->length
);
766 z
->length
= d
->u
.ptr
->length
;
767 bgp_normalize_set((u32
*) z
->data
, (u32
*) d
->u
.ptr
->data
, z
->length
/ 4);
779 for(b
=p
->bucket_hash
[hash
& (p
->hash_size
- 1)]; b
; b
=b
->hash_next
)
780 if (b
->hash
== hash
&& ea_same(b
->eattrs
, new))
782 DBG("Found bucket.\n");
786 /* Ensure that there are all mandatory attributes */
787 for(i
=0; i
<ARRAY_SIZE(bgp_mandatory_attrs
); i
++)
788 if (!(seen
& (1 << bgp_mandatory_attrs
[i
])))
790 log(L_ERR
"%s: Mandatory attribute %s missing", p
->p
.name
, bgp_attr_table
[bgp_mandatory_attrs
[i
]].name
);
794 if (!bgp_export_check(p
, new))
797 /* Create new bucket */
798 DBG("Creating bucket.\n");
799 return bgp_new_bucket(p
, new, hash
);
803 bgp_free_bucket(struct bgp_proto
*p
, struct bgp_bucket
*buck
)
806 buck
->hash_next
->hash_prev
= buck
->hash_prev
;
808 buck
->hash_prev
->hash_next
= buck
->hash_next
;
810 p
->bucket_hash
[buck
->hash
& (p
->hash_size
-1)] = buck
->hash_next
;
815 bgp_rt_notify(struct proto
*P
, net
*n
, rte
*new, rte
*old UNUSED
, ea_list
*attrs
)
817 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
818 struct bgp_bucket
*buck
;
819 struct bgp_prefix
*px
;
821 DBG("BGP: Got route %I/%d %s\n", n
->n
.prefix
, n
->n
.pxlen
, new ? "up" : "down");
825 buck
= bgp_get_bucket(p
, attrs
, new->attrs
->source
!= RTS_BGP
);
826 if (!buck
) /* Inconsistent attribute list */
831 if (!(buck
= p
->withdraw_bucket
))
833 buck
= p
->withdraw_bucket
= mb_alloc(P
->pool
, sizeof(struct bgp_bucket
));
834 init_list(&buck
->prefixes
);
837 px
= fib_get(&p
->prefix_fib
, &n
->n
.prefix
, n
->n
.pxlen
);
838 if (px
->bucket_node
.next
)
840 DBG("\tRemoving old entry.\n");
841 rem_node(&px
->bucket_node
);
843 add_tail(&buck
->prefixes
, &px
->bucket_node
);
844 bgp_schedule_packet(p
->conn
, PKT_UPDATE
);
849 bgp_create_attrs(struct bgp_proto
*p
, rte
*e
, ea_list
**attrs
, struct linpool
*pool
)
851 ea_list
*ea
= lp_alloc(pool
, sizeof(ea_list
) + 4*sizeof(eattr
));
857 ea
->flags
= EALF_SORTED
;
860 bgp_set_attr(ea
->attrs
, BA_ORIGIN
,
861 ((rta
->source
== RTS_OSPF_EXT1
) || (rta
->source
== RTS_OSPF_EXT2
)) ? ORIGIN_INCOMPLETE
: ORIGIN_IGP
);
864 bgp_set_attr_wa(ea
->attrs
+1, pool
, BA_AS_PATH
, 0);
867 z
= bgp_set_attr_wa(ea
->attrs
+1, pool
, BA_AS_PATH
, bgp_as4_support
? 6 : 4);
868 z
[0] = AS_PATH_SEQUENCE
;
872 put_u32(z
+2, p
->local_as
);
874 put_u16(z
+2, p
->local_as
);
877 z
= bgp_set_attr_wa(ea
->attrs
+2, pool
, BA_NEXT_HOP
, NEXT_HOP_LENGTH
);
878 if (p
->cf
->next_hop_self
||
879 rta
->dest
!= RTD_ROUTER
||
880 (!p
->is_internal
&& (e
->attrs
->iface
!= p
->neigh
->iface
)))
881 set_next_hop(z
, p
->source_addr
);
883 set_next_hop(z
, e
->attrs
->gw
);
885 bgp_set_attr(ea
->attrs
+3, BA_LOCAL_PREF
, 0);
887 return 0; /* Leave decision to the filters */
892 bgp_as_path_loopy(struct bgp_proto
*p
, rta
*a
)
894 eattr
*e
= ea_find(a
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
895 return (e
&& as_path_is_member(e
->u
.ptr
, p
->local_as
));
899 bgp_originator_id_loopy(struct bgp_proto
*p
, rta
*a
)
901 eattr
*e
= ea_find(a
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGINATOR_ID
));
902 return (e
&& (e
->u
.data
== p
->local_id
));
906 bgp_cluster_list_loopy(struct bgp_proto
*p
, rta
*a
)
908 eattr
*e
= ea_find(a
->eattrs
, EA_CODE(EAP_BGP
, BA_CLUSTER_LIST
));
909 return (e
&& p
->rr_client
&& int_set_contains(e
->u
.ptr
, p
->rr_cluster_id
));
914 bgp_path_prepend(rte
*e
, ea_list
**attrs
, struct linpool
*pool
, u32 as
)
916 eattr
*a
= ea_find(e
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
917 bgp_attach_attr(attrs
, pool
, BA_AS_PATH
, (uintptr_t) as_path_prepend(pool
, a
->u
.ptr
, as
));
921 bgp_cluster_list_prepend(rte
*e
, ea_list
**attrs
, struct linpool
*pool
, u32 cid
)
923 eattr
*a
= ea_find(e
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_CLUSTER_LIST
));
924 bgp_attach_attr(attrs
, pool
, BA_CLUSTER_LIST
, (uintptr_t) int_set_add(pool
, a
? a
->u
.ptr
: NULL
, cid
));
928 bgp_update_attrs(struct bgp_proto
*p
, rte
*e
, ea_list
**attrs
, struct linpool
*pool
, int rr
)
932 if (!p
->is_internal
&& !p
->rs_client
)
934 bgp_path_prepend(e
, attrs
, pool
, p
->local_as
);
936 /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
937 * propagated to other neighboring ASes.
938 * Perhaps it would be better to undefine it.
940 a
= ea_find(e
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_MULTI_EXIT_DISC
));
942 bgp_attach_attr(attrs
, pool
, BA_MULTI_EXIT_DISC
, 0);
945 a
= ea_find(e
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_NEXT_HOP
));
946 if (a
&& !p
->cf
->next_hop_self
&& (p
->is_internal
|| (!p
->is_internal
&& e
->attrs
->iface
== p
->neigh
->iface
)))
948 /* Leave the original next hop attribute; we will check later where it points */
952 /* Need to create new one */
953 byte
*b
= bgp_attach_attr_wa(attrs
, pool
, BA_NEXT_HOP
, NEXT_HOP_LENGTH
);
954 set_next_hop(b
, p
->source_addr
);
959 /* Handling route reflection, RFC 4456 */
960 struct bgp_proto
*src
= (struct bgp_proto
*) e
->attrs
->proto
;
962 a
= ea_find(e
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGINATOR_ID
));
964 bgp_attach_attr(attrs
, pool
, BA_ORIGINATOR_ID
, src
->remote_id
);
966 /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
967 bgp_cluster_list_prepend(e
, attrs
, pool
, src
->rr_client
? src
->rr_cluster_id
: p
->rr_cluster_id
);
969 /* Two RR clients with different cluster ID, hmmm */
970 if (src
->rr_client
&& p
->rr_client
&& (src
->rr_cluster_id
!= p
->rr_cluster_id
))
971 bgp_cluster_list_prepend(e
, attrs
, pool
, p
->rr_cluster_id
);
974 return 0; /* Leave decision to the filters */
978 bgp_import_control(struct proto
*P
, rte
**new, ea_list
**attrs
, struct linpool
*pool
)
981 struct bgp_proto
*p
= (struct bgp_proto
*) P
;
982 struct bgp_proto
*new_bgp
= (e
->attrs
->proto
->proto
== &proto_bgp
) ? (struct bgp_proto
*) e
->attrs
->proto
: NULL
;
984 if (p
== new_bgp
) /* Poison reverse updates */
988 /* We should check for a cluster list loop here, because the receiving BGP instance
989 might have a different cluster ID */
990 if (bgp_cluster_list_loopy(p
, e
->attrs
))
993 if (p
->local_as
== new_bgp
->local_as
&& p
->is_internal
&& new_bgp
->is_internal
)
995 /* Redistribution of internal routes with IBGP */
996 if (p
->rr_client
|| new_bgp
->rr_client
)
997 /* Route reflection, RFC 4456 */
998 return bgp_update_attrs(p
, e
, attrs
, pool
, 1);
1003 return bgp_update_attrs(p
, e
, attrs
, pool
, 0);
1006 return bgp_create_attrs(p
, e
, attrs
, pool
);
1010 bgp_get_neighbor(rte
*r
)
1012 eattr
*e
= ea_find(r
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1015 if (e
&& as_path_get_first(e
->u
.ptr
, &as
))
1018 return ((struct bgp_proto
*) r
->attrs
->proto
)->remote_as
;
1022 bgp_rte_better(rte
*new, rte
*old
)
1024 struct bgp_proto
*new_bgp
= (struct bgp_proto
*) new->attrs
->proto
;
1025 struct bgp_proto
*old_bgp
= (struct bgp_proto
*) old
->attrs
->proto
;
1029 /* Start with local preferences */
1030 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_LOCAL_PREF
));
1031 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_LOCAL_PREF
));
1032 n
= x
? x
->u
.data
: new_bgp
->cf
->default_local_pref
;
1033 o
= y
? y
->u
.data
: old_bgp
->cf
->default_local_pref
;
1039 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1040 if (new_bgp
->cf
->compare_path_lengths
|| old_bgp
->cf
->compare_path_lengths
)
1042 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1043 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1044 n
= x
? as_path_getlen(x
->u
.ptr
) : AS_PATH_MAXLEN
;
1045 o
= y
? as_path_getlen(y
->u
.ptr
) : AS_PATH_MAXLEN
;
1052 /* RFC 4271 9.1.2.2. b) Use origins */
1053 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGIN
));
1054 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGIN
));
1055 n
= x
? x
->u
.data
: ORIGIN_INCOMPLETE
;
1056 o
= y
? y
->u
.data
: ORIGIN_INCOMPLETE
;
1062 /* RFC 4271 9.1.2.2. c) Compare MED's */
1063 if (bgp_get_neighbor(new) == bgp_get_neighbor(old
))
1065 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_MULTI_EXIT_DISC
));
1066 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_MULTI_EXIT_DISC
));
1067 n
= x
? x
->u
.data
: new_bgp
->cf
->default_med
;
1068 o
= y
? y
->u
.data
: old_bgp
->cf
->default_med
;
1075 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1076 if (new_bgp
->is_internal
> old_bgp
->is_internal
)
1078 if (new_bgp
->is_internal
< old_bgp
->is_internal
)
1081 /* Skipping RFC 4271 9.1.2.2. e) */
1082 /* We don't have interior distances */
1084 /* RFC 4456 9. b) Compare cluster list lengths */
1085 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_CLUSTER_LIST
));
1086 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_CLUSTER_LIST
));
1087 n
= x
? int_set_get_size(x
->u
.ptr
) : 0;
1088 o
= y
? int_set_get_size(y
->u
.ptr
) : 0;
1094 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1095 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1096 x
= ea_find(new->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGINATOR_ID
));
1097 y
= ea_find(old
->attrs
->eattrs
, EA_CODE(EAP_BGP
, BA_ORIGINATOR_ID
));
1098 n
= x
? x
->u
.data
: new_bgp
->remote_id
;
1099 o
= y
? y
->u
.data
: old_bgp
->remote_id
;
1101 /* RFC 5004 - prefer older routes */
1102 /* (if both are external and from different peer) */
1103 if ((new_bgp
->cf
->prefer_older
|| old_bgp
->cf
->prefer_older
) &&
1104 !new_bgp
->is_internal
&& n
!= o
)
1107 /* rest of RFC 4271 9.1.2.2. f) */
1113 /* RFC 4271 9.1.2.2. g) Compare peer IP addresses */
1114 return (ipa_compare(new_bgp
->cf
->remote_ip
, old_bgp
->cf
->remote_ip
) < 0);
1117 static struct adata
*
1118 bgp_aggregator_convert_to_new(struct adata
*old
, struct linpool
*pool
)
1120 struct adata
*newa
= lp_alloc(pool
, sizeof(struct adata
) + 8);
1122 aggregator_convert_to_new(old
, newa
->data
);
1127 /* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
1128 * and append path old4 (in 4B format).
1130 static struct adata
*
1131 bgp_merge_as_paths(struct adata
*old2
, struct adata
*old4
, int req_as
, struct linpool
*pool
)
1133 byte buf
[old2
->length
* 2];
1135 int ol
= as_path_convert_to_new(old2
, buf
, req_as
);
1136 int nl
= ol
+ (old4
? old4
->length
: 0);
1138 struct adata
*newa
= lp_alloc(pool
, sizeof(struct adata
) + nl
);
1140 memcpy(newa
->data
, buf
, ol
);
1141 if (old4
) memcpy(newa
->data
+ ol
, old4
->data
, old4
->length
);
1147 as4_aggregator_valid(struct adata
*aggr
)
1149 if (aggr
->length
!= 8)
1152 u32
*a
= (u32
*) aggr
->data
;
1154 if ((a
[0] == 0) || (a
[1] == 0))
1161 /* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
1163 bgp_reconstruct_4b_atts(struct bgp_proto
*p
, rta
*a
, struct linpool
*pool
)
1165 eattr
*p2
=ea_find(a
->eattrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1166 eattr
*p4
=ea_find(a
->eattrs
, EA_CODE(EAP_BGP
, BA_AS4_PATH
));
1167 eattr
*a2
=ea_find(a
->eattrs
, EA_CODE(EAP_BGP
, BA_AGGREGATOR
));
1168 eattr
*a4
=ea_find(a
->eattrs
, EA_CODE(EAP_BGP
, BA_AS4_AGGREGATOR
));
1171 if (a4
&& !as4_aggregator_valid(a4
->u
.ptr
))
1173 log(L_WARN
"%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p
->p
.name
);
1180 u32 a2_as
= get_u16(a2
->u
.ptr
->data
);
1184 if (a2_as
!= AS_TRANS
)
1186 /* Routes were aggregated by an old router and therefore AS4_PATH
1187 * and AS4_AGGREGATOR are invalid
1189 * Convert AS_PATH and AGGREGATOR to 4B format and finish.
1192 a2
->u
.ptr
= bgp_aggregator_convert_to_new(a2
->u
.ptr
, pool
);
1193 p2
->u
.ptr
= bgp_merge_as_paths(p2
->u
.ptr
, NULL
, AS_PATH_MAXLEN
, pool
);
1199 /* Common case, use AS4_AGGREGATOR attribute */
1200 a2
->u
.ptr
= a4
->u
.ptr
;
1205 /* Common case, use old AGGREGATOR attribute */
1206 a2
->u
.ptr
= bgp_aggregator_convert_to_new(a2
->u
.ptr
, pool
);
1208 if ((a2_as
== AS_TRANS
) && !a4_removed
)
1209 log(L_WARN
"%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p
->p
.name
);
1214 log(L_WARN
"%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p
->p
.name
);
1216 int p2_len
= as_path_getlen_int(p2
->u
.ptr
, 2);
1217 int p4_len
= p4
? validate_as4_path(p
, p4
->u
.ptr
) : -1;
1219 if (p4
&& (p4_len
< 0))
1220 log(L_WARN
"%s: AS4_PATH attribute is malformed, skipping attribute", p
->p
.name
);
1222 if ((p4_len
<= 0) || (p2_len
< p4_len
))
1223 p2
->u
.ptr
= bgp_merge_as_paths(p2
->u
.ptr
, NULL
, AS_PATH_MAXLEN
, pool
);
1225 p2
->u
.ptr
= bgp_merge_as_paths(p2
->u
.ptr
, p4
->u
.ptr
, p2_len
- p4_len
, pool
);
1229 bgp_remove_as4_attrs(struct bgp_proto
*p
, rta
*a
)
1231 unsigned id1
= EA_CODE(EAP_BGP
, BA_AS4_PATH
);
1232 unsigned id2
= EA_CODE(EAP_BGP
, BA_AS4_AGGREGATOR
);
1233 ea_list
**el
= &(a
->eattrs
);
1235 /* We know that ea_lists constructed in bgp_decode_attrs() have one attribute per ea_list struct */
1238 unsigned fid
= (*el
)->attrs
[0].id
;
1240 if ((fid
== id1
) || (fid
== id2
))
1244 log(L_WARN
"BGP: Unexpected AS4_* attributes received");
1247 el
= &((*el
)->next
);
1252 * bgp_decode_attrs - check and decode BGP attributes
1254 * @attr: start of attribute block
1255 * @len: length of attribute block
1256 * @pool: linear pool to make all the allocations in
1257 * @mandatory: 1 iff presence of mandatory attributes has to be checked
1259 * This function takes a BGP attribute block (a part of an Update message), checks
1260 * its consistency and converts it to a list of BIRD route attributes represented
1264 bgp_decode_attrs(struct bgp_conn
*conn
, byte
*attr
, unsigned int len
, struct linpool
*pool
, int mandatory
)
1266 struct bgp_proto
*bgp
= conn
->bgp
;
1267 rta
*a
= lp_alloc(pool
, sizeof(struct rta
));
1268 unsigned int flags
, code
, l
, i
, type
;
1270 byte
*z
, *attr_start
;
1276 a
->source
= RTS_BGP
;
1277 a
->scope
= SCOPE_UNIVERSE
;
1278 a
->cast
= RTC_UNICAST
;
1279 a
->dest
= RTD_ROUTER
;
1282 a
->from
= bgp
->cf
->remote_ip
;
1285 /* Parse the attributes */
1286 bzero(seen
, sizeof(seen
));
1287 DBG("BGP: Parsing attributes\n");
1296 if (flags
& BAF_EXT_LEN
)
1316 DBG("Attr %02x %02x %d\n", code
, flags
, l
);
1317 if (seen
[code
/8] & (1 << (code
%8)))
1319 if (ATTR_KNOWN(code
))
1321 struct attr_desc
*desc
= &bgp_attr_table
[code
];
1322 if (desc
->expected_length
>= 0 && desc
->expected_length
!= (int) l
)
1323 { errcode
= 5; goto err
; }
1324 if ((desc
->expected_flags
^ flags
) & (BAF_OPTIONAL
| BAF_TRANSITIVE
))
1325 { errcode
= 4; goto err
; }
1326 if (!desc
->allow_in_ebgp
&& !bgp
->is_internal
)
1330 errcode
= desc
->validate(bgp
, z
, l
);
1336 else if (code
== BA_AS_PATH
)
1338 /* Special case as it might also trim the attribute */
1339 if (validate_as_path(bgp
, z
, &l
) < 0)
1340 { errcode
= 11; goto err
; }
1344 else /* Unknown attribute */
1346 if (!(flags
& BAF_OPTIONAL
))
1347 { errcode
= 2; goto err
; }
1348 type
= EAF_TYPE_OPAQUE
;
1351 // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag
1352 // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL))
1353 // { errcode = 4; goto err; }
1355 seen
[code
/8] |= (1 << (code
%8));
1356 ea
= lp_alloc(pool
, sizeof(ea_list
) + sizeof(eattr
));
1357 ea
->next
= a
->eattrs
;
1361 ea
->attrs
[0].id
= EA_CODE(EAP_BGP
, code
);
1362 ea
->attrs
[0].flags
= flags
;
1363 ea
->attrs
[0].type
= type
;
1364 if (type
& EAF_EMBEDDED
)
1368 ad
= lp_alloc(pool
, sizeof(struct adata
) + l
);
1369 ea
->attrs
[0].u
.ptr
= ad
;
1371 memcpy(ad
->data
, z
, l
);
1375 case EAF_TYPE_ROUTER_ID
:
1378 ea
->attrs
[0].u
.data
= *z
;
1380 ea
->attrs
[0].u
.data
= get_u32(z
);
1382 case EAF_TYPE_IP_ADDRESS
:
1383 ipa_ntoh(*(ip_addr
*)ad
->data
);
1385 case EAF_TYPE_INT_SET
:
1387 u32
*z
= (u32
*) ad
->data
;
1388 for(i
=0; i
<ad
->length
/4; i
++)
1396 /* If we received MP_REACH_NLRI we should check mandatory attributes */
1397 if (bgp
->mp_reach_len
!= 0)
1401 /* If there is no (reachability) NLRI, we should exit now */
1405 /* Check if all mandatory attributes are present */
1406 for(i
=0; i
< ARRAY_SIZE(bgp_mandatory_attrs
); i
++)
1408 code
= bgp_mandatory_attrs
[i
];
1409 if (!(seen
[code
/8] & (1 << (code
%8))))
1411 bgp_error(conn
, 3, 3, &bgp_mandatory_attrs
[i
], 1);
1416 /* When receiving attributes from non-AS4-aware BGP speaker,
1417 * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
1419 if (bgp_as4_support
&& (! bgp
->as4_session
))
1420 bgp_reconstruct_4b_atts(bgp
, a
, pool
);
1422 if (bgp_as4_support
)
1423 bgp_remove_as4_attrs(bgp
, a
);
1425 /* If the AS path attribute contains our AS, reject the routes */
1426 if (bgp_as_path_loopy(bgp
, a
))
1429 /* Two checks for IBGP loops caused by route reflection, RFC 4456 */
1430 if (bgp_originator_id_loopy(bgp
, a
) ||
1431 bgp_cluster_list_loopy(bgp
, a
))
1434 /* If there's no local preference, define one */
1435 if (!(seen
[0] & (1 << BA_LOCAL_PREF
)))
1436 bgp_attach_attr(&a
->eattrs
, pool
, BA_LOCAL_PREF
, 0);
1441 DBG("BGP: Path loop!\n");
1445 bgp_error(conn
, 3, 1, NULL
, 0);
1449 bgp_error(conn
, 3, errcode
, attr_start
, z
+l
-attr_start
);
1454 bgp_get_attr(eattr
*a
, byte
*buf
, int buflen
)
1456 unsigned int i
= EA_ID(a
->id
);
1457 struct attr_desc
*d
;
1461 d
= &bgp_attr_table
[i
];
1462 buf
+= bsprintf(buf
, "%s", d
->name
);
1467 d
->format(a
, buf
, buflen
);
1472 bsprintf(buf
, "%02x%s", i
, (a
->flags
& BAF_TRANSITIVE
) ? " [t]" : "");
1477 bgp_attr_init(struct bgp_proto
*p
)
1480 p
->hash_limit
= p
->hash_size
* 4;
1481 p
->bucket_hash
= mb_allocz(p
->p
.pool
, p
->hash_size
* sizeof(struct bgp_bucket
*));
1482 init_list(&p
->bucket_queue
);
1483 p
->withdraw_bucket
= NULL
;
1484 fib_init(&p
->prefix_fib
, p
->p
.pool
, sizeof(struct bgp_prefix
), 0, bgp_init_prefix
);
1488 bgp_get_route_info(rte
*e
, byte
*buf
, ea_list
*attrs
)
1490 eattr
*p
= ea_find(attrs
, EA_CODE(EAP_BGP
, BA_AS_PATH
));
1491 eattr
*o
= ea_find(attrs
, EA_CODE(EAP_BGP
, BA_ORIGIN
));
1494 buf
+= bsprintf(buf
, " (%d) [", e
->pref
);
1495 if (p
&& as_path_get_last(p
->u
.ptr
, &origas
))
1496 buf
+= bsprintf(buf
, "AS%u", origas
);
1498 buf
+= bsprintf(buf
, "%c", "ie?"[o
->u
.data
]);