]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/attrs.c
9b243763212738bf3b6aaca98a659d83ff364cb0
[thirdparty/bird.git] / proto / bgp / attrs.c
1 /*
2 * BIRD -- BGP Attributes
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 #undef LOCAL_DEBUG
12
13 #include <stdlib.h>
14
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "conf/conf.h"
21 #include "lib/resource.h"
22 #include "lib/string.h"
23 #include "lib/unaligned.h"
24
25 #include "bgp.h"
26
27 /*
28 * UPDATE message error handling
29 *
30 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32 * - Checks of some optional attribute values are missing.
33 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34 * are probably inadequate.
35 *
36 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37 * 4271 does not explicitly specify the behavior in that case.
38 *
39 * Loop detection related to route reflection (based on ORIGINATOR_ID
40 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41 * specifies that such updates should be ignored, but that is generally
42 * a bad idea.
43 *
44 * BGP attribute table has several hooks:
45 *
46 * export - Hook that validates and normalizes attribute during export phase.
47 * Receives eattr, may modify it (e.g., sort community lists for canonical
48 * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
49 * necessary. May assume that eattr has value valid w.r.t. its type, but may be
50 * invalid w.r.t. BGP constraints. Optional.
51 *
52 * encode - Hook that converts internal representation to external one during
53 * packet writing. Receives eattr and puts it in the buffer (including attribute
54 * header). Returns number of bytes, or -1 if not enough space. May assume that
55 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
56 * for all known attributes that exist internally after export phase (i.e., all
57 * except pseudoattributes MP_(UN)REACH_NLRI).
58 *
59 * decode - Hook that converts external representation to internal one during
60 * packet parsing. Receives attribute data in buffer, validates it and adds
61 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
62 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
63 *
64 * format - Optional hook that converts eattr to textual representation.
65 */
66
67
/*
 * Descriptor of one BGP attribute; one entry of bgp_attr_table[], indexed by
 * the BGP attribute code. The hooks are described in the comment above.
 */
struct bgp_attr_desc {
  const char *name;	/* Attribute name; NULL marks an unknown code (see bgp_attr_known()) */
  uint type;		/* EAF_TYPE_* used when the attribute is stored (see bgp_set_attr()) */
  uint flags;		/* BAF_* flags forced onto the attribute in bgp_export_attr() */
  /* Export-phase validation/normalization hook; called through CALL() in bgp_export_attr() */
  void (*export)(struct bgp_export_state *s, eattr *a);
  /* Writes the attribute into buf; returns bytes written or -1 if it does not fit */
  int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
  /* Parses received attribute data and adds the result to *to */
  void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
  /* Textual formatting hook */
  void (*format)(eattr *ea, byte *buf, uint size);
};
77
/* Forward declaration; the table itself is defined after the attribute hooks */
static const struct bgp_attr_desc bgp_attr_table[];

/* Forward declaration; needed by bgp_set_attr() below */
static inline int bgp_attr_known(uint code);
81
82 eattr *
83 bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
84 {
85 ASSERT(bgp_attr_known(code));
86
87 return ea_set_attr(
88 attrs,
89 pool,
90 EA_CODE(PROTOCOL_BGP, code),
91 flags,
92 bgp_attr_table[code].type,
93 val
94 );
95 }
96
97
98
/*
 * Helper macros for attribute hooks. All of them expect a state variable
 * named `s` in the calling scope, and DISCARD(), WITHDRAW() and UNSET()
 * contain a `return`, so they may be used only in functions returning void.
 */

/* Log a message with class L_REMOTE, prefixed by the protocol instance name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log a message and return from the hook without any further action */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log a message, set the err_withdraw flag in the state and return */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark attribute @a as undefined and return (see bgp_export_attr()) */
#define UNSET(a) \
  ({ a->type = EAF_TYPE_UNDEF; return; })

/* Common message formats for the macros above */
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY "Missing mandatory %s attribute"
116
117
118 static inline int
119 bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
120 {
121 *buf++ = flags;
122 *buf++ = code;
123 *buf++ = len;
124 return 3;
125 }
126
127 static inline int
128 bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
129 {
130 *buf++ = flags | BAF_EXT_LEN;
131 *buf++ = code;
132 put_u16(buf, len);
133 return 4;
134 }
135
136 static inline int
137 bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
138 {
139 if (len < 256)
140 return bgp_put_attr_hdr3(buf, code, flags, len);
141 else
142 return bgp_put_attr_hdr4(buf, code, flags, len);
143 }
144
145 static int
146 bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
147 {
148 if (size < (3+1))
149 return -1;
150
151 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
152 buf[3] = a->u.data;
153
154 return 3+1;
155 }
156
157 static int
158 bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
159 {
160 if (size < (3+4))
161 return -1;
162
163 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
164 put_u32(buf+3, a->u.data);
165
166 return 3+4;
167 }
168
169 static int
170 bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
171 {
172 uint len = a->u.ptr->length;
173
174 if (size < (4+len))
175 return -1;
176
177 uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
178 put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
179
180 return hdr + len;
181 }
182
183 static int
184 bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint len)
185 {
186 if (size < (4+len))
187 return -1;
188
189 uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
190 memcpy(buf + hdr, data, len);
191
192 return hdr + len;
193 }
194
195 static int
196 bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
197 {
198 return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
199 }
200
201
202 /*
203 * AIGP handling
204 */
205
206 static int
207 bgp_aigp_valid(byte *data, uint len, char *err, uint elen)
208 {
209 byte *pos = data;
210 char *err_dsc = NULL;
211 uint err_val = 0;
212
213 #define BAD(DSC,VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; })
214 while (len)
215 {
216 if (len < 3)
217 BAD("TLV framing error", len);
218
219 /* Process one TLV */
220 uint ptype = pos[0];
221 uint plen = get_u16(pos + 1);
222
223 if (len < plen)
224 BAD("TLV framing error", plen);
225
226 if (plen < 3)
227 BAD("Bad TLV length", plen);
228
229 if ((ptype == BGP_AIGP_METRIC) && (plen != 11))
230 BAD("Bad AIGP TLV length", plen);
231
232 ADVANCE(pos, len, plen);
233 }
234 #undef BAD
235
236 return 1;
237
238 bad:
239 if (err)
240 if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0)
241 err[0] = 0;
242
243 return 0;
244 }
245
246 static const byte *
247 bgp_aigp_get_tlv(const struct adata *ad, uint type)
248 {
249 if (!ad)
250 return NULL;
251
252 uint len = ad->length;
253 const byte *pos = ad->data;
254
255 while (len)
256 {
257 uint ptype = pos[0];
258 uint plen = get_u16(pos + 1);
259
260 if (ptype == type)
261 return pos;
262
263 ADVANCE(pos, len, plen);
264 }
265
266 return NULL;
267 }
268
269 static const struct adata *
270 bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen)
271 {
272 uint len = ad ? ad->length : 0;
273 const byte *pos = ad ? ad->data : NULL;
274 struct adata *res = lp_alloc_adata(pool, len + 3 + dlen);
275 byte *dst = res->data;
276 byte *tlv = NULL;
277 int del = 0;
278
279 while (len)
280 {
281 uint ptype = pos[0];
282 uint plen = get_u16(pos + 1);
283
284 /* Find position for new TLV */
285 if ((ptype >= type) && !tlv)
286 {
287 tlv = dst;
288 dst += 3 + dlen;
289 }
290
291 /* Skip first matching TLV, copy others */
292 if ((ptype == type) && !del)
293 del = 1;
294 else
295 {
296 memcpy(dst, pos, plen);
297 dst += plen;
298 }
299
300 ADVANCE(pos, len, plen);
301 }
302
303 if (!tlv)
304 {
305 tlv = dst;
306 dst += 3 + dlen;
307 }
308
309 /* Store the TLD */
310 put_u8(tlv + 0, type);
311 put_u16(tlv + 1, 3 + dlen);
312 memcpy(tlv + 3, data, dlen);
313
314 /* Update length */
315 res->length = dst - res->data;
316
317 return res;
318 }
319
320 static u64 UNUSED
321 bgp_aigp_get_metric(const struct adata *ad, u64 def)
322 {
323 const byte *b = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC);
324 return b ? get_u64(b + 3) : def;
325 }
326
327 static const struct adata *
328 bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
329 {
330 byte data[8];
331 put_u64(data, metric);
332 return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, data, 8);
333 }
334
335 int
336 bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
337 {
338 eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
339 if (!a)
340 return 0;
341
342 const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
343 if (!b)
344 return 0;
345
346 u64 aigp = get_u64(b + 3);
347 u64 step = e->attrs->igp_metric;
348
349 if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
350 step = BGP_AIGP_MAX;
351
352 if (!step)
353 step = 1;
354
355 *ad = a->u.ptr;
356 *metric = aigp + step;
357 if (*metric < aigp)
358 *metric = BGP_AIGP_MAX;
359
360 return 1;
361 }
362
363 static inline int
364 bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
365 {
366 if (e->attrs->source == RTS_BGP)
367 return 0;
368
369 *metric = rt_get_igp_metric(e);
370 *ad = NULL;
371 return *metric < IGP_METRIC_UNKNOWN;
372 }
373
374
375 /*
376 * Attribute hooks
377 */
378
379 static void
380 bgp_export_origin(struct bgp_export_state *s, eattr *a)
381 {
382 if (a->u.data > 2)
383 WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
384 }
385
386 static void
387 bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
388 {
389 if (len != 1)
390 WITHDRAW(BAD_LENGTH, "ORIGIN", len);
391
392 if (data[0] > 2)
393 WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
394
395 bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
396 }
397
398 static void
399 bgp_format_origin(eattr *a, byte *buf, uint size UNUSED)
400 {
401 static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
402
403 bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
404 }
405
406
407 static int
408 bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
409 {
410 const byte *data = a->u.ptr->data;
411 uint len = a->u.ptr->length;
412
413 if (!s->as4_session)
414 {
415 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
416 byte *dst = alloca(len);
417 len = as_path_32to16(dst, data, len);
418 data = dst;
419 }
420
421 return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
422 }
423
424 static void
425 bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
426 {
427 struct bgp_proto *p = s->proto;
428 int as_length = s->as4_session ? 4 : 2;
429 int as_sets = p->cf->allow_as_sets;
430 int as_confed = p->cf->confederation && p->is_interior;
431 char err[128];
432
433 if (!as_path_valid(data, len, as_length, as_sets, as_confed, err, sizeof(err)))
434 WITHDRAW("Malformed AS_PATH attribute - %s", err);
435
436 /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
437 if (p->is_interior && !p->is_internal &&
438 ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
439 WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
440
441 if (!s->as4_session)
442 {
443 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
444 byte *src = data;
445 data = alloca(2*len);
446 len = as_path_16to32(data, src, len);
447 }
448
449 bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
450 }
451
452
453 static int
454 bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
455 {
456 /*
457 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
458 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
459 * store it and encode it later by AFI-specific hooks.
460 */
461
462 if (!s->mp_reach)
463 {
464 // ASSERT(a->u.ptr->length == sizeof(ip_addr));
465
466 /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
467 ip_addr *addr = (void *) a->u.ptr->data;
468 if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
469 return 0;
470
471 if (size < (3+4))
472 return -1;
473
474 bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
475 put_ip4(buf+3, ipa_to_ip4(*addr));
476
477 return 3+4;
478 }
479 else
480 {
481 s->mp_next_hop = a;
482 return 0;
483 }
484 }
485
486 static void
487 bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
488 {
489 if (len != 4)
490 WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
491
492 /* Semantic checks are done later */
493 s->ip_next_hop_len = len;
494 s->ip_next_hop_data = data;
495 }
496
497 /* TODO: This function should use AF-specific hook */
498 static void
499 bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED)
500 {
501 ip_addr *nh = (void *) a->u.ptr->data;
502 uint len = a->u.ptr->length;
503
504 ASSERT((len == 16) || (len == 32));
505
506 /* in IPv6, we may have two addresses in NEXT HOP */
507 if ((len == 16) || ipa_zero(nh[1]))
508 bsprintf(buf, "%I", nh[0]);
509 else
510 bsprintf(buf, "%I %I", nh[0], nh[1]);
511 }
512
513
514 static void
515 bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
516 {
517 if (len != 4)
518 WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
519
520 u32 val = get_u32(data);
521 bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
522 }
523
524
525 static void
526 bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
527 {
528 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
529 UNSET(a);
530 }
531
532 static void
533 bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
534 {
535 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
536 DISCARD(BAD_EBGP, "LOCAL_PREF");
537
538 if (len != 4)
539 WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
540
541 u32 val = get_u32(data);
542 bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
543 }
544
545
546 static void
547 bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
548 {
549 if (len != 0)
550 DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
551
552 bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
553 }
554
555 static int
556 bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
557 {
558 const byte *data = a->u.ptr->data;
559 uint len = a->u.ptr->length;
560
561 if (!s->as4_session)
562 {
563 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
564 byte *dst = alloca(6);
565 len = aggregator_32to16(dst, data);
566 }
567
568 return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
569 }
570
571 static void
572 bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
573 {
574 if (len != (s->as4_session ? 8 : 6))
575 DISCARD(BAD_LENGTH, "AGGREGATOR", len);
576
577 if (!s->as4_session)
578 {
579 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
580 byte *src = data;
581 data = alloca(8);
582 len = aggregator_16to32(data, src);
583 }
584
585 bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
586 }
587
588 static void
589 bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED)
590 {
591 const byte *data = a->u.ptr->data;
592
593 bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
594 }
595
596
597 static void
598 bgp_export_community(struct bgp_export_state *s, eattr *a)
599 {
600 if (a->u.ptr->length == 0)
601 UNSET(a);
602
603 a->u.ptr = int_set_sort(s->pool, a->u.ptr);
604 }
605
606 static void
607 bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
608 {
609 if (!len || (len % 4))
610 WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
611
612 struct adata *ad = lp_alloc_adata(s->pool, len);
613 get_u32s(data, (u32 *) ad->data, len / 4);
614 bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
615 }
616
617
618 static void
619 bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
620 {
621 if (!s->proto->is_internal)
622 UNSET(a);
623 }
624
625 static void
626 bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
627 {
628 if (!s->proto->is_internal)
629 DISCARD(BAD_EBGP, "ORIGINATOR_ID");
630
631 if (len != 4)
632 WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
633
634 u32 val = get_u32(data);
635 bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
636 }
637
638
639 static void
640 bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
641 {
642 if (!s->proto->is_internal)
643 UNSET(a);
644
645 if (a->u.ptr->length == 0)
646 UNSET(a);
647 }
648
649 static void
650 bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
651 {
652 if (!s->proto->is_internal)
653 DISCARD(BAD_EBGP, "CLUSTER_LIST");
654
655 if (!len || (len % 4))
656 WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
657
658 struct adata *ad = lp_alloc_adata(s->pool, len);
659 get_u32s(data, (u32 *) ad->data, len / 4);
660 bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
661 }
662
663 static void
664 bgp_format_cluster_list(eattr *a, byte *buf, uint size)
665 {
666 /* Truncates cluster lists larger than buflen, probably not a problem */
667 int_set_format(a->u.ptr, 0, -1, buf, size);
668 }
669
670
671 static inline u32
672 get_af3(byte *buf)
673 {
674 return (get_u16(buf) << 16) | buf[2];
675 }
676
677 static void
678 bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
679 {
680 /*
681 * 2 B MP_REACH_NLRI data - Address Family Identifier
682 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
683 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
684 * var MP_REACH_NLRI data - Network Address of Next Hop
685 * 1 B MP_REACH_NLRI data - Reserved (zero)
686 * var MP_REACH_NLRI data - Network Layer Reachability Information
687 */
688
689 if ((len < 5) || (len < (5 + (uint) data[3])))
690 bgp_parse_error(s, 9);
691
692 s->mp_reach_af = get_af3(data);
693 s->mp_next_hop_len = data[3];
694 s->mp_next_hop_data = data + 4;
695 s->mp_reach_len = len - 5 - s->mp_next_hop_len;
696 s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
697 }
698
699
700 static void
701 bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
702 {
703 /*
704 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
705 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
706 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
707 */
708
709 if (len < 3)
710 bgp_parse_error(s, 9);
711
712 s->mp_unreach_af = get_af3(data);
713 s->mp_unreach_len = len - 3;
714 s->mp_unreach_nlri = data + 3;
715 }
716
717
718 static void
719 bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
720 {
721 if (!s->proto->is_interior)
722 {
723 struct adata *ad = ec_set_del_nontrans(s->pool, a->u.ptr);
724
725 if (ad->length == 0)
726 UNSET(a);
727
728 ec_set_sort_x(ad);
729 a->u.ptr = ad;
730 }
731 else
732 {
733 if (a->u.ptr->length == 0)
734 UNSET(a);
735
736 a->u.ptr = ec_set_sort(s->pool, a->u.ptr);
737 }
738 }
739
740 static void
741 bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
742 {
743 if (!len || (len % 8))
744 WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
745
746 struct adata *ad = lp_alloc_adata(s->pool, len);
747 get_u32s(data, (u32 *) ad->data, len / 4);
748 bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
749 }
750
751
752 static void
753 bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
754 {
755 if (s->as4_session)
756 DISCARD(NEW_BGP, "AS4_AGGREGATOR");
757
758 if (len != 8)
759 DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
760
761 bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
762 }
763
764 static void
765 bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
766 {
767 struct bgp_proto *p = s->proto;
768 int sets = p->cf->allow_as_sets;
769
770 char err[128];
771
772 if (s->as4_session)
773 DISCARD(NEW_BGP, "AS4_PATH");
774
775 if (len < 6)
776 DISCARD(BAD_LENGTH, "AS4_PATH", len);
777
778 if (!as_path_valid(data, len, 4, sets, 1, err, sizeof(err)))
779 DISCARD("Malformed AS4_PATH attribute - %s", err);
780
781 struct adata *a = lp_alloc_adata(s->pool, len);
782 memcpy(a->data, data, len);
783
784 /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
785 if (as_path_contains_confed(a))
786 {
787 REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
788 a = as_path_strip_confed(s->pool, a);
789 }
790
791 bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
792 }
793
794
795 static void
796 bgp_export_aigp(struct bgp_export_state *s, eattr *a)
797 {
798 if (!s->channel->cf->aigp)
799 UNSET(a);
800 }
801
802 static void
803 bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
804 {
805 char err[128];
806
807 /* Acceptability test postponed to bgp_finish_attrs() */
808
809 if ((flags ^ bgp_attr_table[BA_AIGP].flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
810 DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags);
811
812 if (!bgp_aigp_valid(data, len, err, sizeof(err)))
813 DISCARD("Malformed AIGP attribute - %s", err);
814
815 bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
816 }
817
818 static void
819 bgp_format_aigp(eattr *a, byte *buf, uint size UNUSED)
820 {
821 const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
822
823 if (!b)
824 bsprintf(buf, "?");
825 else
826 bsprintf(buf, "%lu", get_u64(b + 3));
827 }
828
829
830 static void
831 bgp_export_large_community(struct bgp_export_state *s, eattr *a)
832 {
833 if (a->u.ptr->length == 0)
834 UNSET(a);
835
836 a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
837 }
838
839 static void
840 bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
841 {
842 if (!len || (len % 12))
843 WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
844
845 struct adata *ad = lp_alloc_adata(s->pool, len);
846 get_u32s(data, (u32 *) ad->data, len / 4);
847 bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
848 }
849
850 static void
851 bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
852 {
853 net_addr *n = s->route->net->n.addr;
854 u32 *labels = (u32 *) a->u.ptr->data;
855 uint lnum = a->u.ptr->length / 4;
856
857 /* Perhaps we should just ignore it? */
858 if (!s->mpls)
859 WITHDRAW("Unexpected MPLS stack");
860
861 /* Empty MPLS stack is not allowed */
862 if (!lnum)
863 WITHDRAW("Malformed MPLS stack - empty");
864
865 /* This is ugly, but we must ensure that labels fit into NLRI field */
866 if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
867 WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
868
869 for (uint i = 0; i < lnum; i++)
870 {
871 if (labels[i] > 0xfffff)
872 WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
873
874 /* TODO: Check for special-purpose label values? */
875 }
876 }
877
878 static int
879 bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
880 {
881 /*
882 * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
883 * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
884 */
885
886 s->mpls_labels = a->u.ptr;
887 return 0;
888 }
889
890 static void
891 bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
892 {
893 DISCARD("Discarding received attribute #0");
894 }
895
896 static void
897 bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
898 {
899 u32 *labels = (u32 *) a->u.ptr->data;
900 uint lnum = a->u.ptr->length / 4;
901 char *pos = buf;
902
903 for (uint i = 0; i < lnum; i++)
904 {
905 if (size < 20)
906 {
907 bsprintf(pos, "...");
908 return;
909 }
910
911 uint l = bsprintf(pos, "%d/", labels[i]);
912 ADVANCE(pos, size, l);
913 }
914
915 /* Clear last slash or terminate empty string */
916 pos[lnum ? -1 : 0] = 0;
917 }
918
919 static inline void
920 bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
921 {
922 /* Cannot use bgp_set_attr_data() as it works on known attributes only */
923 ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
924 }
925
926
927 /*
928 * Attribute table
929 */
930
/*
 * Table of known BGP attributes, indexed by attribute code. Codes without an
 * entry have a NULL name, which is how bgp_attr_known() tells them apart.
 * Entries without an export hook are exported as they are; the encode hook
 * is called unconditionally by bgp_encode_attr() for every known attribute
 * that reaches it.
 */
static const struct bgp_attr_desc bgp_attr_table[] = {
  [BA_ORIGIN] = {
    .name = "origin",
    .type = EAF_TYPE_INT,
    .flags = BAF_TRANSITIVE,
    .export = bgp_export_origin,
    .encode = bgp_encode_u8,
    .decode = bgp_decode_origin,
    .format = bgp_format_origin,
  },
  [BA_AS_PATH] = {
    .name = "as_path",
    .type = EAF_TYPE_AS_PATH,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_as_path,
    .decode = bgp_decode_as_path,
  },
  [BA_NEXT_HOP] = {
    .name = "next_hop",
    .type = EAF_TYPE_IP_ADDRESS,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_next_hop,
    .decode = bgp_decode_next_hop,
    .format = bgp_format_next_hop,
  },
  [BA_MULTI_EXIT_DISC] = {
    .name = "med",
    .type = EAF_TYPE_INT,
    .flags = BAF_OPTIONAL,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_med,
  },
  [BA_LOCAL_PREF] = {
    .name = "local_pref",
    .type = EAF_TYPE_INT,
    .flags = BAF_TRANSITIVE,
    .export = bgp_export_local_pref,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_local_pref,
  },
  [BA_ATOMIC_AGGR] = {
    .name = "atomic_aggr",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_atomic_aggr,
  },
  [BA_AGGREGATOR] = {
    .name = "aggregator",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_aggregator,
    .decode = bgp_decode_aggregator,
    .format = bgp_format_aggregator,
  },
  [BA_COMMUNITY] = {
    .name = "community",
    .type = EAF_TYPE_INT_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_community,
  },
  [BA_ORIGINATOR_ID] = {
    .name = "originator_id",
    .type = EAF_TYPE_ROUTER_ID,
    .flags = BAF_OPTIONAL,
    .export = bgp_export_originator_id,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_originator_id,
  },
  [BA_CLUSTER_LIST] = {
    .name = "cluster_list",
    .type = EAF_TYPE_INT_SET,
    .flags = BAF_OPTIONAL,
    .export = bgp_export_cluster_list,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_cluster_list,
    .format = bgp_format_cluster_list,
  },
  /* MP_(UN)REACH_NLRI are pseudoattributes: decode only, no encode hook */
  [BA_MP_REACH_NLRI] = {
    .name = "mp_reach_nlri",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL,
    .decode = bgp_decode_mp_reach_nlri,
  },
  [BA_MP_UNREACH_NLRI] = {
    .name = "mp_unreach_nlri",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL,
    .decode = bgp_decode_mp_unreach_nlri,
  },
  [BA_EXT_COMMUNITY] = {
    .name = "ext_community",
    .type = EAF_TYPE_EC_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_ext_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_ext_community,
  },
  [BA_AS4_PATH] = {
    .name = "as4_path",
    .type = EAF_TYPE_AS_PATH,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_as4_path,
  },
  [BA_AS4_AGGREGATOR] = {
    .name = "as4_aggregator",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_as4_aggregator,
    .format = bgp_format_aggregator,
  },
  [BA_AIGP] = {
    .name = "aigp",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
    .export = bgp_export_aigp,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_aigp,
    .format = bgp_format_aigp,
  },
  [BA_LARGE_COMMUNITY] = {
    .name = "large_community",
    .type = EAF_TYPE_LC_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_large_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_large_community,
  },
  /* Internal pseudo-attribute; its encode hook writes nothing to the packet */
  [BA_MPLS_LABEL_STACK] = {
    .name = "mpls_label_stack",
    .type = EAF_TYPE_INT_SET,
    .export = bgp_export_mpls_label_stack,
    .encode = bgp_encode_mpls_label_stack,
    .decode = bgp_decode_mpls_label_stack,
    .format = bgp_format_mpls_label_stack,
  },
};
1072
1073 static inline int
1074 bgp_attr_known(uint code)
1075 {
1076 return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
1077 }
1078
1079
1080 /*
1081 * Attribute export
1082 */
1083
1084 static inline void
1085 bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
1086 {
1087 if (EA_PROTO(a->id) != PROTOCOL_BGP)
1088 return;
1089
1090 uint code = EA_ID(a->id);
1091
1092 if (bgp_attr_known(code))
1093 {
1094 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1095
1096 /* The flags might have been zero if the attr was added by filters */
1097 a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
1098
1099 /* Set partial bit if new opt-trans attribute is attached to non-local route */
1100 if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
1101 (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
1102 a->flags |= BAF_PARTIAL;
1103
1104 /* Call specific hook */
1105 CALL(desc->export, s, a);
1106
1107 /* Attribute might become undefined in hook */
1108 if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
1109 return;
1110 }
1111 else
1112 {
1113 /* Don't re-export unknown non-transitive attributes */
1114 if (!(a->flags & BAF_TRANSITIVE))
1115 return;
1116
1117 a->flags |= BAF_PARTIAL;
1118 }
1119
1120 /* Append updated attribute */
1121 to->attrs[to->count++] = *a;
1122 }
1123
1124 /**
1125 * bgp_export_attrs - export BGP attributes
1126 * @s: BGP export state
1127 * @attrs: a list of extended attributes
1128 *
1129 * The bgp_export_attrs() function takes a list of attributes and merges it to
1130 * one newly allocated and sorted segment. Attributes are validated and
1131 * normalized by type-specific export hooks and attribute flags are updated.
1132 * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
1133 * empty community sets).
1134 *
1135 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
1136 */
1137 static inline ea_list *
1138 bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
1139 {
1140 /* Merge the attribute list */
1141 ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
1142 ea_merge(attrs, new);
1143 ea_sort(new);
1144
1145 uint i, count;
1146 count = new->count;
1147 new->count = 0;
1148
1149 /* Export each attribute */
1150 for (i = 0; i < count; i++)
1151 bgp_export_attr(s, &new->attrs[i], new);
1152
1153 if (s->err_withdraw)
1154 return NULL;
1155
1156 return new;
1157 }
1158
1159
1160 /*
1161 * Attribute encoding
1162 */
1163
1164 static inline int
1165 bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
1166 {
1167 ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
1168
1169 uint code = EA_ID(a->id);
1170
1171 if (bgp_attr_known(code))
1172 return bgp_attr_table[code].encode(s, a, buf, size);
1173 else
1174 return bgp_encode_raw(s, a, buf, size);
1175 }
1176
1177 /**
1178 * bgp_encode_attrs - encode BGP attributes
1179 * @s: BGP write state
1180 * @attrs: a list of extended attributes
1181 * @buf: buffer
1182 * @end: buffer end
1183 *
1184 * The bgp_encode_attrs() function takes a list of extended attributes
1185 * and converts it to its BGP representation (a part of an Update message).
1186 * BGP write state may be fake when called from MRT protocol.
1187 *
1188 * Result: Length of the attribute block generated or -1 if not enough space.
1189 */
1190 int
1191 bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
1192 {
1193 byte *pos = buf;
1194 int i, len;
1195
1196 for (i = 0; i < attrs->count; i++)
1197 {
1198 len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
1199
1200 if (len < 0)
1201 return -1;
1202
1203 pos += len;
1204 }
1205
1206 return pos - buf;
1207 }
1208
1209
1210 /*
1211 * Attribute decoding
1212 */
1213
1214 static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
1215
1216 static inline int
1217 bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
1218 {
1219 eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
1220 int num = p->cf->allow_local_as + 1;
1221 return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
1222 }
1223
1224 static inline int
1225 bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
1226 {
1227 eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
1228 return (e && (e->u.data == p->local_id));
1229 }
1230
1231 static inline int
1232 bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
1233 {
1234 eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
1235 return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
1236 }
1237
1238 static inline void
1239 bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
1240 {
1241 /* Handle duplicate attributes; RFC 7606 3 (g) */
1242 if (BIT32_TEST(s->attrs_seen, code))
1243 {
1244 if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
1245 bgp_parse_error(s, 1);
1246 else
1247 DISCARD("Discarding duplicate attribute (code %u)", code);
1248 }
1249 BIT32_SET(s->attrs_seen, code);
1250
1251 if (bgp_attr_known(code))
1252 {
1253 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1254
1255 /* Handle conflicting flags; RFC 7606 3 (c) */
1256 if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
1257 !(desc->flags & BAF_DECODE_FLAGS))
1258 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
1259
1260 desc->decode(s, code, flags, data, len, to);
1261 }
1262 else /* Unknown attribute */
1263 {
1264 if (!(flags & BAF_OPTIONAL))
1265 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
1266
1267 bgp_decode_unknown(s, code, flags, data, len, to);
1268 }
1269 }
1270
1271 /**
1272 * bgp_decode_attrs - check and decode BGP attributes
1273 * @s: BGP parse state
1274 * @data: start of attribute block
1275 * @len: length of attribute block
1276 *
1277 * This function takes a BGP attribute block (a part of an Update message), checks
1278 * its consistency and converts it to a list of BIRD route attributes represented
1279 * by an (uncached) &rta.
1280 */
1281 ea_list *
1282 bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
1283 {
1284 struct bgp_proto *p = s->proto;
1285 ea_list *attrs = NULL;
1286 uint code, flags, alen;
1287 byte *pos = data;
1288
1289 /* Parse the attributes */
1290 while (len)
1291 {
1292 alen = 0;
1293
1294 /* Read attribute type */
1295 if (len < 2)
1296 goto framing_error;
1297 flags = pos[0];
1298 code = pos[1];
1299 ADVANCE(pos, len, 2);
1300
1301 /* Read attribute length */
1302 if (flags & BAF_EXT_LEN)
1303 {
1304 if (len < 2)
1305 goto framing_error;
1306 alen = get_u16(pos);
1307 ADVANCE(pos, len, 2);
1308 }
1309 else
1310 {
1311 if (len < 1)
1312 goto framing_error;
1313 alen = *pos;
1314 ADVANCE(pos, len, 1);
1315 }
1316
1317 if (alen > len)
1318 goto framing_error;
1319
1320 DBG("Attr %02x %02x %u\n", code, flags, alen);
1321
1322 bgp_decode_attr(s, code, flags, pos, alen, &attrs);
1323 ADVANCE(pos, len, alen);
1324 }
1325
1326 if (s->err_withdraw)
1327 goto withdraw;
1328
1329 /* If there is no reachability NLRI, we are finished */
1330 if (!s->ip_reach_len && !s->mp_reach_len)
1331 return NULL;
1332
1333
1334 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1335 if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
1336 { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
1337
1338 if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
1339 { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
1340
1341 if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
1342 { REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
1343
1344 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1345 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1346 if (!p->as4_session)
1347 bgp_process_as4_attrs(&attrs, s->pool);
1348
1349 /* Reject routes with our ASN in AS_PATH attribute */
1350 if (bgp_as_path_loopy(p, attrs, p->local_as))
1351 goto withdraw;
1352
1353 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
1354 if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
1355 goto withdraw;
1356
1357 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1358 if (p->is_internal && bgp_originator_id_loopy(p, attrs))
1359 goto withdraw;
1360
1361 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1362 if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
1363 goto withdraw;
1364
1365 /* If there is no local preference, define one */
1366 if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
1367 bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1368
1369 return attrs;
1370
1371
1372 framing_error:
1373 /* RFC 7606 4 - handle attribute framing errors */
1374 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1375 alen, len, (int) (pos - s->attrs));
1376
1377 withdraw:
1378 /* RFC 7606 5.2 - handle missing NLRI during errors */
1379 if (!s->ip_reach_len && !s->mp_reach_len)
1380 bgp_parse_error(s, 1);
1381
1382 s->err_withdraw = 1;
1383 return NULL;
1384 }
1385
1386 void
1387 bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
1388 {
1389 /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
1390 if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
1391 {
1392 REPORT("Discarding AIGP attribute received on non-AIGP session");
1393 bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
1394 }
1395 }
1396
1397
1398 /*
1399 * Route bucket hash table
1400 */
1401
/*
 * Parameters of the RBH (route bucket hash) instance, referenced by the
 * HASH_FIND / HASH_INSERT2 / HASH_REMOVE2 calls in this file:
 *  - key of a bucket is the pair (attribute list, cached hash value)
 *  - buckets are chained through their ->next member
 *  - two keys are equal when the hashes match and ea_same() holds
 *  - the hash function just returns the cached hash value
 */
1402 #define RBH_KEY(b) b->eattrs, b->hash
1403 #define RBH_NEXT(b) b->next
1404 #define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
1405 #define RBH_FN(a,h) h
1406
/*
 * Rehash function name and resizing parameters.
 * NOTE(review): the meaning of the individual RBH_PARAMS values is defined
 * by the generic hash template, which is not visible here - confirm there.
 */
1407 #define RBH_REHASH bgp_rbh_rehash
1408 #define RBH_PARAMS /8, *2, 2, 2, 8, 20
1409
1410
/* Instantiate the rehash function for tables of struct bgp_bucket */
1411 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
1412
1413 void
1414 bgp_init_bucket_table(struct bgp_channel *c)
1415 {
1416 HASH_INIT(c->bucket_hash, c->pool, 8);
1417
1418 init_list(&c->bucket_queue);
1419 c->withdraw_bucket = NULL;
1420 }
1421
1422 void
1423 bgp_free_bucket_table(struct bgp_channel *c)
1424 {
1425 HASH_FREE(c->bucket_hash);
1426
1427 struct bgp_bucket *b;
1428 WALK_LIST_FIRST(b, c->bucket_queue)
1429 {
1430 rem_node(&b->send_node);
1431 mb_free(b);
1432 }
1433
1434 mb_free(c->withdraw_bucket);
1435 c->withdraw_bucket = NULL;
1436 }
1437
1438 static struct bgp_bucket *
1439 bgp_get_bucket(struct bgp_channel *c, ea_list *new)
1440 {
1441 /* Hash and lookup */
1442 u32 hash = ea_hash(new);
1443 struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
1444
1445 if (b)
1446 return b;
1447
1448 uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
1449 uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
1450 uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
1451 uint i;
1452 byte *dest;
1453
1454 /* Gather total size of non-inline attributes */
1455 for (i = 0; i < new->count; i++)
1456 {
1457 eattr *a = &new->attrs[i];
1458
1459 if (!(a->type & EAF_EMBEDDED))
1460 size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
1461 }
1462
1463 /* Create the bucket */
1464 b = mb_alloc(c->pool, size);
1465 init_list(&b->prefixes);
1466 b->hash = hash;
1467
1468 /* Copy list of extended attributes */
1469 memcpy(b->eattrs, new, ea_size);
1470 dest = ((byte *) b->eattrs) + ea_size_aligned;
1471
1472 /* Copy values of non-inline attributes */
1473 for (i = 0; i < new->count; i++)
1474 {
1475 eattr *a = &b->eattrs->attrs[i];
1476
1477 if (!(a->type & EAF_EMBEDDED))
1478 {
1479 const struct adata *oa = a->u.ptr;
1480 struct adata *na = (struct adata *) dest;
1481 memcpy(na, oa, sizeof(struct adata) + oa->length);
1482 a->u.ptr = na;
1483 dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
1484 }
1485 }
1486
1487 /* Insert the bucket to send queue and bucket hash */
1488 add_tail(&c->bucket_queue, &b->send_node);
1489 HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
1490
1491 return b;
1492 }
1493
1494 static struct bgp_bucket *
1495 bgp_get_withdraw_bucket(struct bgp_channel *c)
1496 {
1497 if (!c->withdraw_bucket)
1498 {
1499 c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
1500 init_list(&c->withdraw_bucket->prefixes);
1501 }
1502
1503 return c->withdraw_bucket;
1504 }
1505
1506 void
1507 bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1508 {
1509 rem_node(&b->send_node);
1510 HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
1511 mb_free(b);
1512 }
1513
1514 void
1515 bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1516 {
1517 rem_node(&b->send_node);
1518 add_tail(&c->bucket_queue, &b->send_node);
1519 }
1520
1521 void
1522 bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1523 {
1524 struct bgp_proto *p = (void *) c->c.proto;
1525 struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
1526
1527 log(L_ERR "%s: Attribute list too long", p->p.name);
1528 while (!EMPTY_LIST(b->prefixes))
1529 {
1530 struct bgp_prefix *px = HEAD(b->prefixes);
1531
1532 log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
1533 rem_node(&px->buck_node);
1534 add_tail(&wb->prefixes, &px->buck_node);
1535 }
1536 }
1537
1538
1539 /*
1540 * Prefix hash table
1541 */
1542
/*
 * Parameters of the PXH (prefix hash) instance, referenced by the
 * HASH_FIND / HASH_INSERT2 / HASH_REMOVE2 calls in this file:
 *  - key of a prefix is the triple (network, path ID, cached hash value)
 *  - prefixes are chained through their ->next member
 *  - two keys are equal when hashes and path IDs match and net_equal() holds
 *  - the hash function just returns the cached hash value
 */
1543 #define PXH_KEY(px) px->net, px->path_id, px->hash
1544 #define PXH_NEXT(px) px->next
1545 #define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
1546 #define PXH_FN(n,i,h) h
1547
/*
 * Rehash function name and resizing parameters.
 * NOTE(review): the meaning of the individual PXH_PARAMS values is defined
 * by the generic hash template, which is not visible here - confirm there.
 */
1548 #define PXH_REHASH bgp_pxh_rehash
1549 #define PXH_PARAMS /8, *2, 2, 2, 8, 24
1550
1551
/* Instantiate the rehash function for tables of struct bgp_prefix */
1552 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
1553
1554 void
1555 bgp_init_prefix_table(struct bgp_channel *c)
1556 {
1557 HASH_INIT(c->prefix_hash, c->pool, 8);
1558
1559 uint alen = net_addr_length[c->c.net_type];
1560 c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
1561 }
1562
1563 void
1564 bgp_free_prefix_table(struct bgp_channel *c)
1565 {
1566 HASH_FREE(c->prefix_hash);
1567
1568 rfree(c->prefix_slab);
1569 c->prefix_slab = NULL;
1570 }
1571
1572 static struct bgp_prefix *
1573 bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
1574 {
1575 u32 hash = net_hash(net) ^ u32_hash(path_id);
1576 struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
1577
1578 if (px)
1579 {
1580 rem_node(&px->buck_node);
1581 return px;
1582 }
1583
1584 if (c->prefix_slab)
1585 px = sl_alloc(c->prefix_slab);
1586 else
1587 px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
1588
1589 px->buck_node.next = NULL;
1590 px->buck_node.prev = NULL;
1591 px->hash = hash;
1592 px->path_id = path_id;
1593 net_copy(px->net, net);
1594
1595 HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
1596
1597 return px;
1598 }
1599
1600 void
1601 bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
1602 {
1603 rem_node(&px->buck_node);
1604 HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
1605
1606 if (c->prefix_slab)
1607 sl_free(c->prefix_slab, px);
1608 else
1609 mb_free(px);
1610 }
1611
1612
1613 /*
1614 * BGP protocol glue
1615 */
1616
1617 int
1618 bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED)
1619 {
1620 rte *e = *new;
1621 struct proto *SRC = e->attrs->src->proto;
1622 struct bgp_proto *p = (struct bgp_proto *) P;
1623 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
1624
1625 /* Reject our routes */
1626 if (src == p)
1627 return -1;
1628
1629 /* Accept non-BGP routes */
1630 if (src == NULL)
1631 return 0;
1632
1633 /* IBGP route reflection, RFC 4456 */
1634 if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
1635 {
1636 /* Rejected unless configured as route reflector */
1637 if (!p->rr_client && !src->rr_client)
1638 return -1;
1639
1640 /* Generally, this should be handled when path is received, but we check it
1641 also here as rr_cluster_id may be undefined or different in src. */
1642 if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
1643 return -1;
1644 }
1645
1646 /* Handle well-known communities, RFC 1997 */
1647 struct eattr *c;
1648 if (p->cf->interpret_communities &&
1649 (c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
1650 {
1651 const struct adata *d = c->u.ptr;
1652
1653 /* Do not export anywhere */
1654 if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1655 return -1;
1656
1657 /* Do not export outside of AS (or member-AS) */
1658 if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
1659 return -1;
1660
1661 /* Do not export outside of AS (or confederation) */
1662 if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
1663 return -1;
1664
1665 /* Do not export LLGR_STALE routes to LLGR-ignorant peers */
1666 if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
1667 return -1;
1668 }
1669
1670 return 0;
1671 }
1672
1673 static ea_list *
1674 bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
1675 {
1676 struct proto *SRC = e->attrs->src->proto;
1677 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
1678 struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
1679 ea_list *attrs = attrs0;
1680 eattr *a;
1681 const adata *ad;
1682
1683 /* ORIGIN attribute - mandatory, attach if missing */
1684 if (! bgp_find_attr(attrs0, BA_ORIGIN))
1685 bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
1686
1687 /* AS_PATH attribute - mandatory */
1688 a = bgp_find_attr(attrs0, BA_AS_PATH);
1689 ad = a ? a->u.ptr : &null_adata;
1690
1691 /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1692 if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
1693 ad = as_path_strip_confed(pool, ad);
1694
1695 /* AS_PATH attribute - keep or prepend ASN */
1696 if (p->is_internal || p->rs_client)
1697 {
1698 /* IBGP or route server -> just ensure there is one */
1699 if (!a)
1700 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
1701 }
1702 else if (p->is_interior)
1703 {
1704 /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1705 ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
1706 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1707 }
1708 else /* Regular EBGP (no RS, no confederation) */
1709 {
1710 /* Regular EBGP -> prepend ASN as regular sequence */
1711 ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
1712 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1713
1714 /* MULTI_EXIT_DESC attribute - accept only if set in export filter */
1715 a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
1716 if (a && !(a->type & EAF_FRESH))
1717 bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
1718 }
1719
1720 /* NEXT_HOP attribute - delegated to AF-specific hook */
1721 a = bgp_find_attr(attrs0, BA_NEXT_HOP);
1722 bgp_update_next_hop(&s, a, &attrs);
1723
1724 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
1725 if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
1726 bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1727
1728 /* AIGP attribute - accumulate local metric or originate new one */
1729 u64 metric;
1730 if (s.local_next_hop &&
1731 (bgp_total_aigp_metric_(e, &metric, &ad) ||
1732 (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
1733 {
1734 ad = bgp_aigp_set_metric(pool, ad, metric);
1735 bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
1736 }
1737
1738 /* IBGP route reflection, RFC 4456 */
1739 if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
1740 {
1741 /* ORIGINATOR_ID attribute - attach if not already set */
1742 if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
1743 bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
1744
1745 /* CLUSTER_LIST attribute - prepend cluster ID */
1746 a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
1747 ad = a ? a->u.ptr : NULL;
1748
1749 /* Prepend src cluster ID */
1750 if (src->rr_cluster_id)
1751 ad = int_set_prepend(pool, ad, src->rr_cluster_id);
1752
1753 /* Prepend dst cluster ID if src and dst clusters are different */
1754 if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
1755 ad = int_set_prepend(pool, ad, p->rr_cluster_id);
1756
1757 /* Should be at least one prepended cluster ID */
1758 bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
1759 }
1760
1761 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1762 if (! p->as4_session)
1763 {
1764 a = bgp_find_attr(attrs, BA_AS_PATH);
1765 if (a && as_path_contains_as4(a->u.ptr))
1766 {
1767 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
1768 bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
1769 }
1770
1771 a = bgp_find_attr(attrs, BA_AGGREGATOR);
1772 if (a && aggregator_contains_as4(a->u.ptr))
1773 {
1774 bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
1775 bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
1776 }
1777 }
1778
1779 /*
1780 * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1781 * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1782 * should be checked in AF-specific hooks.
1783 */
1784
1785 /* Apply per-attribute export hooks for validatation and normalization */
1786 return bgp_export_attrs(&s, attrs);
1787 }
1788
1789 void
1790 bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
1791 {
1792 struct bgp_proto *p = (void *) P;
1793 struct bgp_channel *c = (void *) C;
1794 struct bgp_bucket *buck;
1795 struct bgp_prefix *px;
1796 u32 path;
1797
1798 if (new)
1799 {
1800 struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2);
1801
1802 /* If attributes are invalid, we fail back to withdraw */
1803 buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
1804 path = new->attrs->src->global_id;
1805
1806 lp_flush(bgp_linpool2);
1807 }
1808 else
1809 {
1810 buck = bgp_get_withdraw_bucket(c);
1811 path = old->attrs->src->global_id;
1812 }
1813
1814 px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
1815 add_tail(&buck->prefixes, &px->buck_node);
1816
1817 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1818 }
1819
1820
1821 static inline u32
1822 bgp_get_neighbor(rte *r)
1823 {
1824 eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1825 u32 as;
1826
1827 if (e && as_path_get_first_regular(e->u.ptr, &as))
1828 return as;
1829
1830 /* If AS_PATH is not defined, we treat rte as locally originated */
1831 struct bgp_proto *p = (void *) r->attrs->src->proto;
1832 return p->cf->confederation ?: p->local_as;
1833 }
1834
1835 static inline int
1836 rte_stale(rte *r)
1837 {
1838 if (r->u.bgp.stale < 0)
1839 {
1840 /* If staleness is unknown, compute and cache it */
1841 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
1842 r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
1843 }
1844
1845 return r->u.bgp.stale;
1846 }
1847
1848 int
1849 bgp_rte_better(rte *new, rte *old)
1850 {
1851 struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
1852 struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
1853 eattr *x, *y;
1854 u32 n, o;
1855
1856 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1857 n = new->u.bgp.suppressed;
1858 o = old->u.bgp.suppressed;
1859 if (n > o)
1860 return 0;
1861 if (n < o)
1862 return 1;
1863
1864 /* RFC 4271 9.1.2.1. Route resolvability test */
1865 n = rte_resolvable(new);
1866 o = rte_resolvable(old);
1867 if (n > o)
1868 return 1;
1869 if (n < o)
1870 return 0;
1871
1872 /* LLGR draft - depreference stale routes */
1873 n = rte_stale(new);
1874 o = rte_stale(old);
1875 if (n > o)
1876 return 0;
1877 if (n < o)
1878 return 1;
1879
1880 /* Start with local preferences */
1881 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1882 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1883 n = x ? x->u.data : new_bgp->cf->default_local_pref;
1884 o = y ? y->u.data : old_bgp->cf->default_local_pref;
1885 if (n > o)
1886 return 1;
1887 if (n < o)
1888 return 0;
1889
1890 /* RFC 7311 4.1 - Apply AIGP metric */
1891 u64 n2 = bgp_total_aigp_metric(new);
1892 u64 o2 = bgp_total_aigp_metric(old);
1893 if (n2 < o2)
1894 return 1;
1895 if (n2 > o2)
1896 return 0;
1897
1898 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1899 if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
1900 {
1901 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1902 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1903 n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1904 o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1905 if (n < o)
1906 return 1;
1907 if (n > o)
1908 return 0;
1909 }
1910
1911 /* RFC 4271 9.1.2.2. b) Use origins */
1912 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1913 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1914 n = x ? x->u.data : ORIGIN_INCOMPLETE;
1915 o = y ? y->u.data : ORIGIN_INCOMPLETE;
1916 if (n < o)
1917 return 1;
1918 if (n > o)
1919 return 0;
1920
1921 /* RFC 4271 9.1.2.2. c) Compare MED's */
1922 /* Proper RFC 4271 path selection cannot be interpreted as finding
1923 * the best path in some ordering. It is implemented partially in
1924 * bgp_rte_recalculate() when deterministic_med option is
1925 * active. Without that option, the behavior is just an
1926 * approximation, which in specific situations may lead to
1927 * persistent routing loops, because it is nondeterministic - it
1928 * depends on the order in which routes appeared. But it is also the
1929 * same behavior as used by default in Cisco routers, so it is
1930 * probably not a big issue.
1931 */
1932 if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1933 (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
1934 {
1935 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1936 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1937 n = x ? x->u.data : new_bgp->cf->default_med;
1938 o = y ? y->u.data : old_bgp->cf->default_med;
1939 if (n < o)
1940 return 1;
1941 if (n > o)
1942 return 0;
1943 }
1944
1945 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1946 if (new_bgp->is_interior > old_bgp->is_interior)
1947 return 0;
1948 if (new_bgp->is_interior < old_bgp->is_interior)
1949 return 1;
1950
1951 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1952 n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1953 o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1954 if (n < o)
1955 return 1;
1956 if (n > o)
1957 return 0;
1958
1959 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1960 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1961 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1962 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1963 n = x ? x->u.data : new_bgp->remote_id;
1964 o = y ? y->u.data : old_bgp->remote_id;
1965
1966 /* RFC 5004 - prefer older routes */
1967 /* (if both are external and from different peer) */
1968 if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1969 !new_bgp->is_internal && n != o)
1970 return 0;
1971
1972 /* rest of RFC 4271 9.1.2.2. f) */
1973 if (n < o)
1974 return 1;
1975 if (n > o)
1976 return 0;
1977
1978 /* RFC 4456 9. b) Compare cluster list lengths */
1979 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1980 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1981 n = x ? int_set_get_size(x->u.ptr) : 0;
1982 o = y ? int_set_get_size(y->u.ptr) : 0;
1983 if (n < o)
1984 return 1;
1985 if (n > o)
1986 return 0;
1987
1988 /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
1989 return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
1990 }
1991
1992
1993 int
1994 bgp_rte_mergable(rte *pri, rte *sec)
1995 {
1996 struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
1997 struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
1998 eattr *x, *y;
1999 u32 p, s;
2000
2001 /* Skip suppressed routes (see bgp_rte_recalculate()) */
2002 if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
2003 return 0;
2004
2005 /* RFC 4271 9.1.2.1. Route resolvability test */
2006 if (rte_resolvable(pri) != rte_resolvable(sec))
2007 return 0;
2008
2009 /* LLGR draft - depreference stale routes */
2010 if (rte_stale(pri) != rte_stale(sec))
2011 return 0;
2012
2013 /* Start with local preferences */
2014 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2015 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2016 p = x ? x->u.data : pri_bgp->cf->default_local_pref;
2017 s = y ? y->u.data : sec_bgp->cf->default_local_pref;
2018 if (p != s)
2019 return 0;
2020
2021 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
2022 if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
2023 {
2024 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2025 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2026 p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
2027 s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
2028
2029 if (p != s)
2030 return 0;
2031
2032 // if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
2033 // return 0;
2034 }
2035
2036 /* RFC 4271 9.1.2.2. b) Use origins */
2037 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2038 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2039 p = x ? x->u.data : ORIGIN_INCOMPLETE;
2040 s = y ? y->u.data : ORIGIN_INCOMPLETE;
2041 if (p != s)
2042 return 0;
2043
2044 /* RFC 4271 9.1.2.2. c) Compare MED's */
2045 if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
2046 (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
2047 {
2048 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2049 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2050 p = x ? x->u.data : pri_bgp->cf->default_med;
2051 s = y ? y->u.data : sec_bgp->cf->default_med;
2052 if (p != s)
2053 return 0;
2054 }
2055
2056 /* RFC 4271 9.1.2.2. d) Prefer external peers */
2057 if (pri_bgp->is_interior != sec_bgp->is_interior)
2058 return 0;
2059
2060 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
2061 p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
2062 s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
2063 if (p != s)
2064 return 0;
2065
2066 /* Remaining criteria are ignored */
2067
2068 return 1;
2069 }
2070
2071
2072 static inline int
2073 same_group(rte *r, u32 lpref, u32 lasn)
2074 {
2075 return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
2076 }
2077
2078 static inline int
2079 use_deterministic_med(rte *r)
2080 {
2081 struct proto *P = r->attrs->src->proto;
2082 return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
2083 }
2084
2085 int
2086 bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
2087 {
2088 rte *r, *s;
2089 rte *key = new ? new : old;
2090 u32 lpref = key->pref;
2091 u32 lasn = bgp_get_neighbor(key);
2092 int old_suppressed = old ? old->u.bgp.suppressed : 0;
2093
2094 /*
2095 * Proper RFC 4271 path selection is a bit complicated, it cannot be
2096 * implemented just by rte_better(), because it is not a linear
2097 * ordering. But it can be splitted to two levels, where the lower
2098 * level chooses the best routes in each group of routes from the
2099 * same neighboring AS and higher level chooses the best route (with
2100 * a slightly different ordering) between the best-in-group routes.
2101 *
2102 * When deterministic_med is disabled, we just ignore this issue and
2103 * choose the best route by bgp_rte_better() alone. If enabled, the
2104 * lower level of the route selection is done here (for the group
2105 * to which the changed route belongs), all routes in group are
2106 * marked as suppressed, just chosen best-in-group is not.
2107 *
2108 * Global best route selection then implements higher level by
2109 * choosing between non-suppressed routes (as they are always
2110 * preferred over suppressed routes). Routes from BGP protocols
2111 * that do not set deterministic_med are just never suppressed. As
2112 * they do not participate in the lower level selection, it is OK
2113 * that this fn is not called for them.
2114 *
2115 * The idea is simple, the implementation is more problematic,
2116 * mostly because of optimizations in rte_recalculate() that
2117 * avoids full recalculation in most cases.
2118 *
2119 * We can assume that at least one of new, old is non-NULL and both
2120 * are from the same protocol with enabled deterministic_med. We
2121 * group routes by both neighbor AS (lasn) and preference (lpref),
2122 * because bgp_rte_better() does not handle preference itself.
2123 */
2124
2125 /* If new and old are from different groups, we just process that
2126 as two independent events */
2127 if (new && old && !same_group(old, lpref, lasn))
2128 {
2129 int i1, i2;
2130 i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
2131 i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
2132 return i1 || i2;
2133 }
2134
2135 /*
2136 * We could find the best-in-group and then make some shortcuts like
2137 * in rte_recalculate, but as we would have to walk through all
2138 * net->routes just to find it, it is probably not worth. So we
2139 * just have one simple fast case that use just the old route.
2140 * We also set suppressed flag to avoid using it in bgp_rte_better().
2141 */
2142
2143 if (new)
2144 new->u.bgp.suppressed = 1;
2145
2146 if (old)
2147 {
2148 old->u.bgp.suppressed = 1;
2149
2150 /* The fast case - replace not best with worse (or remove not best) */
2151 if (old_suppressed && !(new && bgp_rte_better(new, old)))
2152 return 0;
2153 }
2154
2155 /* The default case - find a new best-in-group route */
2156 r = new; /* new may not be in the list */
2157 for (s=net->routes; rte_is_valid(s); s=s->next)
2158 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2159 {
2160 s->u.bgp.suppressed = 1;
2161 if (!r || bgp_rte_better(s, r))
2162 r = s;
2163 }
2164
2165 /* Simple case - the last route in group disappears */
2166 if (!r)
2167 return 0;
2168
2169 /* Found if new is mergable with best-in-group */
2170 if (new && (new != r) && bgp_rte_mergable(r, new))
2171 new->u.bgp.suppressed = 0;
2172
2173 /* Found all existing routes mergable with best-in-group */
2174 for (s=net->routes; rte_is_valid(s); s=s->next)
2175 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2176 if ((s != r) && bgp_rte_mergable(r, s))
2177 s->u.bgp.suppressed = 0;
2178
2179 /* Found best-in-group */
2180 r->u.bgp.suppressed = 0;
2181
2182 /*
2183 * There are generally two reasons why we have to force
2184 * recalculation (return 1): First, the new route may be wrongfully
2185 * chosen to be the best in the first case check in
2186 * rte_recalculate(), this may happen only if old_best is from the
2187 * same group. Second, another (different than new route)
2188 * best-in-group is chosen and that may be the proper best (although
2189 * rte_recalculate() would ignore that possibility).
2190 *
2191 * There are three possible cases according to whether the old route
2192 * was the best in group (OBG, i.e. !old_suppressed) and whether the
2193 * new route is the best in group (NBG, tested by r == new). These
2194 * cases work even if old or new is NULL.
2195 *
2196 * NBG -> new is a possible candidate for the best route, so we just
2197 * check for the first reason using same_group().
2198 *
2199 * !NBG && OBG -> Second reason applies, return 1
2200 *
2201 * !NBG && !OBG -> Best in group does not change, old != old_best,
2202 * rte_better(new, old_best) is false and therefore
2203 * the first reason does not apply, return 0
2204 */
2205
2206 if (r == new)
2207 return old_best && same_group(old_best, lpref, lasn);
2208 else
2209 return !old_suppressed;
2210 }
2211
2212 struct rte *
2213 bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
2214 {
2215 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
2216 const struct adata *ad = a ? a->u.ptr : NULL;
2217 uint flags = a ? a->flags : BAF_PARTIAL;
2218
2219 if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
2220 return NULL;
2221
2222 if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
2223 return r;
2224
2225 r = rte_cow_rta(r, pool);
2226 bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
2227 int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
2228 r->u.bgp.stale = 1;
2229
2230 return r;
2231 }
2232
2233
2234 /*
2235 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
2236 */
2237 static void
2238 bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
2239 {
2240 eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
2241 eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
2242 eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
2243 eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
2244
2245 /* First, unset AS4_* attributes */
2246 if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
2247 if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
2248
2249 /* Handle AGGREGATOR attribute */
2250 if (a2 && a4)
2251 {
2252 u32 a2_asn = get_u32(a2->u.ptr->data);
2253
2254 /* If routes were aggregated by an old router, then AS4_PATH and
2255 AS4_AGGREGATOR are invalid. In that case we give up. */
2256 if (a2_asn != AS_TRANS)
2257 return;
2258
2259 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
2260 a2->u.ptr = a4->u.ptr;
2261 }
2262
2263 /* Handle AS_PATH attribute */
2264 if (p2 && p4)
2265 {
2266 /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
2267 int p2_len = as_path_getlen(p2->u.ptr);
2268 int p4_len = as_path_getlen(p4->u.ptr);
2269
2270 /* AS_PATH is too short, give up */
2271 if (p2_len < p4_len)
2272 return;
2273
2274 /* Merge AS_PATH and AS4_PATH */
2275 struct adata *apc = as_path_cut(pool, p2->u.ptr, p2_len - p4_len);
2276 p2->u.ptr = as_path_merge(pool, apc, p4->u.ptr);
2277 }
2278 }
2279
2280 int
2281 bgp_get_attr(eattr *a, byte *buf, int buflen)
2282 {
2283 uint i = EA_ID(a->id);
2284 const struct bgp_attr_desc *d;
2285 int len;
2286
2287 if (bgp_attr_known(i))
2288 {
2289 d = &bgp_attr_table[i];
2290 len = bsprintf(buf, "%s", d->name);
2291 buf += len;
2292 if (d->format)
2293 {
2294 *buf++ = ':';
2295 *buf++ = ' ';
2296 d->format(a, buf, buflen - len - 2);
2297 return GA_FULL;
2298 }
2299 return GA_NAME;
2300 }
2301
2302 bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
2303 return GA_NAME;
2304 }
2305
2306 void
2307 bgp_get_route_info(rte *e, byte *buf)
2308 {
2309 eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2310 eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2311 u32 origas;
2312
2313 buf += bsprintf(buf, " (%d", e->pref);
2314
2315 if (e->u.bgp.suppressed)
2316 buf += bsprintf(buf, "-");
2317
2318 if (rte_stale(e))
2319 buf += bsprintf(buf, "s");
2320
2321 u64 metric = bgp_total_aigp_metric(e);
2322 if (metric < BGP_AIGP_MAX)
2323 {
2324 buf += bsprintf(buf, "/%lu", metric);
2325 }
2326 else if (e->attrs->igp_metric)
2327 {
2328 if (!rte_resolvable(e))
2329 buf += bsprintf(buf, "/-");
2330 else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
2331 buf += bsprintf(buf, "/?");
2332 else
2333 buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
2334 }
2335 buf += bsprintf(buf, ") [");
2336
2337 if (p && as_path_get_last(p->u.ptr, &origas))
2338 buf += bsprintf(buf, "AS%u", origas);
2339 if (o)
2340 buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2341 strcpy(buf, "]");
2342 }