]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/attrs.c
Nest: Added const to ea_show just to declare that this shouldn't really change anything
[thirdparty/bird.git] / proto / bgp / attrs.c
1 /*
2 * BIRD -- BGP Attributes
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 #undef LOCAL_DEBUG
12
13 #include <stdlib.h>
14
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "conf/conf.h"
21 #include "lib/resource.h"
22 #include "lib/string.h"
23 #include "lib/unaligned.h"
24
25 #include "bgp.h"
26
27 /*
28 * UPDATE message error handling
29 *
30 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32 * - Checks of some optional attribute values are missing.
33 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34 * are probably inadequate.
35 *
36 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37 * 4271 does not explicitly specify the behavior in that case.
38 *
39 * Loop detection related to route reflection (based on ORIGINATOR_ID
40 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41 * specifies that such updates should be ignored, but that is generally
42 * a bad idea.
43 *
44 * BGP attribute table has several hooks:
45 *
46 * export - Hook that validates and normalizes attribute during export phase.
47 * Receives eattr, may modify it (e.g., sort community lists for canonical
48 * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
49 * necessary. May assume that eattr has value valid w.r.t. its type, but may be
50 * invalid w.r.t. BGP constraints. Optional.
51 *
52 * encode - Hook that converts internal representation to external one during
53 * packet writing. Receives eattr and puts it in the buffer (including attribute
54 * header). Returns number of bytes, or -1 if not enough space. May assume that
55 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
56 * for all known attributes that exist internally after export phase (i.e., all
57 * except pseudoattributes MP_(UN)REACH_NLRI).
58 *
59 * decode - Hook that converts external representation to internal one during
60 * packet parsing. Receives attribute data in buffer, validates it and adds
61 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
62 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
63 *
64 * format - Optional hook that converts eattr to textual representation.
65 */
66
67
68 struct bgp_attr_desc {
69 const char *name;
70 uint type;
71 uint flags;
72 void (*export)(struct bgp_export_state *s, eattr *a);
73 int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
74 void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
75 void (*format)(const eattr *ea, byte *buf, uint size);
76 };
77
78 static const struct bgp_attr_desc bgp_attr_table[];
79
80 static inline int bgp_attr_known(uint code);
81
82 eattr *
83 bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
84 {
85 ASSERT(bgp_attr_known(code));
86
87 return ea_set_attr(
88 attrs,
89 pool,
90 EA_CODE(PROTOCOL_BGP, code),
91 flags,
92 bgp_attr_table[code].type,
93 val
94 );
95 }
96
97
98
/*
 * Escape macros for attribute hooks. All of them expect a state variable
 * named 's' in scope, and DISCARD(), WITHDRAW() and UNSET() execute 'return',
 * so they can be used only inside functions returning void.
 */

/* Log a message caused by the remote side, prefixed by the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log a message and leave the hook */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log a message, set the err_withdraw flag in the state and leave the hook */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark the attribute as undefined and leave the hook (argument parenthesized
   so that any pointer expression may be passed safely) */
#define UNSET(a) \
  ({ (a)->type = EAF_TYPE_UNDEF; return; })

/* Message templates shared by the hooks below */
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY "Missing mandatory %s attribute"
116
117
118 static inline int
119 bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
120 {
121 *buf++ = flags;
122 *buf++ = code;
123 *buf++ = len;
124 return 3;
125 }
126
127 static inline int
128 bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
129 {
130 *buf++ = flags | BAF_EXT_LEN;
131 *buf++ = code;
132 put_u16(buf, len);
133 return 4;
134 }
135
136 static inline int
137 bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
138 {
139 if (len < 256)
140 return bgp_put_attr_hdr3(buf, code, flags, len);
141 else
142 return bgp_put_attr_hdr4(buf, code, flags, len);
143 }
144
145 static int
146 bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
147 {
148 if (size < (3+1))
149 return -1;
150
151 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
152 buf[3] = a->u.data;
153
154 return 3+1;
155 }
156
157 static int
158 bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
159 {
160 if (size < (3+4))
161 return -1;
162
163 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
164 put_u32(buf+3, a->u.data);
165
166 return 3+4;
167 }
168
169 static int
170 bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
171 {
172 uint len = a->u.ptr->length;
173
174 if (size < (4+len))
175 return -1;
176
177 uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
178 put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
179
180 return hdr + len;
181 }
182
183 static int
184 bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint len)
185 {
186 if (size < (4+len))
187 return -1;
188
189 uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
190 memcpy(buf + hdr, data, len);
191
192 return hdr + len;
193 }
194
195 static int
196 bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
197 {
198 return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
199 }
200
201
202 /*
203 * AIGP handling
204 */
205
206 static int
207 bgp_aigp_valid(byte *data, uint len, char *err, uint elen)
208 {
209 byte *pos = data;
210 char *err_dsc = NULL;
211 uint err_val = 0;
212
213 #define BAD(DSC,VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; })
214 while (len)
215 {
216 if (len < 3)
217 BAD("TLV framing error", len);
218
219 /* Process one TLV */
220 uint ptype = pos[0];
221 uint plen = get_u16(pos + 1);
222
223 if (len < plen)
224 BAD("TLV framing error", plen);
225
226 if (plen < 3)
227 BAD("Bad TLV length", plen);
228
229 if ((ptype == BGP_AIGP_METRIC) && (plen != 11))
230 BAD("Bad AIGP TLV length", plen);
231
232 ADVANCE(pos, len, plen);
233 }
234 #undef BAD
235
236 return 1;
237
238 bad:
239 if (err)
240 if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0)
241 err[0] = 0;
242
243 return 0;
244 }
245
246 static const byte *
247 bgp_aigp_get_tlv(const struct adata *ad, uint type)
248 {
249 if (!ad)
250 return NULL;
251
252 uint len = ad->length;
253 const byte *pos = ad->data;
254
255 while (len)
256 {
257 uint ptype = pos[0];
258 uint plen = get_u16(pos + 1);
259
260 if (ptype == type)
261 return pos;
262
263 ADVANCE(pos, len, plen);
264 }
265
266 return NULL;
267 }
268
269 static const struct adata *
270 bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen)
271 {
272 uint len = ad ? ad->length : 0;
273 const byte *pos = ad ? ad->data : NULL;
274 struct adata *res = lp_alloc_adata(pool, len + 3 + dlen);
275 byte *dst = res->data;
276 byte *tlv = NULL;
277 int del = 0;
278
279 while (len)
280 {
281 uint ptype = pos[0];
282 uint plen = get_u16(pos + 1);
283
284 /* Find position for new TLV */
285 if ((ptype >= type) && !tlv)
286 {
287 tlv = dst;
288 dst += 3 + dlen;
289 }
290
291 /* Skip first matching TLV, copy others */
292 if ((ptype == type) && !del)
293 del = 1;
294 else
295 {
296 memcpy(dst, pos, plen);
297 dst += plen;
298 }
299
300 ADVANCE(pos, len, plen);
301 }
302
303 if (!tlv)
304 {
305 tlv = dst;
306 dst += 3 + dlen;
307 }
308
309 /* Store the TLD */
310 put_u8(tlv + 0, type);
311 put_u16(tlv + 1, 3 + dlen);
312 memcpy(tlv + 3, data, dlen);
313
314 /* Update length */
315 res->length = dst - res->data;
316
317 return res;
318 }
319
320 static u64 UNUSED
321 bgp_aigp_get_metric(const struct adata *ad, u64 def)
322 {
323 const byte *b = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC);
324 return b ? get_u64(b + 3) : def;
325 }
326
327 static const struct adata *
328 bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
329 {
330 byte data[8];
331 put_u64(data, metric);
332 return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, data, 8);
333 }
334
335 int
336 bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
337 {
338 eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
339 if (!a)
340 return 0;
341
342 const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
343 if (!b)
344 return 0;
345
346 u64 aigp = get_u64(b + 3);
347 u64 step = e->attrs->igp_metric;
348
349 if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
350 step = BGP_AIGP_MAX;
351
352 if (!step)
353 step = 1;
354
355 *ad = a->u.ptr;
356 *metric = aigp + step;
357 if (*metric < aigp)
358 *metric = BGP_AIGP_MAX;
359
360 return 1;
361 }
362
363 static inline int
364 bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
365 {
366 if (e->attrs->source == RTS_BGP)
367 return 0;
368
369 *metric = rt_get_igp_metric(e);
370 *ad = NULL;
371 return *metric < IGP_METRIC_UNKNOWN;
372 }
373
374
375 /*
376 * Attribute hooks
377 */
378
379 static void
380 bgp_export_origin(struct bgp_export_state *s, eattr *a)
381 {
382 if (a->u.data > 2)
383 WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
384 }
385
386 static void
387 bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
388 {
389 if (len != 1)
390 WITHDRAW(BAD_LENGTH, "ORIGIN", len);
391
392 if (data[0] > 2)
393 WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
394
395 bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
396 }
397
398 static void
399 bgp_format_origin(const eattr *a, byte *buf, uint size UNUSED)
400 {
401 static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
402
403 bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
404 }
405
406
407 static inline int
408 bgp_as_path_first_as_equal(const byte *data, uint len, u32 asn)
409 {
410 return (len >= 6) &&
411 ((data[0] == AS_PATH_SEQUENCE) || (data[0] == AS_PATH_CONFED_SEQUENCE)) &&
412 (data[1] > 0) &&
413 (get_u32(data+2) == asn);
414 }
415
416 static int
417 bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
418 {
419 const byte *data = a->u.ptr->data;
420 uint len = a->u.ptr->length;
421
422 if (!s->as4_session)
423 {
424 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
425 byte *dst = alloca(len);
426 len = as_path_32to16(dst, data, len);
427 data = dst;
428 }
429
430 return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
431 }
432
433 static void
434 bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
435 {
436 struct bgp_proto *p = s->proto;
437 int as_length = s->as4_session ? 4 : 2;
438 int as_sets = p->cf->allow_as_sets;
439 int as_confed = p->cf->confederation && p->is_interior;
440 char err[128];
441
442 if (!as_path_valid(data, len, as_length, as_sets, as_confed, err, sizeof(err)))
443 WITHDRAW("Malformed AS_PATH attribute - %s", err);
444
445 if (!s->as4_session)
446 {
447 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
448 byte *src = data;
449 data = alloca(2*len);
450 len = as_path_16to32(data, src, len);
451 }
452
453 /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
454 if (p->is_interior && !p->is_internal &&
455 ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
456 WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
457
458 /* Reject routes with first AS in AS_PATH not matching neighbor AS; RFC 4271 6.3 */
459 if (!p->is_internal && p->cf->enforce_first_as &&
460 !bgp_as_path_first_as_equal(data, len, p->remote_as))
461 WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS");
462
463 bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
464 }
465
466
467 static int
468 bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
469 {
470 /*
471 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
472 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
473 * store it and encode it later by AFI-specific hooks.
474 */
475
476 if (!s->mp_reach)
477 {
478 // ASSERT(a->u.ptr->length == sizeof(ip_addr));
479
480 /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
481 ip_addr *addr = (void *) a->u.ptr->data;
482 if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
483 return 0;
484
485 if (size < (3+4))
486 return -1;
487
488 bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
489 put_ip4(buf+3, ipa_to_ip4(*addr));
490
491 return 3+4;
492 }
493 else
494 {
495 s->mp_next_hop = a;
496 return 0;
497 }
498 }
499
500 static void
501 bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
502 {
503 if (len != 4)
504 WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
505
506 /* Semantic checks are done later */
507 s->ip_next_hop_len = len;
508 s->ip_next_hop_data = data;
509 }
510
511 /* TODO: This function should use AF-specific hook */
512 static void
513 bgp_format_next_hop(const eattr *a, byte *buf, uint size UNUSED)
514 {
515 ip_addr *nh = (void *) a->u.ptr->data;
516 uint len = a->u.ptr->length;
517
518 ASSERT((len == 16) || (len == 32));
519
520 /* in IPv6, we may have two addresses in NEXT HOP */
521 if ((len == 16) || ipa_zero(nh[1]))
522 bsprintf(buf, "%I", nh[0]);
523 else
524 bsprintf(buf, "%I %I", nh[0], nh[1]);
525 }
526
527
528 static void
529 bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
530 {
531 if (len != 4)
532 WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
533
534 u32 val = get_u32(data);
535 bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
536 }
537
538
539 static void
540 bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
541 {
542 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
543 UNSET(a);
544 }
545
546 static void
547 bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
548 {
549 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
550 DISCARD(BAD_EBGP, "LOCAL_PREF");
551
552 if (len != 4)
553 WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
554
555 u32 val = get_u32(data);
556 bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
557 }
558
559
560 static void
561 bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
562 {
563 if (len != 0)
564 DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
565
566 bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
567 }
568
569 static int
570 bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
571 {
572 const byte *data = a->u.ptr->data;
573 uint len = a->u.ptr->length;
574
575 if (!s->as4_session)
576 {
577 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
578 byte *dst = alloca(6);
579 len = aggregator_32to16(dst, data);
580 data = dst;
581 }
582
583 return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
584 }
585
586 static void
587 bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
588 {
589 if (len != (s->as4_session ? 8 : 6))
590 DISCARD(BAD_LENGTH, "AGGREGATOR", len);
591
592 if (!s->as4_session)
593 {
594 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
595 byte *src = data;
596 data = alloca(8);
597 len = aggregator_16to32(data, src);
598 }
599
600 bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
601 }
602
603 static void
604 bgp_format_aggregator(const eattr *a, byte *buf, uint size UNUSED)
605 {
606 const byte *data = a->u.ptr->data;
607
608 bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
609 }
610
611
612 static void
613 bgp_export_community(struct bgp_export_state *s, eattr *a)
614 {
615 if (a->u.ptr->length == 0)
616 UNSET(a);
617
618 a->u.ptr = int_set_sort(s->pool, a->u.ptr);
619 }
620
621 static void
622 bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
623 {
624 if (!len || (len % 4))
625 WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
626
627 struct adata *ad = lp_alloc_adata(s->pool, len);
628 get_u32s(data, (u32 *) ad->data, len / 4);
629 bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
630 }
631
632
633 static void
634 bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
635 {
636 if (!s->proto->is_internal)
637 UNSET(a);
638 }
639
640 static void
641 bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
642 {
643 if (!s->proto->is_internal)
644 DISCARD(BAD_EBGP, "ORIGINATOR_ID");
645
646 if (len != 4)
647 WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
648
649 u32 val = get_u32(data);
650 bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
651 }
652
653
654 static void
655 bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
656 {
657 if (!s->proto->is_internal)
658 UNSET(a);
659
660 if (a->u.ptr->length == 0)
661 UNSET(a);
662 }
663
664 static void
665 bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
666 {
667 if (!s->proto->is_internal)
668 DISCARD(BAD_EBGP, "CLUSTER_LIST");
669
670 if (!len || (len % 4))
671 WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
672
673 struct adata *ad = lp_alloc_adata(s->pool, len);
674 get_u32s(data, (u32 *) ad->data, len / 4);
675 bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
676 }
677
678 static void
679 bgp_format_cluster_list(const eattr *a, byte *buf, uint size)
680 {
681 /* Truncates cluster lists larger than buflen, probably not a problem */
682 int_set_format(a->u.ptr, 0, -1, buf, size);
683 }
684
685
686 static inline u32
687 get_af3(byte *buf)
688 {
689 return (get_u16(buf) << 16) | buf[2];
690 }
691
692 static void
693 bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
694 {
695 /*
696 * 2 B MP_REACH_NLRI data - Address Family Identifier
697 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
698 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
699 * var MP_REACH_NLRI data - Network Address of Next Hop
700 * 1 B MP_REACH_NLRI data - Reserved (zero)
701 * var MP_REACH_NLRI data - Network Layer Reachability Information
702 */
703
704 if ((len < 5) || (len < (5 + (uint) data[3])))
705 bgp_parse_error(s, 9);
706
707 s->mp_reach_af = get_af3(data);
708 s->mp_next_hop_len = data[3];
709 s->mp_next_hop_data = data + 4;
710 s->mp_reach_len = len - 5 - s->mp_next_hop_len;
711 s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
712 }
713
714
715 static void
716 bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
717 {
718 /*
719 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
720 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
721 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
722 */
723
724 if (len < 3)
725 bgp_parse_error(s, 9);
726
727 s->mp_unreach_af = get_af3(data);
728 s->mp_unreach_len = len - 3;
729 s->mp_unreach_nlri = data + 3;
730 }
731
732
733 static void
734 bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
735 {
736 if (!s->proto->is_interior)
737 {
738 struct adata *ad = ec_set_del_nontrans(s->pool, a->u.ptr);
739
740 if (ad->length == 0)
741 UNSET(a);
742
743 ec_set_sort_x(ad);
744 a->u.ptr = ad;
745 }
746 else
747 {
748 if (a->u.ptr->length == 0)
749 UNSET(a);
750
751 a->u.ptr = ec_set_sort(s->pool, a->u.ptr);
752 }
753 }
754
755 static void
756 bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
757 {
758 if (!len || (len % 8))
759 WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
760
761 struct adata *ad = lp_alloc_adata(s->pool, len);
762 get_u32s(data, (u32 *) ad->data, len / 4);
763 bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
764 }
765
766
767 static void
768 bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
769 {
770 if (s->as4_session)
771 DISCARD(NEW_BGP, "AS4_AGGREGATOR");
772
773 if (len != 8)
774 DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
775
776 bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
777 }
778
779 static void
780 bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
781 {
782 struct bgp_proto *p = s->proto;
783 int sets = p->cf->allow_as_sets;
784
785 char err[128];
786
787 if (s->as4_session)
788 DISCARD(NEW_BGP, "AS4_PATH");
789
790 if (len < 6)
791 DISCARD(BAD_LENGTH, "AS4_PATH", len);
792
793 if (!as_path_valid(data, len, 4, sets, 1, err, sizeof(err)))
794 DISCARD("Malformed AS4_PATH attribute - %s", err);
795
796 struct adata *a = lp_alloc_adata(s->pool, len);
797 memcpy(a->data, data, len);
798
799 /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
800 if (as_path_contains_confed(a))
801 {
802 REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
803 a = as_path_strip_confed(s->pool, a);
804 }
805
806 bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
807 }
808
809
810 static void
811 bgp_export_aigp(struct bgp_export_state *s, eattr *a)
812 {
813 if (!s->channel->cf->aigp)
814 UNSET(a);
815 }
816
817 static void
818 bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
819 {
820 char err[128];
821
822 /* Acceptability test postponed to bgp_finish_attrs() */
823
824 if ((flags ^ bgp_attr_table[BA_AIGP].flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
825 DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags);
826
827 if (!bgp_aigp_valid(data, len, err, sizeof(err)))
828 DISCARD("Malformed AIGP attribute - %s", err);
829
830 bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
831 }
832
833 static void
834 bgp_format_aigp(const eattr *a, byte *buf, uint size UNUSED)
835 {
836 const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
837
838 if (!b)
839 bsprintf(buf, "?");
840 else
841 bsprintf(buf, "%lu", get_u64(b + 3));
842 }
843
844
845 static void
846 bgp_export_large_community(struct bgp_export_state *s, eattr *a)
847 {
848 if (a->u.ptr->length == 0)
849 UNSET(a);
850
851 a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
852 }
853
854 static void
855 bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
856 {
857 if (!len || (len % 12))
858 WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
859
860 struct adata *ad = lp_alloc_adata(s->pool, len);
861 get_u32s(data, (u32 *) ad->data, len / 4);
862 bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
863 }
864
865 static void
866 bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
867 {
868 net_addr *n = s->route->net->n.addr;
869 u32 *labels = (u32 *) a->u.ptr->data;
870 uint lnum = a->u.ptr->length / 4;
871
872 /* Perhaps we should just ignore it? */
873 if (!s->mpls)
874 WITHDRAW("Unexpected MPLS stack");
875
876 /* Empty MPLS stack is not allowed */
877 if (!lnum)
878 WITHDRAW("Malformed MPLS stack - empty");
879
880 /* This is ugly, but we must ensure that labels fit into NLRI field */
881 if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
882 WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
883
884 for (uint i = 0; i < lnum; i++)
885 {
886 if (labels[i] > 0xfffff)
887 WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
888
889 /* TODO: Check for special-purpose label values? */
890 }
891 }
892
893 static int
894 bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
895 {
896 /*
897 * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
898 * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
899 */
900
901 s->mpls_labels = a->u.ptr;
902 return 0;
903 }
904
905 static void
906 bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
907 {
908 DISCARD("Discarding received attribute #0");
909 }
910
911 static void
912 bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size)
913 {
914 u32 *labels = (u32 *) a->u.ptr->data;
915 uint lnum = a->u.ptr->length / 4;
916 char *pos = buf;
917
918 for (uint i = 0; i < lnum; i++)
919 {
920 if (size < 20)
921 {
922 bsprintf(pos, "...");
923 return;
924 }
925
926 uint l = bsprintf(pos, "%d/", labels[i]);
927 ADVANCE(pos, size, l);
928 }
929
930 /* Clear last slash or terminate empty string */
931 pos[lnum ? -1 : 0] = 0;
932 }
933
934 static inline void
935 bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
936 {
937 /* Cannot use bgp_set_attr_data() as it works on known attributes only */
938 ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
939 }
940
941
942 /*
943 * Attribute table
944 */
945
946 static const struct bgp_attr_desc bgp_attr_table[] = {
947 [BA_ORIGIN] = {
948 .name = "origin",
949 .type = EAF_TYPE_INT,
950 .flags = BAF_TRANSITIVE,
951 .export = bgp_export_origin,
952 .encode = bgp_encode_u8,
953 .decode = bgp_decode_origin,
954 .format = bgp_format_origin,
955 },
956 [BA_AS_PATH] = {
957 .name = "as_path",
958 .type = EAF_TYPE_AS_PATH,
959 .flags = BAF_TRANSITIVE,
960 .encode = bgp_encode_as_path,
961 .decode = bgp_decode_as_path,
962 },
963 [BA_NEXT_HOP] = {
964 .name = "next_hop",
965 .type = EAF_TYPE_IP_ADDRESS,
966 .flags = BAF_TRANSITIVE,
967 .encode = bgp_encode_next_hop,
968 .decode = bgp_decode_next_hop,
969 .format = bgp_format_next_hop,
970 },
971 [BA_MULTI_EXIT_DISC] = {
972 .name = "med",
973 .type = EAF_TYPE_INT,
974 .flags = BAF_OPTIONAL,
975 .encode = bgp_encode_u32,
976 .decode = bgp_decode_med,
977 },
978 [BA_LOCAL_PREF] = {
979 .name = "local_pref",
980 .type = EAF_TYPE_INT,
981 .flags = BAF_TRANSITIVE,
982 .export = bgp_export_local_pref,
983 .encode = bgp_encode_u32,
984 .decode = bgp_decode_local_pref,
985 },
986 [BA_ATOMIC_AGGR] = {
987 .name = "atomic_aggr",
988 .type = EAF_TYPE_OPAQUE,
989 .flags = BAF_TRANSITIVE,
990 .encode = bgp_encode_raw,
991 .decode = bgp_decode_atomic_aggr,
992 },
993 [BA_AGGREGATOR] = {
994 .name = "aggregator",
995 .type = EAF_TYPE_OPAQUE,
996 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
997 .encode = bgp_encode_aggregator,
998 .decode = bgp_decode_aggregator,
999 .format = bgp_format_aggregator,
1000 },
1001 [BA_COMMUNITY] = {
1002 .name = "community",
1003 .type = EAF_TYPE_INT_SET,
1004 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1005 .export = bgp_export_community,
1006 .encode = bgp_encode_u32s,
1007 .decode = bgp_decode_community,
1008 },
1009 [BA_ORIGINATOR_ID] = {
1010 .name = "originator_id",
1011 .type = EAF_TYPE_ROUTER_ID,
1012 .flags = BAF_OPTIONAL,
1013 .export = bgp_export_originator_id,
1014 .encode = bgp_encode_u32,
1015 .decode = bgp_decode_originator_id,
1016 },
1017 [BA_CLUSTER_LIST] = {
1018 .name = "cluster_list",
1019 .type = EAF_TYPE_INT_SET,
1020 .flags = BAF_OPTIONAL,
1021 .export = bgp_export_cluster_list,
1022 .encode = bgp_encode_u32s,
1023 .decode = bgp_decode_cluster_list,
1024 .format = bgp_format_cluster_list,
1025 },
1026 [BA_MP_REACH_NLRI] = {
1027 .name = "mp_reach_nlri",
1028 .type = EAF_TYPE_OPAQUE,
1029 .flags = BAF_OPTIONAL,
1030 .decode = bgp_decode_mp_reach_nlri,
1031 },
1032 [BA_MP_UNREACH_NLRI] = {
1033 .name = "mp_unreach_nlri",
1034 .type = EAF_TYPE_OPAQUE,
1035 .flags = BAF_OPTIONAL,
1036 .decode = bgp_decode_mp_unreach_nlri,
1037 },
1038 [BA_EXT_COMMUNITY] = {
1039 .name = "ext_community",
1040 .type = EAF_TYPE_EC_SET,
1041 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1042 .export = bgp_export_ext_community,
1043 .encode = bgp_encode_u32s,
1044 .decode = bgp_decode_ext_community,
1045 },
1046 [BA_AS4_PATH] = {
1047 .name = "as4_path",
1048 .type = EAF_TYPE_AS_PATH,
1049 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1050 .encode = bgp_encode_raw,
1051 .decode = bgp_decode_as4_path,
1052 },
1053 [BA_AS4_AGGREGATOR] = {
1054 .name = "as4_aggregator",
1055 .type = EAF_TYPE_OPAQUE,
1056 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1057 .encode = bgp_encode_raw,
1058 .decode = bgp_decode_as4_aggregator,
1059 .format = bgp_format_aggregator,
1060 },
1061 [BA_AIGP] = {
1062 .name = "aigp",
1063 .type = EAF_TYPE_OPAQUE,
1064 .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
1065 .export = bgp_export_aigp,
1066 .encode = bgp_encode_raw,
1067 .decode = bgp_decode_aigp,
1068 .format = bgp_format_aigp,
1069 },
1070 [BA_LARGE_COMMUNITY] = {
1071 .name = "large_community",
1072 .type = EAF_TYPE_LC_SET,
1073 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1074 .export = bgp_export_large_community,
1075 .encode = bgp_encode_u32s,
1076 .decode = bgp_decode_large_community,
1077 },
1078 [BA_MPLS_LABEL_STACK] = {
1079 .name = "mpls_label_stack",
1080 .type = EAF_TYPE_INT_SET,
1081 .export = bgp_export_mpls_label_stack,
1082 .encode = bgp_encode_mpls_label_stack,
1083 .decode = bgp_decode_mpls_label_stack,
1084 .format = bgp_format_mpls_label_stack,
1085 },
1086 };
1087
1088 static inline int
1089 bgp_attr_known(uint code)
1090 {
1091 return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
1092 }
1093
1094
1095 /*
1096 * Attribute export
1097 */
1098
1099 static inline void
1100 bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
1101 {
1102 if (EA_PROTO(a->id) != PROTOCOL_BGP)
1103 return;
1104
1105 uint code = EA_ID(a->id);
1106
1107 if (bgp_attr_known(code))
1108 {
1109 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1110
1111 /* The flags might have been zero if the attr was added by filters */
1112 a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
1113
1114 /* Set partial bit if new opt-trans attribute is attached to non-local route */
1115 if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
1116 (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
1117 a->flags |= BAF_PARTIAL;
1118
1119 /* Call specific hook */
1120 CALL(desc->export, s, a);
1121
1122 /* Attribute might become undefined in hook */
1123 if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
1124 return;
1125 }
1126 else
1127 {
1128 /* Don't re-export unknown non-transitive attributes */
1129 if (!(a->flags & BAF_TRANSITIVE))
1130 return;
1131
1132 a->flags |= BAF_PARTIAL;
1133 }
1134
1135 /* Append updated attribute */
1136 to->attrs[to->count++] = *a;
1137 }
1138
1139 /**
1140 * bgp_export_attrs - export BGP attributes
1141 * @s: BGP export state
1142 * @attrs: a list of extended attributes
1143 *
1144 * The bgp_export_attrs() function takes a list of attributes and merges it to
1145 * one newly allocated and sorted segment. Attributes are validated and
1146 * normalized by type-specific export hooks and attribute flags are updated.
1147 * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
1148 * empty community sets).
1149 *
1150 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
1151 */
1152 static inline ea_list *
1153 bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
1154 {
1155 /* Merge the attribute list */
1156 ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
1157 ea_merge(attrs, new);
1158 ea_sort(new);
1159
1160 uint i, count;
1161 count = new->count;
1162 new->count = 0;
1163
1164 /* Export each attribute */
1165 for (i = 0; i < count; i++)
1166 bgp_export_attr(s, &new->attrs[i], new);
1167
1168 if (s->err_withdraw)
1169 return NULL;
1170
1171 return new;
1172 }
1173
1174
1175 /*
1176 * Attribute encoding
1177 */
1178
1179 static inline int
1180 bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
1181 {
1182 ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
1183
1184 uint code = EA_ID(a->id);
1185
1186 if (bgp_attr_known(code))
1187 return bgp_attr_table[code].encode(s, a, buf, size);
1188 else
1189 return bgp_encode_raw(s, a, buf, size);
1190 }
1191
1192 /**
1193 * bgp_encode_attrs - encode BGP attributes
1194 * @s: BGP write state
1195 * @attrs: a list of extended attributes
1196 * @buf: buffer
1197 * @end: buffer end
1198 *
1199 * The bgp_encode_attrs() function takes a list of extended attributes
1200 * and converts it to its BGP representation (a part of an Update message).
1201 * BGP write state may be fake when called from MRT protocol.
1202 *
1203 * Result: Length of the attribute block generated or -1 if not enough space.
1204 */
1205 int
1206 bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
1207 {
1208 byte *pos = buf;
1209 int i, len;
1210
1211 for (i = 0; i < attrs->count; i++)
1212 {
1213 len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
1214
1215 if (len < 0)
1216 return -1;
1217
1218 pos += len;
1219 }
1220
1221 return pos - buf;
1222 }
1223
1224
1225 /*
1226 * Attribute decoding
1227 */
1228
1229 static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
1230
1231 static inline int
1232 bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
1233 {
1234 eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
1235 int num = p->cf->allow_local_as + 1;
1236 return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
1237 }
1238
1239 static inline int
1240 bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
1241 {
1242 eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
1243 return (e && (e->u.data == p->local_id));
1244 }
1245
1246 static inline int
1247 bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
1248 {
1249 eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
1250 return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
1251 }
1252
1253 static inline void
1254 bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
1255 {
1256 /* Handle duplicate attributes; RFC 7606 3 (g) */
1257 if (BIT32_TEST(s->attrs_seen, code))
1258 {
1259 if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
1260 bgp_parse_error(s, 1);
1261 else
1262 DISCARD("Discarding duplicate attribute (code %u)", code);
1263 }
1264 BIT32_SET(s->attrs_seen, code);
1265
1266 if (bgp_attr_known(code))
1267 {
1268 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1269
1270 /* Handle conflicting flags; RFC 7606 3 (c) */
1271 if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
1272 !(desc->flags & BAF_DECODE_FLAGS))
1273 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
1274
1275 desc->decode(s, code, flags, data, len, to);
1276 }
1277 else /* Unknown attribute */
1278 {
1279 if (!(flags & BAF_OPTIONAL))
1280 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
1281
1282 bgp_decode_unknown(s, code, flags, data, len, to);
1283 }
1284 }
1285
1286 /**
1287 * bgp_decode_attrs - check and decode BGP attributes
1288 * @s: BGP parse state
1289 * @data: start of attribute block
1290 * @len: length of attribute block
1291 *
1292 * This function takes a BGP attribute block (a part of an Update message), checks
1293 * its consistency and converts it to a list of BIRD route attributes represented
1294 * by an (uncached) &rta.
1295 */
1296 ea_list *
1297 bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
1298 {
1299 struct bgp_proto *p = s->proto;
1300 ea_list *attrs = NULL;
1301 uint code, flags, alen;
1302 byte *pos = data;
1303
1304 /* Parse the attributes */
1305 while (len)
1306 {
1307 alen = 0;
1308
1309 /* Read attribute type */
1310 if (len < 2)
1311 goto framing_error;
1312 flags = pos[0];
1313 code = pos[1];
1314 ADVANCE(pos, len, 2);
1315
1316 /* Read attribute length */
1317 if (flags & BAF_EXT_LEN)
1318 {
1319 if (len < 2)
1320 goto framing_error;
1321 alen = get_u16(pos);
1322 ADVANCE(pos, len, 2);
1323 }
1324 else
1325 {
1326 if (len < 1)
1327 goto framing_error;
1328 alen = *pos;
1329 ADVANCE(pos, len, 1);
1330 }
1331
1332 if (alen > len)
1333 goto framing_error;
1334
1335 DBG("Attr %02x %02x %u\n", code, flags, alen);
1336
1337 bgp_decode_attr(s, code, flags, pos, alen, &attrs);
1338 ADVANCE(pos, len, alen);
1339 }
1340
1341 if (s->err_withdraw)
1342 goto withdraw;
1343
1344 /* If there is no reachability NLRI, we are finished */
1345 if (!s->ip_reach_len && !s->mp_reach_len)
1346 return NULL;
1347
1348
1349 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1350 if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
1351 { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
1352
1353 if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
1354 { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
1355
1356 if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
1357 { REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
1358
1359 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1360 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1361 if (!p->as4_session)
1362 bgp_process_as4_attrs(&attrs, s->pool);
1363
1364 /* Reject routes with our ASN in AS_PATH attribute */
1365 if (bgp_as_path_loopy(p, attrs, p->local_as))
1366 goto withdraw;
1367
1368 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
1369 if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
1370 goto withdraw;
1371
1372 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1373 if (p->is_internal && bgp_originator_id_loopy(p, attrs))
1374 goto withdraw;
1375
1376 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1377 if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
1378 goto withdraw;
1379
1380 /* If there is no local preference, define one */
1381 if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
1382 bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1383
1384 return attrs;
1385
1386
1387 framing_error:
1388 /* RFC 7606 4 - handle attribute framing errors */
1389 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1390 alen, len, (int) (pos - s->attrs));
1391
1392 withdraw:
1393 /* RFC 7606 5.2 - handle missing NLRI during errors */
1394 if (!s->ip_reach_len && !s->mp_reach_len)
1395 bgp_parse_error(s, 1);
1396
1397 s->err_withdraw = 1;
1398 return NULL;
1399 }
1400
1401 void
1402 bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
1403 {
1404 /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
1405 if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
1406 {
1407 REPORT("Discarding AIGP attribute received on non-AIGP session");
1408 bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
1409 }
1410 }
1411
1412
1413 /*
1414 * Route bucket hash table
1415 */
1416
1417 #define RBH_KEY(b) b->eattrs, b->hash
1418 #define RBH_NEXT(b) b->next
1419 #define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
1420 #define RBH_FN(a,h) h
1421
1422 #define RBH_REHASH bgp_rbh_rehash
1423 #define RBH_PARAMS /8, *2, 2, 2, 8, 20
1424
1425
1426 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
1427
1428 void
1429 bgp_init_bucket_table(struct bgp_channel *c)
1430 {
1431 HASH_INIT(c->bucket_hash, c->pool, 8);
1432
1433 init_list(&c->bucket_queue);
1434 c->withdraw_bucket = NULL;
1435 }
1436
1437 void
1438 bgp_free_bucket_table(struct bgp_channel *c)
1439 {
1440 HASH_FREE(c->bucket_hash);
1441
1442 struct bgp_bucket *b;
1443 WALK_LIST_FIRST(b, c->bucket_queue)
1444 {
1445 rem_node(&b->send_node);
1446 mb_free(b);
1447 }
1448
1449 mb_free(c->withdraw_bucket);
1450 c->withdraw_bucket = NULL;
1451 }
1452
1453 static struct bgp_bucket *
1454 bgp_get_bucket(struct bgp_channel *c, ea_list *new)
1455 {
1456 /* Hash and lookup */
1457 u32 hash = ea_hash(new);
1458 struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
1459
1460 if (b)
1461 return b;
1462
1463 uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
1464 uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
1465 uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
1466 uint i;
1467 byte *dest;
1468
1469 /* Gather total size of non-inline attributes */
1470 for (i = 0; i < new->count; i++)
1471 {
1472 eattr *a = &new->attrs[i];
1473
1474 if (!(a->type & EAF_EMBEDDED))
1475 size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
1476 }
1477
1478 /* Create the bucket */
1479 b = mb_alloc(c->pool, size);
1480 init_list(&b->prefixes);
1481 b->hash = hash;
1482
1483 /* Copy list of extended attributes */
1484 memcpy(b->eattrs, new, ea_size);
1485 dest = ((byte *) b->eattrs) + ea_size_aligned;
1486
1487 /* Copy values of non-inline attributes */
1488 for (i = 0; i < new->count; i++)
1489 {
1490 eattr *a = &b->eattrs->attrs[i];
1491
1492 if (!(a->type & EAF_EMBEDDED))
1493 {
1494 const struct adata *oa = a->u.ptr;
1495 struct adata *na = (struct adata *) dest;
1496 memcpy(na, oa, sizeof(struct adata) + oa->length);
1497 a->u.ptr = na;
1498 dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
1499 }
1500 }
1501
1502 /* Insert the bucket to send queue and bucket hash */
1503 add_tail(&c->bucket_queue, &b->send_node);
1504 HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
1505
1506 return b;
1507 }
1508
1509 static struct bgp_bucket *
1510 bgp_get_withdraw_bucket(struct bgp_channel *c)
1511 {
1512 if (!c->withdraw_bucket)
1513 {
1514 c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
1515 init_list(&c->withdraw_bucket->prefixes);
1516 }
1517
1518 return c->withdraw_bucket;
1519 }
1520
1521 void
1522 bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1523 {
1524 rem_node(&b->send_node);
1525 HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
1526 mb_free(b);
1527 }
1528
1529 void
1530 bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1531 {
1532 rem_node(&b->send_node);
1533 add_tail(&c->bucket_queue, &b->send_node);
1534 }
1535
1536 void
1537 bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1538 {
1539 struct bgp_proto *p = (void *) c->c.proto;
1540 struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
1541
1542 log(L_ERR "%s: Attribute list too long", p->p.name);
1543 while (!EMPTY_LIST(b->prefixes))
1544 {
1545 struct bgp_prefix *px = HEAD(b->prefixes);
1546
1547 log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
1548 rem_node(&px->buck_node);
1549 add_tail(&wb->prefixes, &px->buck_node);
1550 }
1551 }
1552
1553
1554 /*
1555 * Prefix hash table
1556 */
1557
1558 #define PXH_KEY(px) px->net, px->path_id, px->hash
1559 #define PXH_NEXT(px) px->next
1560 #define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
1561 #define PXH_FN(n,i,h) h
1562
1563 #define PXH_REHASH bgp_pxh_rehash
1564 #define PXH_PARAMS /8, *2, 2, 2, 8, 24
1565
1566
1567 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
1568
1569 void
1570 bgp_init_prefix_table(struct bgp_channel *c)
1571 {
1572 HASH_INIT(c->prefix_hash, c->pool, 8);
1573
1574 uint alen = net_addr_length[c->c.net_type];
1575 c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
1576 }
1577
1578 void
1579 bgp_free_prefix_table(struct bgp_channel *c)
1580 {
1581 HASH_FREE(c->prefix_hash);
1582
1583 rfree(c->prefix_slab);
1584 c->prefix_slab = NULL;
1585 }
1586
1587 static struct bgp_prefix *
1588 bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
1589 {
1590 u32 hash = net_hash(net) ^ u32_hash(path_id);
1591 struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
1592
1593 if (px)
1594 {
1595 rem_node(&px->buck_node);
1596 return px;
1597 }
1598
1599 if (c->prefix_slab)
1600 px = sl_alloc(c->prefix_slab);
1601 else
1602 px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
1603
1604 px->buck_node.next = NULL;
1605 px->buck_node.prev = NULL;
1606 px->hash = hash;
1607 px->path_id = path_id;
1608 net_copy(px->net, net);
1609
1610 HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
1611
1612 return px;
1613 }
1614
1615 void
1616 bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
1617 {
1618 rem_node(&px->buck_node);
1619 HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
1620
1621 if (c->prefix_slab)
1622 sl_free(c->prefix_slab, px);
1623 else
1624 mb_free(px);
1625 }
1626
1627
1628 /*
1629 * BGP protocol glue
1630 */
1631
1632 int
1633 bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED)
1634 {
1635 rte *e = *new;
1636 struct proto *SRC = e->attrs->src->proto;
1637 struct bgp_proto *p = (struct bgp_proto *) P;
1638 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
1639
1640 /* Reject our routes */
1641 if (src == p)
1642 return -1;
1643
1644 /* Accept non-BGP routes */
1645 if (src == NULL)
1646 return 0;
1647
1648 /* IBGP route reflection, RFC 4456 */
1649 if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
1650 {
1651 /* Rejected unless configured as route reflector */
1652 if (!p->rr_client && !src->rr_client)
1653 return -1;
1654
1655 /* Generally, this should be handled when path is received, but we check it
1656 also here as rr_cluster_id may be undefined or different in src. */
1657 if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
1658 return -1;
1659 }
1660
1661 /* Handle well-known communities, RFC 1997 */
1662 struct eattr *c;
1663 if (p->cf->interpret_communities &&
1664 (c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
1665 {
1666 const struct adata *d = c->u.ptr;
1667
1668 /* Do not export anywhere */
1669 if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1670 return -1;
1671
1672 /* Do not export outside of AS (or member-AS) */
1673 if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
1674 return -1;
1675
1676 /* Do not export outside of AS (or confederation) */
1677 if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
1678 return -1;
1679
1680 /* Do not export LLGR_STALE routes to LLGR-ignorant peers */
1681 if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
1682 return -1;
1683 }
1684
1685 return 0;
1686 }
1687
1688 static ea_list *
1689 bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
1690 {
1691 struct proto *SRC = e->attrs->src->proto;
1692 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
1693 struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
1694 ea_list *attrs = attrs0;
1695 eattr *a;
1696 const adata *ad;
1697
1698 /* ORIGIN attribute - mandatory, attach if missing */
1699 if (! bgp_find_attr(attrs0, BA_ORIGIN))
1700 bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
1701
1702 /* AS_PATH attribute - mandatory */
1703 a = bgp_find_attr(attrs0, BA_AS_PATH);
1704 ad = a ? a->u.ptr : &null_adata;
1705
1706 /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1707 if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
1708 ad = as_path_strip_confed(pool, ad);
1709
1710 /* AS_PATH attribute - keep or prepend ASN */
1711 if (p->is_internal || p->rs_client)
1712 {
1713 /* IBGP or route server -> just ensure there is one */
1714 if (!a)
1715 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
1716 }
1717 else if (p->is_interior)
1718 {
1719 /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1720 ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
1721 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1722 }
1723 else /* Regular EBGP (no RS, no confederation) */
1724 {
1725 /* Regular EBGP -> prepend ASN as regular sequence */
1726 ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
1727 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1728
1729 /* MULTI_EXIT_DESC attribute - accept only if set in export filter */
1730 a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
1731 if (a && !(a->type & EAF_FRESH))
1732 bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
1733 }
1734
1735 /* NEXT_HOP attribute - delegated to AF-specific hook */
1736 a = bgp_find_attr(attrs0, BA_NEXT_HOP);
1737 bgp_update_next_hop(&s, a, &attrs);
1738
1739 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
1740 if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
1741 bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1742
1743 /* AIGP attribute - accumulate local metric or originate new one */
1744 u64 metric;
1745 if (s.local_next_hop &&
1746 (bgp_total_aigp_metric_(e, &metric, &ad) ||
1747 (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
1748 {
1749 ad = bgp_aigp_set_metric(pool, ad, metric);
1750 bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
1751 }
1752
1753 /* IBGP route reflection, RFC 4456 */
1754 if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
1755 {
1756 /* ORIGINATOR_ID attribute - attach if not already set */
1757 if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
1758 bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
1759
1760 /* CLUSTER_LIST attribute - prepend cluster ID */
1761 a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
1762 ad = a ? a->u.ptr : NULL;
1763
1764 /* Prepend src cluster ID */
1765 if (src->rr_cluster_id)
1766 ad = int_set_prepend(pool, ad, src->rr_cluster_id);
1767
1768 /* Prepend dst cluster ID if src and dst clusters are different */
1769 if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
1770 ad = int_set_prepend(pool, ad, p->rr_cluster_id);
1771
1772 /* Should be at least one prepended cluster ID */
1773 bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
1774 }
1775
1776 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1777 if (! p->as4_session)
1778 {
1779 a = bgp_find_attr(attrs, BA_AS_PATH);
1780 if (a && as_path_contains_as4(a->u.ptr))
1781 {
1782 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
1783 bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
1784 }
1785
1786 a = bgp_find_attr(attrs, BA_AGGREGATOR);
1787 if (a && aggregator_contains_as4(a->u.ptr))
1788 {
1789 bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
1790 bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
1791 }
1792 }
1793
1794 /*
1795 * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1796 * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1797 * should be checked in AF-specific hooks.
1798 */
1799
1800 /* Apply per-attribute export hooks for validatation and normalization */
1801 return bgp_export_attrs(&s, attrs);
1802 }
1803
1804 void
1805 bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
1806 {
1807 struct bgp_proto *p = (void *) P;
1808 struct bgp_channel *c = (void *) C;
1809 struct bgp_bucket *buck;
1810 struct bgp_prefix *px;
1811 u32 path;
1812
1813 if (new)
1814 {
1815 struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2);
1816
1817 /* If attributes are invalid, we fail back to withdraw */
1818 buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
1819 path = new->attrs->src->global_id;
1820
1821 lp_flush(bgp_linpool2);
1822 }
1823 else
1824 {
1825 buck = bgp_get_withdraw_bucket(c);
1826 path = old->attrs->src->global_id;
1827 }
1828
1829 px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
1830 add_tail(&buck->prefixes, &px->buck_node);
1831
1832 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1833 }
1834
1835
1836 static inline u32
1837 bgp_get_neighbor(rte *r)
1838 {
1839 eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1840 u32 as;
1841
1842 if (e && as_path_get_first_regular(e->u.ptr, &as))
1843 return as;
1844
1845 /* If AS_PATH is not defined, we treat rte as locally originated */
1846 struct bgp_proto *p = (void *) r->attrs->src->proto;
1847 return p->cf->confederation ?: p->local_as;
1848 }
1849
1850 static inline int
1851 rte_stale(rte *r)
1852 {
1853 if (r->u.bgp.stale < 0)
1854 {
1855 /* If staleness is unknown, compute and cache it */
1856 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
1857 r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
1858 }
1859
1860 return r->u.bgp.stale;
1861 }
1862
1863 int
1864 bgp_rte_better(rte *new, rte *old)
1865 {
1866 struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
1867 struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
1868 eattr *x, *y;
1869 u32 n, o;
1870
1871 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1872 n = new->u.bgp.suppressed;
1873 o = old->u.bgp.suppressed;
1874 if (n > o)
1875 return 0;
1876 if (n < o)
1877 return 1;
1878
1879 /* RFC 4271 9.1.2.1. Route resolvability test */
1880 n = rte_resolvable(new);
1881 o = rte_resolvable(old);
1882 if (n > o)
1883 return 1;
1884 if (n < o)
1885 return 0;
1886
1887 /* LLGR draft - depreference stale routes */
1888 n = rte_stale(new);
1889 o = rte_stale(old);
1890 if (n > o)
1891 return 0;
1892 if (n < o)
1893 return 1;
1894
1895 /* Start with local preferences */
1896 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1897 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1898 n = x ? x->u.data : new_bgp->cf->default_local_pref;
1899 o = y ? y->u.data : old_bgp->cf->default_local_pref;
1900 if (n > o)
1901 return 1;
1902 if (n < o)
1903 return 0;
1904
1905 /* RFC 7311 4.1 - Apply AIGP metric */
1906 u64 n2 = bgp_total_aigp_metric(new);
1907 u64 o2 = bgp_total_aigp_metric(old);
1908 if (n2 < o2)
1909 return 1;
1910 if (n2 > o2)
1911 return 0;
1912
1913 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1914 if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
1915 {
1916 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1917 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1918 n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1919 o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1920 if (n < o)
1921 return 1;
1922 if (n > o)
1923 return 0;
1924 }
1925
1926 /* RFC 4271 9.1.2.2. b) Use origins */
1927 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1928 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1929 n = x ? x->u.data : ORIGIN_INCOMPLETE;
1930 o = y ? y->u.data : ORIGIN_INCOMPLETE;
1931 if (n < o)
1932 return 1;
1933 if (n > o)
1934 return 0;
1935
1936 /* RFC 4271 9.1.2.2. c) Compare MED's */
1937 /* Proper RFC 4271 path selection cannot be interpreted as finding
1938 * the best path in some ordering. It is implemented partially in
1939 * bgp_rte_recalculate() when deterministic_med option is
1940 * active. Without that option, the behavior is just an
1941 * approximation, which in specific situations may lead to
1942 * persistent routing loops, because it is nondeterministic - it
1943 * depends on the order in which routes appeared. But it is also the
1944 * same behavior as used by default in Cisco routers, so it is
1945 * probably not a big issue.
1946 */
1947 if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1948 (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
1949 {
1950 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1951 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1952 n = x ? x->u.data : new_bgp->cf->default_med;
1953 o = y ? y->u.data : old_bgp->cf->default_med;
1954 if (n < o)
1955 return 1;
1956 if (n > o)
1957 return 0;
1958 }
1959
1960 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1961 if (new_bgp->is_interior > old_bgp->is_interior)
1962 return 0;
1963 if (new_bgp->is_interior < old_bgp->is_interior)
1964 return 1;
1965
1966 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1967 n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1968 o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1969 if (n < o)
1970 return 1;
1971 if (n > o)
1972 return 0;
1973
1974 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1975 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1976 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1977 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1978 n = x ? x->u.data : new_bgp->remote_id;
1979 o = y ? y->u.data : old_bgp->remote_id;
1980
1981 /* RFC 5004 - prefer older routes */
1982 /* (if both are external and from different peer) */
1983 if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1984 !new_bgp->is_internal && n != o)
1985 return 0;
1986
1987 /* rest of RFC 4271 9.1.2.2. f) */
1988 if (n < o)
1989 return 1;
1990 if (n > o)
1991 return 0;
1992
1993 /* RFC 4456 9. b) Compare cluster list lengths */
1994 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1995 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1996 n = x ? int_set_get_size(x->u.ptr) : 0;
1997 o = y ? int_set_get_size(y->u.ptr) : 0;
1998 if (n < o)
1999 return 1;
2000 if (n > o)
2001 return 0;
2002
2003 /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
2004 return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
2005 }
2006
2007
2008 int
2009 bgp_rte_mergable(rte *pri, rte *sec)
2010 {
2011 struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
2012 struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
2013 eattr *x, *y;
2014 u32 p, s;
2015
2016 /* Skip suppressed routes (see bgp_rte_recalculate()) */
2017 if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
2018 return 0;
2019
2020 /* RFC 4271 9.1.2.1. Route resolvability test */
2021 if (rte_resolvable(pri) != rte_resolvable(sec))
2022 return 0;
2023
2024 /* LLGR draft - depreference stale routes */
2025 if (rte_stale(pri) != rte_stale(sec))
2026 return 0;
2027
2028 /* Start with local preferences */
2029 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2030 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2031 p = x ? x->u.data : pri_bgp->cf->default_local_pref;
2032 s = y ? y->u.data : sec_bgp->cf->default_local_pref;
2033 if (p != s)
2034 return 0;
2035
2036 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
2037 if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
2038 {
2039 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2040 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2041 p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
2042 s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
2043
2044 if (p != s)
2045 return 0;
2046
2047 // if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
2048 // return 0;
2049 }
2050
2051 /* RFC 4271 9.1.2.2. b) Use origins */
2052 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2053 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2054 p = x ? x->u.data : ORIGIN_INCOMPLETE;
2055 s = y ? y->u.data : ORIGIN_INCOMPLETE;
2056 if (p != s)
2057 return 0;
2058
2059 /* RFC 4271 9.1.2.2. c) Compare MED's */
2060 if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
2061 (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
2062 {
2063 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2064 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2065 p = x ? x->u.data : pri_bgp->cf->default_med;
2066 s = y ? y->u.data : sec_bgp->cf->default_med;
2067 if (p != s)
2068 return 0;
2069 }
2070
2071 /* RFC 4271 9.1.2.2. d) Prefer external peers */
2072 if (pri_bgp->is_interior != sec_bgp->is_interior)
2073 return 0;
2074
2075 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
2076 p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
2077 s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
2078 if (p != s)
2079 return 0;
2080
2081 /* Remaining criteria are ignored */
2082
2083 return 1;
2084 }
2085
2086
2087 static inline int
2088 same_group(rte *r, u32 lpref, u32 lasn)
2089 {
2090 return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
2091 }
2092
2093 static inline int
2094 use_deterministic_med(rte *r)
2095 {
2096 struct proto *P = r->attrs->src->proto;
2097 return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
2098 }
2099
2100 int
2101 bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
2102 {
2103 rte *r, *s;
2104 rte *key = new ? new : old;
2105 u32 lpref = key->pref;
2106 u32 lasn = bgp_get_neighbor(key);
2107 int old_suppressed = old ? old->u.bgp.suppressed : 0;
2108
2109 /*
2110 * Proper RFC 4271 path selection is a bit complicated, it cannot be
2111 * implemented just by rte_better(), because it is not a linear
2112 * ordering. But it can be splitted to two levels, where the lower
2113 * level chooses the best routes in each group of routes from the
2114 * same neighboring AS and higher level chooses the best route (with
2115 * a slightly different ordering) between the best-in-group routes.
2116 *
2117 * When deterministic_med is disabled, we just ignore this issue and
2118 * choose the best route by bgp_rte_better() alone. If enabled, the
2119 * lower level of the route selection is done here (for the group
2120 * to which the changed route belongs), all routes in group are
2121 * marked as suppressed, just chosen best-in-group is not.
2122 *
2123 * Global best route selection then implements higher level by
2124 * choosing between non-suppressed routes (as they are always
2125 * preferred over suppressed routes). Routes from BGP protocols
2126 * that do not set deterministic_med are just never suppressed. As
2127 * they do not participate in the lower level selection, it is OK
2128 * that this fn is not called for them.
2129 *
2130 * The idea is simple, the implementation is more problematic,
2131 * mostly because of optimizations in rte_recalculate() that
2132 * avoids full recalculation in most cases.
2133 *
2134 * We can assume that at least one of new, old is non-NULL and both
2135 * are from the same protocol with enabled deterministic_med. We
2136 * group routes by both neighbor AS (lasn) and preference (lpref),
2137 * because bgp_rte_better() does not handle preference itself.
2138 */
2139
2140 /* If new and old are from different groups, we just process that
2141 as two independent events */
2142 if (new && old && !same_group(old, lpref, lasn))
2143 {
2144 int i1, i2;
2145 i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
2146 i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
2147 return i1 || i2;
2148 }
2149
2150 /*
2151 * We could find the best-in-group and then make some shortcuts like
2152 * in rte_recalculate, but as we would have to walk through all
2153 * net->routes just to find it, it is probably not worth it. So we
2154 * just have one simple fast case that uses just the old route.
2155 * We also set suppressed flag to avoid using it in bgp_rte_better().
2156 */
2157
2158 if (new)
2159 new->u.bgp.suppressed = 1;
2160
2161 if (old)
2162 {
2163 old->u.bgp.suppressed = 1;
2164
2165 /* The fast case - replace not best with worse (or remove not best) */
2166 if (old_suppressed && !(new && bgp_rte_better(new, old)))
2167 return 0;
2168 }
2169
2170 /* The default case - find a new best-in-group route */
2171 r = new; /* new may not be in the list */
2172 for (s=net->routes; rte_is_valid(s); s=s->next)
2173 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2174 {
2175 s->u.bgp.suppressed = 1;
2176 if (!r || bgp_rte_better(s, r))
2177 r = s;
2178 }
2179
2180 /* Simple case - the last route in group disappears */
2181 if (!r)
2182 return 0;
2183
2184 /* Found if new is mergable with best-in-group */
2185 if (new && (new != r) && bgp_rte_mergable(r, new))
2186 new->u.bgp.suppressed = 0;
2187
2188 /* Found all existing routes mergable with best-in-group */
2189 for (s=net->routes; rte_is_valid(s); s=s->next)
2190 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2191 if ((s != r) && bgp_rte_mergable(r, s))
2192 s->u.bgp.suppressed = 0;
2193
2194 /* Found best-in-group */
2195 r->u.bgp.suppressed = 0;
2196
2197 /*
2198 * There are generally two reasons why we have to force
2199 * recalculation (return 1): First, the new route may be wrongfully
2200 * chosen to be the best in the first case check in
2201 * rte_recalculate(), this may happen only if old_best is from the
2202 * same group. Second, another (different than new route)
2203 * best-in-group is chosen and that may be the proper best (although
2204 * rte_recalculate() would ignore that possibility).
2205 *
2206 * There are three possible cases according to whether the old route
2207 * was the best in group (OBG, i.e. !old_suppressed) and whether the
2208 * new route is the best in group (NBG, tested by r == new). These
2209 * cases work even if old or new is NULL.
2210 *
2211 * NBG -> new is a possible candidate for the best route, so we just
2212 * check for the first reason using same_group().
2213 *
2214 * !NBG && OBG -> Second reason applies, return 1
2215 *
2216 * !NBG && !OBG -> Best in group does not change, old != old_best,
2217 * rte_better(new, old_best) is false and therefore
2218 * the first reason does not apply, return 0
2219 */
2220
2221 if (r == new)
2222 return old_best && same_group(old_best, lpref, lasn);
2223 else
2224 return !old_suppressed;
2225 }
2226
2227 struct rte *
2228 bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
2229 {
2230 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
2231 const struct adata *ad = a ? a->u.ptr : NULL;
2232 uint flags = a ? a->flags : BAF_PARTIAL;
2233
2234 if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
2235 return NULL;
2236
2237 if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
2238 return r;
2239
2240 r = rte_cow_rta(r, pool);
2241 bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
2242 int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
2243 r->u.bgp.stale = 1;
2244
2245 return r;
2246 }
2247
2248
2249 /*
2250 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
2251 */
2252 static void
2253 bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
2254 {
2255 eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
2256 eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
2257 eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
2258 eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
2259
2260 /* First, unset AS4_* attributes */
2261 if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
2262 if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
2263
2264 /* Handle AGGREGATOR attribute */
2265 if (a2 && a4)
2266 {
2267 u32 a2_asn = get_u32(a2->u.ptr->data);
2268
2269 /* If routes were aggregated by an old router, then AS4_PATH and
2270 AS4_AGGREGATOR are invalid. In that case we give up. */
2271 if (a2_asn != AS_TRANS)
2272 return;
2273
2274 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
2275 a2->u.ptr = a4->u.ptr;
2276 }
2277
2278 /* Handle AS_PATH attribute */
2279 if (p2 && p4)
2280 {
2281 /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
2282 int p2_len = as_path_getlen(p2->u.ptr);
2283 int p4_len = as_path_getlen(p4->u.ptr);
2284
2285 /* AS_PATH is too short, give up */
2286 if (p2_len < p4_len)
2287 return;
2288
2289 /* Merge AS_PATH and AS4_PATH */
2290 struct adata *apc = as_path_cut(pool, p2->u.ptr, p2_len - p4_len);
2291 p2->u.ptr = as_path_merge(pool, apc, p4->u.ptr);
2292 }
2293 }
2294
2295 int
2296 bgp_get_attr(const eattr *a, byte *buf, int buflen)
2297 {
2298 uint i = EA_ID(a->id);
2299 const struct bgp_attr_desc *d;
2300 int len;
2301
2302 if (bgp_attr_known(i))
2303 {
2304 d = &bgp_attr_table[i];
2305 len = bsprintf(buf, "%s", d->name);
2306 buf += len;
2307 if (d->format)
2308 {
2309 *buf++ = ':';
2310 *buf++ = ' ';
2311 d->format(a, buf, buflen - len - 2);
2312 return GA_FULL;
2313 }
2314 return GA_NAME;
2315 }
2316
2317 bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
2318 return GA_NAME;
2319 }
2320
2321 void
2322 bgp_get_route_info(rte *e, byte *buf)
2323 {
2324 eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2325 eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2326 u32 origas;
2327
2328 buf += bsprintf(buf, " (%d", e->pref);
2329
2330 if (e->u.bgp.suppressed)
2331 buf += bsprintf(buf, "-");
2332
2333 if (rte_stale(e))
2334 buf += bsprintf(buf, "s");
2335
2336 u64 metric = bgp_total_aigp_metric(e);
2337 if (metric < BGP_AIGP_MAX)
2338 {
2339 buf += bsprintf(buf, "/%lu", metric);
2340 }
2341 else if (e->attrs->igp_metric)
2342 {
2343 if (!rte_resolvable(e))
2344 buf += bsprintf(buf, "/-");
2345 else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
2346 buf += bsprintf(buf, "/?");
2347 else
2348 buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
2349 }
2350 buf += bsprintf(buf, ") [");
2351
2352 if (p && as_path_get_last(p->u.ptr, &origas))
2353 buf += bsprintf(buf, "AS%u", origas);
2354 if (o)
2355 buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2356 strcpy(buf, "]");
2357 }