]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/attrs.c
BGP: Add option to enforce first AS in AS_PATH
[thirdparty/bird.git] / proto / bgp / attrs.c
1 /*
2 * BIRD -- BGP Attributes
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 #undef LOCAL_DEBUG
12
13 #include <stdlib.h>
14
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "conf/conf.h"
21 #include "lib/resource.h"
22 #include "lib/string.h"
23 #include "lib/unaligned.h"
24
25 #include "bgp.h"
26
27 /*
28 * UPDATE message error handling
29 *
30 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32 * - Checks of some optional attribute values are missing.
33 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34 * are probably inadequate.
35 *
36 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37 * 4271 does not explicitly specify the behavior in that case.
38 *
39 * Loop detection related to route reflection (based on ORIGINATOR_ID
40 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41 * specifies that such updates should be ignored, but that is generally
42 * a bad idea.
43 *
44 * BGP attribute table has several hooks:
45 *
46 * export - Hook that validates and normalizes attribute during export phase.
47 * Receives eattr, may modify it (e.g., sort community lists for canonical
48 * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
49 * necessary. May assume that eattr has value valid w.r.t. its type, but may be
50 * invalid w.r.t. BGP constraints. Optional.
51 *
52 * encode - Hook that converts internal representation to external one during
53 * packet writing. Receives eattr and puts it in the buffer (including attribute
54 * header). Returns number of bytes, or -1 if not enough space. May assume that
55 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
56 * for all known attributes that exist internally after export phase (i.e., all
57 * except pseudoattributes MP_(UN)REACH_NLRI).
58 *
59 * decode - Hook that converts external representation to internal one during
60 * packet parsing. Receives attribute data in buffer, validates it and adds
61 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
62 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
63 *
64 * format - Optional hook that converts eattr to textual representation.
65 */
66
67
/*
 * Descriptor of one known BGP path attribute. Instances live in
 * bgp_attr_table[], indexed by the BGP attribute code.
 */
struct bgp_attr_desc {
  const char *name;	/* Attribute name; a NULL name marks an unknown code (see bgp_attr_known()) */
  uint type;		/* EAF_TYPE_* value type handed to ea_set_attr() by bgp_set_attr() */
  uint flags;		/* Canonical BAF_* flags, forced onto the attribute during export */
  /* Optional hook: validate / normalize the attribute during the export phase */
  void (*export)(struct bgp_export_state *s, eattr *a);
  /* Hook writing the attribute to a buffer; returns bytes written or -1 when out of space */
  int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
  /* Hook parsing received attribute data and adding the result to an ea_list */
  void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
  /* Optional hook rendering the attribute as text */
  void (*format)(eattr *ea, byte *buf, uint size);
};
77
78 static const struct bgp_attr_desc bgp_attr_table[];
79
80 static inline int bgp_attr_known(uint code);
81
82 eattr *
83 bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
84 {
85 ASSERT(bgp_attr_known(code));
86
87 return ea_set_attr(
88 attrs,
89 pool,
90 EA_CODE(PROTOCOL_BGP, code),
91 flags,
92 bgp_attr_table[code].type,
93 val
94 );
95 }
96
97
98
/*
 * Helper macros for attribute hooks. All of them expect a variable named
 * 's' (the export / parse state) in scope, and DISCARD(), WITHDRAW() and
 * UNSET() return from the enclosing (void) function.
 */

/* Log a message prefixed with the protocol instance name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log a message and leave the hook without storing the attribute */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log a message, flag the update as withdrawn and leave the hook */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark the attribute as undefined and leave the hook */
#define UNSET(a) \
  ({ a->type = EAF_TYPE_UNDEF; return; })

/* Format strings shared by the hooks above */
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY "Missing mandatory %s attribute"
116
117
118 static inline int
119 bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
120 {
121 *buf++ = flags;
122 *buf++ = code;
123 *buf++ = len;
124 return 3;
125 }
126
127 static inline int
128 bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
129 {
130 *buf++ = flags | BAF_EXT_LEN;
131 *buf++ = code;
132 put_u16(buf, len);
133 return 4;
134 }
135
136 static inline int
137 bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
138 {
139 if (len < 256)
140 return bgp_put_attr_hdr3(buf, code, flags, len);
141 else
142 return bgp_put_attr_hdr4(buf, code, flags, len);
143 }
144
145 static int
146 bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
147 {
148 if (size < (3+1))
149 return -1;
150
151 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
152 buf[3] = a->u.data;
153
154 return 3+1;
155 }
156
157 static int
158 bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
159 {
160 if (size < (3+4))
161 return -1;
162
163 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
164 put_u32(buf+3, a->u.data);
165
166 return 3+4;
167 }
168
169 static int
170 bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
171 {
172 uint len = a->u.ptr->length;
173
174 if (size < (4+len))
175 return -1;
176
177 uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
178 put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
179
180 return hdr + len;
181 }
182
183 static int
184 bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint len)
185 {
186 if (size < (4+len))
187 return -1;
188
189 uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
190 memcpy(buf + hdr, data, len);
191
192 return hdr + len;
193 }
194
195 static int
196 bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
197 {
198 return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
199 }
200
201
202 /*
203 * AIGP handling
204 */
205
206 static int
207 bgp_aigp_valid(byte *data, uint len, char *err, uint elen)
208 {
209 byte *pos = data;
210 char *err_dsc = NULL;
211 uint err_val = 0;
212
213 #define BAD(DSC,VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; })
214 while (len)
215 {
216 if (len < 3)
217 BAD("TLV framing error", len);
218
219 /* Process one TLV */
220 uint ptype = pos[0];
221 uint plen = get_u16(pos + 1);
222
223 if (len < plen)
224 BAD("TLV framing error", plen);
225
226 if (plen < 3)
227 BAD("Bad TLV length", plen);
228
229 if ((ptype == BGP_AIGP_METRIC) && (plen != 11))
230 BAD("Bad AIGP TLV length", plen);
231
232 ADVANCE(pos, len, plen);
233 }
234 #undef BAD
235
236 return 1;
237
238 bad:
239 if (err)
240 if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0)
241 err[0] = 0;
242
243 return 0;
244 }
245
246 static const byte *
247 bgp_aigp_get_tlv(const struct adata *ad, uint type)
248 {
249 if (!ad)
250 return NULL;
251
252 uint len = ad->length;
253 const byte *pos = ad->data;
254
255 while (len)
256 {
257 uint ptype = pos[0];
258 uint plen = get_u16(pos + 1);
259
260 if (ptype == type)
261 return pos;
262
263 ADVANCE(pos, len, plen);
264 }
265
266 return NULL;
267 }
268
269 static const struct adata *
270 bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen)
271 {
272 uint len = ad ? ad->length : 0;
273 const byte *pos = ad ? ad->data : NULL;
274 struct adata *res = lp_alloc_adata(pool, len + 3 + dlen);
275 byte *dst = res->data;
276 byte *tlv = NULL;
277 int del = 0;
278
279 while (len)
280 {
281 uint ptype = pos[0];
282 uint plen = get_u16(pos + 1);
283
284 /* Find position for new TLV */
285 if ((ptype >= type) && !tlv)
286 {
287 tlv = dst;
288 dst += 3 + dlen;
289 }
290
291 /* Skip first matching TLV, copy others */
292 if ((ptype == type) && !del)
293 del = 1;
294 else
295 {
296 memcpy(dst, pos, plen);
297 dst += plen;
298 }
299
300 ADVANCE(pos, len, plen);
301 }
302
303 if (!tlv)
304 {
305 tlv = dst;
306 dst += 3 + dlen;
307 }
308
309 /* Store the TLD */
310 put_u8(tlv + 0, type);
311 put_u16(tlv + 1, 3 + dlen);
312 memcpy(tlv + 3, data, dlen);
313
314 /* Update length */
315 res->length = dst - res->data;
316
317 return res;
318 }
319
320 static u64 UNUSED
321 bgp_aigp_get_metric(const struct adata *ad, u64 def)
322 {
323 const byte *b = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC);
324 return b ? get_u64(b + 3) : def;
325 }
326
327 static const struct adata *
328 bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
329 {
330 byte data[8];
331 put_u64(data, metric);
332 return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, data, 8);
333 }
334
335 int
336 bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
337 {
338 eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
339 if (!a)
340 return 0;
341
342 const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
343 if (!b)
344 return 0;
345
346 u64 aigp = get_u64(b + 3);
347 u64 step = e->attrs->igp_metric;
348
349 if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
350 step = BGP_AIGP_MAX;
351
352 if (!step)
353 step = 1;
354
355 *ad = a->u.ptr;
356 *metric = aigp + step;
357 if (*metric < aigp)
358 *metric = BGP_AIGP_MAX;
359
360 return 1;
361 }
362
363 static inline int
364 bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
365 {
366 if (e->attrs->source == RTS_BGP)
367 return 0;
368
369 *metric = rt_get_igp_metric(e);
370 *ad = NULL;
371 return *metric < IGP_METRIC_UNKNOWN;
372 }
373
374
375 /*
376 * Attribute hooks
377 */
378
379 static void
380 bgp_export_origin(struct bgp_export_state *s, eattr *a)
381 {
382 if (a->u.data > 2)
383 WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
384 }
385
386 static void
387 bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
388 {
389 if (len != 1)
390 WITHDRAW(BAD_LENGTH, "ORIGIN", len);
391
392 if (data[0] > 2)
393 WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
394
395 bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
396 }
397
398 static void
399 bgp_format_origin(eattr *a, byte *buf, uint size UNUSED)
400 {
401 static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
402
403 bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
404 }
405
406
407 static inline int
408 bgp_as_path_first_as_equal(const byte *data, uint len, u32 asn)
409 {
410 return (len >= 6) &&
411 ((data[0] == AS_PATH_SEQUENCE) || (data[0] == AS_PATH_CONFED_SEQUENCE)) &&
412 (data[1] > 0) &&
413 (get_u32(data+2) == asn);
414 }
415
416 static int
417 bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
418 {
419 const byte *data = a->u.ptr->data;
420 uint len = a->u.ptr->length;
421
422 if (!s->as4_session)
423 {
424 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
425 byte *dst = alloca(len);
426 len = as_path_32to16(dst, data, len);
427 data = dst;
428 }
429
430 return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
431 }
432
433 static void
434 bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
435 {
436 struct bgp_proto *p = s->proto;
437 int as_length = s->as4_session ? 4 : 2;
438 int as_sets = p->cf->allow_as_sets;
439 int as_confed = p->cf->confederation && p->is_interior;
440 char err[128];
441
442 if (!as_path_valid(data, len, as_length, as_sets, as_confed, err, sizeof(err)))
443 WITHDRAW("Malformed AS_PATH attribute - %s", err);
444
445 if (!s->as4_session)
446 {
447 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
448 byte *src = data;
449 data = alloca(2*len);
450 len = as_path_16to32(data, src, len);
451 }
452
453 /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
454 if (p->is_interior && !p->is_internal &&
455 ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
456 WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
457
458 /* Reject routes with first AS in AS_PATH not matching neighbor AS; RFC 4271 6.3 */
459 if (!p->is_internal && p->cf->enforce_first_as &&
460 !bgp_as_path_first_as_equal(data, len, p->remote_as))
461 WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS");
462
463 bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
464 }
465
466
467 static int
468 bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
469 {
470 /*
471 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
472 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
473 * store it and encode it later by AFI-specific hooks.
474 */
475
476 if (!s->mp_reach)
477 {
478 // ASSERT(a->u.ptr->length == sizeof(ip_addr));
479
480 /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
481 ip_addr *addr = (void *) a->u.ptr->data;
482 if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
483 return 0;
484
485 if (size < (3+4))
486 return -1;
487
488 bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
489 put_ip4(buf+3, ipa_to_ip4(*addr));
490
491 return 3+4;
492 }
493 else
494 {
495 s->mp_next_hop = a;
496 return 0;
497 }
498 }
499
500 static void
501 bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
502 {
503 if (len != 4)
504 WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
505
506 /* Semantic checks are done later */
507 s->ip_next_hop_len = len;
508 s->ip_next_hop_data = data;
509 }
510
511 /* TODO: This function should use AF-specific hook */
512 static void
513 bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED)
514 {
515 ip_addr *nh = (void *) a->u.ptr->data;
516 uint len = a->u.ptr->length;
517
518 ASSERT((len == 16) || (len == 32));
519
520 /* in IPv6, we may have two addresses in NEXT HOP */
521 if ((len == 16) || ipa_zero(nh[1]))
522 bsprintf(buf, "%I", nh[0]);
523 else
524 bsprintf(buf, "%I %I", nh[0], nh[1]);
525 }
526
527
528 static void
529 bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
530 {
531 if (len != 4)
532 WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
533
534 u32 val = get_u32(data);
535 bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
536 }
537
538
539 static void
540 bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
541 {
542 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
543 UNSET(a);
544 }
545
546 static void
547 bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
548 {
549 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
550 DISCARD(BAD_EBGP, "LOCAL_PREF");
551
552 if (len != 4)
553 WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
554
555 u32 val = get_u32(data);
556 bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
557 }
558
559
560 static void
561 bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
562 {
563 if (len != 0)
564 DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
565
566 bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
567 }
568
569 static int
570 bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
571 {
572 const byte *data = a->u.ptr->data;
573 uint len = a->u.ptr->length;
574
575 if (!s->as4_session)
576 {
577 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
578 byte *dst = alloca(6);
579 len = aggregator_32to16(dst, data);
580 }
581
582 return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
583 }
584
585 static void
586 bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
587 {
588 if (len != (s->as4_session ? 8 : 6))
589 DISCARD(BAD_LENGTH, "AGGREGATOR", len);
590
591 if (!s->as4_session)
592 {
593 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
594 byte *src = data;
595 data = alloca(8);
596 len = aggregator_16to32(data, src);
597 }
598
599 bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
600 }
601
602 static void
603 bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED)
604 {
605 const byte *data = a->u.ptr->data;
606
607 bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
608 }
609
610
611 static void
612 bgp_export_community(struct bgp_export_state *s, eattr *a)
613 {
614 if (a->u.ptr->length == 0)
615 UNSET(a);
616
617 a->u.ptr = int_set_sort(s->pool, a->u.ptr);
618 }
619
620 static void
621 bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
622 {
623 if (!len || (len % 4))
624 WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
625
626 struct adata *ad = lp_alloc_adata(s->pool, len);
627 get_u32s(data, (u32 *) ad->data, len / 4);
628 bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
629 }
630
631
632 static void
633 bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
634 {
635 if (!s->proto->is_internal)
636 UNSET(a);
637 }
638
639 static void
640 bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
641 {
642 if (!s->proto->is_internal)
643 DISCARD(BAD_EBGP, "ORIGINATOR_ID");
644
645 if (len != 4)
646 WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
647
648 u32 val = get_u32(data);
649 bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
650 }
651
652
653 static void
654 bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
655 {
656 if (!s->proto->is_internal)
657 UNSET(a);
658
659 if (a->u.ptr->length == 0)
660 UNSET(a);
661 }
662
663 static void
664 bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
665 {
666 if (!s->proto->is_internal)
667 DISCARD(BAD_EBGP, "CLUSTER_LIST");
668
669 if (!len || (len % 4))
670 WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
671
672 struct adata *ad = lp_alloc_adata(s->pool, len);
673 get_u32s(data, (u32 *) ad->data, len / 4);
674 bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
675 }
676
677 static void
678 bgp_format_cluster_list(eattr *a, byte *buf, uint size)
679 {
680 /* Truncates cluster lists larger than buflen, probably not a problem */
681 int_set_format(a->u.ptr, 0, -1, buf, size);
682 }
683
684
685 static inline u32
686 get_af3(byte *buf)
687 {
688 return (get_u16(buf) << 16) | buf[2];
689 }
690
691 static void
692 bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
693 {
694 /*
695 * 2 B MP_REACH_NLRI data - Address Family Identifier
696 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
697 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
698 * var MP_REACH_NLRI data - Network Address of Next Hop
699 * 1 B MP_REACH_NLRI data - Reserved (zero)
700 * var MP_REACH_NLRI data - Network Layer Reachability Information
701 */
702
703 if ((len < 5) || (len < (5 + (uint) data[3])))
704 bgp_parse_error(s, 9);
705
706 s->mp_reach_af = get_af3(data);
707 s->mp_next_hop_len = data[3];
708 s->mp_next_hop_data = data + 4;
709 s->mp_reach_len = len - 5 - s->mp_next_hop_len;
710 s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
711 }
712
713
714 static void
715 bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
716 {
717 /*
718 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
719 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
720 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
721 */
722
723 if (len < 3)
724 bgp_parse_error(s, 9);
725
726 s->mp_unreach_af = get_af3(data);
727 s->mp_unreach_len = len - 3;
728 s->mp_unreach_nlri = data + 3;
729 }
730
731
732 static void
733 bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
734 {
735 if (!s->proto->is_interior)
736 {
737 struct adata *ad = ec_set_del_nontrans(s->pool, a->u.ptr);
738
739 if (ad->length == 0)
740 UNSET(a);
741
742 ec_set_sort_x(ad);
743 a->u.ptr = ad;
744 }
745 else
746 {
747 if (a->u.ptr->length == 0)
748 UNSET(a);
749
750 a->u.ptr = ec_set_sort(s->pool, a->u.ptr);
751 }
752 }
753
754 static void
755 bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
756 {
757 if (!len || (len % 8))
758 WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
759
760 struct adata *ad = lp_alloc_adata(s->pool, len);
761 get_u32s(data, (u32 *) ad->data, len / 4);
762 bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
763 }
764
765
766 static void
767 bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
768 {
769 if (s->as4_session)
770 DISCARD(NEW_BGP, "AS4_AGGREGATOR");
771
772 if (len != 8)
773 DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
774
775 bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
776 }
777
778 static void
779 bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
780 {
781 struct bgp_proto *p = s->proto;
782 int sets = p->cf->allow_as_sets;
783
784 char err[128];
785
786 if (s->as4_session)
787 DISCARD(NEW_BGP, "AS4_PATH");
788
789 if (len < 6)
790 DISCARD(BAD_LENGTH, "AS4_PATH", len);
791
792 if (!as_path_valid(data, len, 4, sets, 1, err, sizeof(err)))
793 DISCARD("Malformed AS4_PATH attribute - %s", err);
794
795 struct adata *a = lp_alloc_adata(s->pool, len);
796 memcpy(a->data, data, len);
797
798 /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
799 if (as_path_contains_confed(a))
800 {
801 REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
802 a = as_path_strip_confed(s->pool, a);
803 }
804
805 bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
806 }
807
808
809 static void
810 bgp_export_aigp(struct bgp_export_state *s, eattr *a)
811 {
812 if (!s->channel->cf->aigp)
813 UNSET(a);
814 }
815
816 static void
817 bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
818 {
819 char err[128];
820
821 /* Acceptability test postponed to bgp_finish_attrs() */
822
823 if ((flags ^ bgp_attr_table[BA_AIGP].flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
824 DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags);
825
826 if (!bgp_aigp_valid(data, len, err, sizeof(err)))
827 DISCARD("Malformed AIGP attribute - %s", err);
828
829 bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
830 }
831
832 static void
833 bgp_format_aigp(eattr *a, byte *buf, uint size UNUSED)
834 {
835 const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
836
837 if (!b)
838 bsprintf(buf, "?");
839 else
840 bsprintf(buf, "%lu", get_u64(b + 3));
841 }
842
843
844 static void
845 bgp_export_large_community(struct bgp_export_state *s, eattr *a)
846 {
847 if (a->u.ptr->length == 0)
848 UNSET(a);
849
850 a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
851 }
852
853 static void
854 bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
855 {
856 if (!len || (len % 12))
857 WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
858
859 struct adata *ad = lp_alloc_adata(s->pool, len);
860 get_u32s(data, (u32 *) ad->data, len / 4);
861 bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
862 }
863
864 static void
865 bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
866 {
867 net_addr *n = s->route->net->n.addr;
868 u32 *labels = (u32 *) a->u.ptr->data;
869 uint lnum = a->u.ptr->length / 4;
870
871 /* Perhaps we should just ignore it? */
872 if (!s->mpls)
873 WITHDRAW("Unexpected MPLS stack");
874
875 /* Empty MPLS stack is not allowed */
876 if (!lnum)
877 WITHDRAW("Malformed MPLS stack - empty");
878
879 /* This is ugly, but we must ensure that labels fit into NLRI field */
880 if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
881 WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
882
883 for (uint i = 0; i < lnum; i++)
884 {
885 if (labels[i] > 0xfffff)
886 WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
887
888 /* TODO: Check for special-purpose label values? */
889 }
890 }
891
892 static int
893 bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
894 {
895 /*
896 * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
897 * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
898 */
899
900 s->mpls_labels = a->u.ptr;
901 return 0;
902 }
903
904 static void
905 bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
906 {
907 DISCARD("Discarding received attribute #0");
908 }
909
910 static void
911 bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
912 {
913 u32 *labels = (u32 *) a->u.ptr->data;
914 uint lnum = a->u.ptr->length / 4;
915 char *pos = buf;
916
917 for (uint i = 0; i < lnum; i++)
918 {
919 if (size < 20)
920 {
921 bsprintf(pos, "...");
922 return;
923 }
924
925 uint l = bsprintf(pos, "%d/", labels[i]);
926 ADVANCE(pos, size, l);
927 }
928
929 /* Clear last slash or terminate empty string */
930 pos[lnum ? -1 : 0] = 0;
931 }
932
933 static inline void
934 bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
935 {
936 /* Cannot use bgp_set_attr_data() as it works on known attributes only */
937 ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
938 }
939
940
941 /*
942 * Attribute table
943 */
944
/*
 * Table of known BGP attributes, indexed by attribute code. Codes without an
 * entry have a NULL name and are treated as unknown by bgp_attr_known().
 * Entries without an export or format hook simply leave the field unset.
 */
static const struct bgp_attr_desc bgp_attr_table[] = {
  [BA_ORIGIN] = {
    .name = "origin",
    .type = EAF_TYPE_INT,
    .flags = BAF_TRANSITIVE,
    .export = bgp_export_origin,
    .encode = bgp_encode_u8,
    .decode = bgp_decode_origin,
    .format = bgp_format_origin,
  },
  [BA_AS_PATH] = {
    .name = "as_path",
    .type = EAF_TYPE_AS_PATH,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_as_path,
    .decode = bgp_decode_as_path,
  },
  [BA_NEXT_HOP] = {
    .name = "next_hop",
    .type = EAF_TYPE_IP_ADDRESS,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_next_hop,
    .decode = bgp_decode_next_hop,
    .format = bgp_format_next_hop,
  },
  [BA_MULTI_EXIT_DISC] = {
    .name = "med",
    .type = EAF_TYPE_INT,
    .flags = BAF_OPTIONAL,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_med,
  },
  [BA_LOCAL_PREF] = {
    .name = "local_pref",
    .type = EAF_TYPE_INT,
    .flags = BAF_TRANSITIVE,
    .export = bgp_export_local_pref,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_local_pref,
  },
  [BA_ATOMIC_AGGR] = {
    .name = "atomic_aggr",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_atomic_aggr,
  },
  [BA_AGGREGATOR] = {
    .name = "aggregator",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_aggregator,
    .decode = bgp_decode_aggregator,
    .format = bgp_format_aggregator,
  },
  [BA_COMMUNITY] = {
    .name = "community",
    .type = EAF_TYPE_INT_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_community,
  },
  [BA_ORIGINATOR_ID] = {
    .name = "originator_id",
    .type = EAF_TYPE_ROUTER_ID,
    .flags = BAF_OPTIONAL,
    .export = bgp_export_originator_id,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_originator_id,
  },
  [BA_CLUSTER_LIST] = {
    .name = "cluster_list",
    .type = EAF_TYPE_INT_SET,
    .flags = BAF_OPTIONAL,
    .export = bgp_export_cluster_list,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_cluster_list,
    .format = bgp_format_cluster_list,
  },
  /* MP_(UN)REACH_NLRI are decode-only entries: no export, encode or format hook */
  [BA_MP_REACH_NLRI] = {
    .name = "mp_reach_nlri",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL,
    .decode = bgp_decode_mp_reach_nlri,
  },
  [BA_MP_UNREACH_NLRI] = {
    .name = "mp_unreach_nlri",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL,
    .decode = bgp_decode_mp_unreach_nlri,
  },
  [BA_EXT_COMMUNITY] = {
    .name = "ext_community",
    .type = EAF_TYPE_EC_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_ext_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_ext_community,
  },
  [BA_AS4_PATH] = {
    .name = "as4_path",
    .type = EAF_TYPE_AS_PATH,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_as4_path,
  },
  /* Shares its format hook with AGGREGATOR */
  [BA_AS4_AGGREGATOR] = {
    .name = "as4_aggregator",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_as4_aggregator,
    .format = bgp_format_aggregator,
  },
  /* The decode hook compares the optional / transitive flag bits itself */
  [BA_AIGP] = {
    .name = "aigp",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
    .export = bgp_export_aigp,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_aigp,
    .format = bgp_format_aigp,
  },
  [BA_LARGE_COMMUNITY] = {
    .name = "large_community",
    .type = EAF_TYPE_LC_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_large_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_large_community,
  },
  /* Never written as an attribute: the encode hook returns 0, the decode hook discards */
  [BA_MPLS_LABEL_STACK] = {
    .name = "mpls_label_stack",
    .type = EAF_TYPE_INT_SET,
    .export = bgp_export_mpls_label_stack,
    .encode = bgp_encode_mpls_label_stack,
    .decode = bgp_decode_mpls_label_stack,
    .format = bgp_format_mpls_label_stack,
  },
};
1086
1087 static inline int
1088 bgp_attr_known(uint code)
1089 {
1090 return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
1091 }
1092
1093
1094 /*
1095 * Attribute export
1096 */
1097
1098 static inline void
1099 bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
1100 {
1101 if (EA_PROTO(a->id) != PROTOCOL_BGP)
1102 return;
1103
1104 uint code = EA_ID(a->id);
1105
1106 if (bgp_attr_known(code))
1107 {
1108 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1109
1110 /* The flags might have been zero if the attr was added by filters */
1111 a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
1112
1113 /* Set partial bit if new opt-trans attribute is attached to non-local route */
1114 if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
1115 (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
1116 a->flags |= BAF_PARTIAL;
1117
1118 /* Call specific hook */
1119 CALL(desc->export, s, a);
1120
1121 /* Attribute might become undefined in hook */
1122 if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
1123 return;
1124 }
1125 else
1126 {
1127 /* Don't re-export unknown non-transitive attributes */
1128 if (!(a->flags & BAF_TRANSITIVE))
1129 return;
1130
1131 a->flags |= BAF_PARTIAL;
1132 }
1133
1134 /* Append updated attribute */
1135 to->attrs[to->count++] = *a;
1136 }
1137
1138 /**
1139 * bgp_export_attrs - export BGP attributes
1140 * @s: BGP export state
1141 * @attrs: a list of extended attributes
1142 *
1143 * The bgp_export_attrs() function takes a list of attributes and merges it to
1144 * one newly allocated and sorted segment. Attributes are validated and
1145 * normalized by type-specific export hooks and attribute flags are updated.
1146 * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
1147 * empty community sets).
1148 *
1149 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
1150 */
1151 static inline ea_list *
1152 bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
1153 {
1154 /* Merge the attribute list */
1155 ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
1156 ea_merge(attrs, new);
1157 ea_sort(new);
1158
1159 uint i, count;
1160 count = new->count;
1161 new->count = 0;
1162
1163 /* Export each attribute */
1164 for (i = 0; i < count; i++)
1165 bgp_export_attr(s, &new->attrs[i], new);
1166
1167 if (s->err_withdraw)
1168 return NULL;
1169
1170 return new;
1171 }
1172
1173
1174 /*
1175 * Attribute encoding
1176 */
1177
1178 static inline int
1179 bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
1180 {
1181 ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
1182
1183 uint code = EA_ID(a->id);
1184
1185 if (bgp_attr_known(code))
1186 return bgp_attr_table[code].encode(s, a, buf, size);
1187 else
1188 return bgp_encode_raw(s, a, buf, size);
1189 }
1190
1191 /**
1192 * bgp_encode_attrs - encode BGP attributes
1193 * @s: BGP write state
1194 * @attrs: a list of extended attributes
1195 * @buf: buffer
1196 * @end: buffer end
1197 *
1198 * The bgp_encode_attrs() function takes a list of extended attributes
1199 * and converts it to its BGP representation (a part of an Update message).
1200 * BGP write state may be fake when called from MRT protocol.
1201 *
1202 * Result: Length of the attribute block generated or -1 if not enough space.
1203 */
1204 int
1205 bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
1206 {
1207 byte *pos = buf;
1208 int i, len;
1209
1210 for (i = 0; i < attrs->count; i++)
1211 {
1212 len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
1213
1214 if (len < 0)
1215 return -1;
1216
1217 pos += len;
1218 }
1219
1220 return pos - buf;
1221 }
1222
1223
1224 /*
1225 * Attribute decoding
1226 */
1227
1228 static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
1229
1230 static inline int
1231 bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
1232 {
1233 eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
1234 int num = p->cf->allow_local_as + 1;
1235 return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
1236 }
1237
1238 static inline int
1239 bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
1240 {
1241 eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
1242 return (e && (e->u.data == p->local_id));
1243 }
1244
1245 static inline int
1246 bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
1247 {
1248 eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
1249 return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
1250 }
1251
1252 static inline void
1253 bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
1254 {
1255 /* Handle duplicate attributes; RFC 7606 3 (g) */
1256 if (BIT32_TEST(s->attrs_seen, code))
1257 {
1258 if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
1259 bgp_parse_error(s, 1);
1260 else
1261 DISCARD("Discarding duplicate attribute (code %u)", code);
1262 }
1263 BIT32_SET(s->attrs_seen, code);
1264
1265 if (bgp_attr_known(code))
1266 {
1267 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1268
1269 /* Handle conflicting flags; RFC 7606 3 (c) */
1270 if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
1271 !(desc->flags & BAF_DECODE_FLAGS))
1272 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
1273
1274 desc->decode(s, code, flags, data, len, to);
1275 }
1276 else /* Unknown attribute */
1277 {
1278 if (!(flags & BAF_OPTIONAL))
1279 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
1280
1281 bgp_decode_unknown(s, code, flags, data, len, to);
1282 }
1283 }
1284
1285 /**
1286 * bgp_decode_attrs - check and decode BGP attributes
1287 * @s: BGP parse state
1288 * @data: start of attribute block
1289 * @len: length of attribute block
1290 *
1291 * This function takes a BGP attribute block (a part of an Update message), checks
1292 * its consistency and converts it to a list of BIRD route attributes represented
1293 * by an (uncached) &rta.
1294 */
1295 ea_list *
1296 bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
1297 {
1298 struct bgp_proto *p = s->proto;
1299 ea_list *attrs = NULL;
1300 uint code, flags, alen;
1301 byte *pos = data;
1302
1303 /* Parse the attributes */
1304 while (len)
1305 {
1306 alen = 0;
1307
1308 /* Read attribute type */
1309 if (len < 2)
1310 goto framing_error;
1311 flags = pos[0];
1312 code = pos[1];
1313 ADVANCE(pos, len, 2);
1314
1315 /* Read attribute length */
1316 if (flags & BAF_EXT_LEN)
1317 {
1318 if (len < 2)
1319 goto framing_error;
1320 alen = get_u16(pos);
1321 ADVANCE(pos, len, 2);
1322 }
1323 else
1324 {
1325 if (len < 1)
1326 goto framing_error;
1327 alen = *pos;
1328 ADVANCE(pos, len, 1);
1329 }
1330
1331 if (alen > len)
1332 goto framing_error;
1333
1334 DBG("Attr %02x %02x %u\n", code, flags, alen);
1335
1336 bgp_decode_attr(s, code, flags, pos, alen, &attrs);
1337 ADVANCE(pos, len, alen);
1338 }
1339
1340 if (s->err_withdraw)
1341 goto withdraw;
1342
1343 /* If there is no reachability NLRI, we are finished */
1344 if (!s->ip_reach_len && !s->mp_reach_len)
1345 return NULL;
1346
1347
1348 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1349 if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
1350 { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
1351
1352 if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
1353 { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
1354
1355 if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
1356 { REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
1357
1358 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1359 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1360 if (!p->as4_session)
1361 bgp_process_as4_attrs(&attrs, s->pool);
1362
1363 /* Reject routes with our ASN in AS_PATH attribute */
1364 if (bgp_as_path_loopy(p, attrs, p->local_as))
1365 goto withdraw;
1366
1367 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
1368 if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
1369 goto withdraw;
1370
1371 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1372 if (p->is_internal && bgp_originator_id_loopy(p, attrs))
1373 goto withdraw;
1374
1375 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1376 if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
1377 goto withdraw;
1378
1379 /* If there is no local preference, define one */
1380 if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
1381 bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1382
1383 return attrs;
1384
1385
1386 framing_error:
1387 /* RFC 7606 4 - handle attribute framing errors */
1388 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1389 alen, len, (int) (pos - s->attrs));
1390
1391 withdraw:
1392 /* RFC 7606 5.2 - handle missing NLRI during errors */
1393 if (!s->ip_reach_len && !s->mp_reach_len)
1394 bgp_parse_error(s, 1);
1395
1396 s->err_withdraw = 1;
1397 return NULL;
1398 }
1399
1400 void
1401 bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
1402 {
1403 /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
1404 if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
1405 {
1406 REPORT("Discarding AIGP attribute received on non-AIGP session");
1407 bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
1408 }
1409 }
1410
1411
1412 /*
1413 * Route bucket hash table
1414 */
1415
/*
 * Parameters of the route bucket hash (prefix RBH), used by the HASH_*()
 * calls on c->bucket_hash in this file.
 */

/* Key of a bucket: its attribute list together with the stored hash value */
#define RBH_KEY(b) b->eattrs, b->hash
/* Link to the next bucket in the same hash chain */
#define RBH_NEXT(b) b->next
/* Two keys are equal when the hashes match and ea_same() agrees on the lists */
#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
/* The hash function just returns the precomputed hash part of the key */
#define RBH_FN(a,h) h

/* Name of the rehash function generated by HASH_DEFINE_REHASH_FN() below */
#define RBH_REHASH bgp_rbh_rehash
/* NOTE(review): resize tuning values; their exact meaning is defined by the
   generic hash implementation, which is not visible here - confirm there */
#define RBH_PARAMS /8, *2, 2, 2, 8, 20


HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
1426
1427 void
1428 bgp_init_bucket_table(struct bgp_channel *c)
1429 {
1430 HASH_INIT(c->bucket_hash, c->pool, 8);
1431
1432 init_list(&c->bucket_queue);
1433 c->withdraw_bucket = NULL;
1434 }
1435
1436 void
1437 bgp_free_bucket_table(struct bgp_channel *c)
1438 {
1439 HASH_FREE(c->bucket_hash);
1440
1441 struct bgp_bucket *b;
1442 WALK_LIST_FIRST(b, c->bucket_queue)
1443 {
1444 rem_node(&b->send_node);
1445 mb_free(b);
1446 }
1447
1448 mb_free(c->withdraw_bucket);
1449 c->withdraw_bucket = NULL;
1450 }
1451
1452 static struct bgp_bucket *
1453 bgp_get_bucket(struct bgp_channel *c, ea_list *new)
1454 {
1455 /* Hash and lookup */
1456 u32 hash = ea_hash(new);
1457 struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
1458
1459 if (b)
1460 return b;
1461
1462 uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
1463 uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
1464 uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
1465 uint i;
1466 byte *dest;
1467
1468 /* Gather total size of non-inline attributes */
1469 for (i = 0; i < new->count; i++)
1470 {
1471 eattr *a = &new->attrs[i];
1472
1473 if (!(a->type & EAF_EMBEDDED))
1474 size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
1475 }
1476
1477 /* Create the bucket */
1478 b = mb_alloc(c->pool, size);
1479 init_list(&b->prefixes);
1480 b->hash = hash;
1481
1482 /* Copy list of extended attributes */
1483 memcpy(b->eattrs, new, ea_size);
1484 dest = ((byte *) b->eattrs) + ea_size_aligned;
1485
1486 /* Copy values of non-inline attributes */
1487 for (i = 0; i < new->count; i++)
1488 {
1489 eattr *a = &b->eattrs->attrs[i];
1490
1491 if (!(a->type & EAF_EMBEDDED))
1492 {
1493 const struct adata *oa = a->u.ptr;
1494 struct adata *na = (struct adata *) dest;
1495 memcpy(na, oa, sizeof(struct adata) + oa->length);
1496 a->u.ptr = na;
1497 dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
1498 }
1499 }
1500
1501 /* Insert the bucket to send queue and bucket hash */
1502 add_tail(&c->bucket_queue, &b->send_node);
1503 HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
1504
1505 return b;
1506 }
1507
1508 static struct bgp_bucket *
1509 bgp_get_withdraw_bucket(struct bgp_channel *c)
1510 {
1511 if (!c->withdraw_bucket)
1512 {
1513 c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
1514 init_list(&c->withdraw_bucket->prefixes);
1515 }
1516
1517 return c->withdraw_bucket;
1518 }
1519
1520 void
1521 bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1522 {
1523 rem_node(&b->send_node);
1524 HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
1525 mb_free(b);
1526 }
1527
1528 void
1529 bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1530 {
1531 rem_node(&b->send_node);
1532 add_tail(&c->bucket_queue, &b->send_node);
1533 }
1534
1535 void
1536 bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1537 {
1538 struct bgp_proto *p = (void *) c->c.proto;
1539 struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
1540
1541 log(L_ERR "%s: Attribute list too long", p->p.name);
1542 while (!EMPTY_LIST(b->prefixes))
1543 {
1544 struct bgp_prefix *px = HEAD(b->prefixes);
1545
1546 log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
1547 rem_node(&px->buck_node);
1548 add_tail(&wb->prefixes, &px->buck_node);
1549 }
1550 }
1551
1552
1553 /*
1554 * Prefix hash table
1555 */
1556
/*
 * Parameters of the prefix hash (prefix PXH), used by the HASH_*() calls on
 * c->prefix_hash in this file.
 */

/* Key of a prefix: network address, path ID and the stored hash value */
#define PXH_KEY(px) px->net, px->path_id, px->hash
/* Link to the next prefix in the same hash chain */
#define PXH_NEXT(px) px->next
/* Keys are equal when hashes and path IDs match and net_equal() agrees */
#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
/* The hash function just returns the precomputed hash part of the key */
#define PXH_FN(n,i,h) h

/* Name of the rehash function generated by HASH_DEFINE_REHASH_FN() below */
#define PXH_REHASH bgp_pxh_rehash
/* NOTE(review): resize tuning values; their exact meaning is defined by the
   generic hash implementation, which is not visible here - confirm there */
#define PXH_PARAMS /8, *2, 2, 2, 8, 24


HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
1567
1568 void
1569 bgp_init_prefix_table(struct bgp_channel *c)
1570 {
1571 HASH_INIT(c->prefix_hash, c->pool, 8);
1572
1573 uint alen = net_addr_length[c->c.net_type];
1574 c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
1575 }
1576
1577 void
1578 bgp_free_prefix_table(struct bgp_channel *c)
1579 {
1580 HASH_FREE(c->prefix_hash);
1581
1582 rfree(c->prefix_slab);
1583 c->prefix_slab = NULL;
1584 }
1585
1586 static struct bgp_prefix *
1587 bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
1588 {
1589 u32 hash = net_hash(net) ^ u32_hash(path_id);
1590 struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
1591
1592 if (px)
1593 {
1594 rem_node(&px->buck_node);
1595 return px;
1596 }
1597
1598 if (c->prefix_slab)
1599 px = sl_alloc(c->prefix_slab);
1600 else
1601 px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
1602
1603 px->buck_node.next = NULL;
1604 px->buck_node.prev = NULL;
1605 px->hash = hash;
1606 px->path_id = path_id;
1607 net_copy(px->net, net);
1608
1609 HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
1610
1611 return px;
1612 }
1613
1614 void
1615 bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
1616 {
1617 rem_node(&px->buck_node);
1618 HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
1619
1620 if (c->prefix_slab)
1621 sl_free(c->prefix_slab, px);
1622 else
1623 mb_free(px);
1624 }
1625
1626
1627 /*
1628 * BGP protocol glue
1629 */
1630
1631 int
1632 bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED)
1633 {
1634 rte *e = *new;
1635 struct proto *SRC = e->attrs->src->proto;
1636 struct bgp_proto *p = (struct bgp_proto *) P;
1637 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
1638
1639 /* Reject our routes */
1640 if (src == p)
1641 return -1;
1642
1643 /* Accept non-BGP routes */
1644 if (src == NULL)
1645 return 0;
1646
1647 /* IBGP route reflection, RFC 4456 */
1648 if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
1649 {
1650 /* Rejected unless configured as route reflector */
1651 if (!p->rr_client && !src->rr_client)
1652 return -1;
1653
1654 /* Generally, this should be handled when path is received, but we check it
1655 also here as rr_cluster_id may be undefined or different in src. */
1656 if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
1657 return -1;
1658 }
1659
1660 /* Handle well-known communities, RFC 1997 */
1661 struct eattr *c;
1662 if (p->cf->interpret_communities &&
1663 (c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
1664 {
1665 const struct adata *d = c->u.ptr;
1666
1667 /* Do not export anywhere */
1668 if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1669 return -1;
1670
1671 /* Do not export outside of AS (or member-AS) */
1672 if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
1673 return -1;
1674
1675 /* Do not export outside of AS (or confederation) */
1676 if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
1677 return -1;
1678
1679 /* Do not export LLGR_STALE routes to LLGR-ignorant peers */
1680 if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
1681 return -1;
1682 }
1683
1684 return 0;
1685 }
1686
1687 static ea_list *
1688 bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
1689 {
1690 struct proto *SRC = e->attrs->src->proto;
1691 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
1692 struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
1693 ea_list *attrs = attrs0;
1694 eattr *a;
1695 const adata *ad;
1696
1697 /* ORIGIN attribute - mandatory, attach if missing */
1698 if (! bgp_find_attr(attrs0, BA_ORIGIN))
1699 bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
1700
1701 /* AS_PATH attribute - mandatory */
1702 a = bgp_find_attr(attrs0, BA_AS_PATH);
1703 ad = a ? a->u.ptr : &null_adata;
1704
1705 /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1706 if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
1707 ad = as_path_strip_confed(pool, ad);
1708
1709 /* AS_PATH attribute - keep or prepend ASN */
1710 if (p->is_internal || p->rs_client)
1711 {
1712 /* IBGP or route server -> just ensure there is one */
1713 if (!a)
1714 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
1715 }
1716 else if (p->is_interior)
1717 {
1718 /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1719 ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
1720 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1721 }
1722 else /* Regular EBGP (no RS, no confederation) */
1723 {
1724 /* Regular EBGP -> prepend ASN as regular sequence */
1725 ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
1726 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1727
1728 /* MULTI_EXIT_DESC attribute - accept only if set in export filter */
1729 a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
1730 if (a && !(a->type & EAF_FRESH))
1731 bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
1732 }
1733
1734 /* NEXT_HOP attribute - delegated to AF-specific hook */
1735 a = bgp_find_attr(attrs0, BA_NEXT_HOP);
1736 bgp_update_next_hop(&s, a, &attrs);
1737
1738 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
1739 if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
1740 bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1741
1742 /* AIGP attribute - accumulate local metric or originate new one */
1743 u64 metric;
1744 if (s.local_next_hop &&
1745 (bgp_total_aigp_metric_(e, &metric, &ad) ||
1746 (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
1747 {
1748 ad = bgp_aigp_set_metric(pool, ad, metric);
1749 bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
1750 }
1751
1752 /* IBGP route reflection, RFC 4456 */
1753 if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
1754 {
1755 /* ORIGINATOR_ID attribute - attach if not already set */
1756 if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
1757 bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
1758
1759 /* CLUSTER_LIST attribute - prepend cluster ID */
1760 a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
1761 ad = a ? a->u.ptr : NULL;
1762
1763 /* Prepend src cluster ID */
1764 if (src->rr_cluster_id)
1765 ad = int_set_prepend(pool, ad, src->rr_cluster_id);
1766
1767 /* Prepend dst cluster ID if src and dst clusters are different */
1768 if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
1769 ad = int_set_prepend(pool, ad, p->rr_cluster_id);
1770
1771 /* Should be at least one prepended cluster ID */
1772 bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
1773 }
1774
1775 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1776 if (! p->as4_session)
1777 {
1778 a = bgp_find_attr(attrs, BA_AS_PATH);
1779 if (a && as_path_contains_as4(a->u.ptr))
1780 {
1781 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
1782 bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
1783 }
1784
1785 a = bgp_find_attr(attrs, BA_AGGREGATOR);
1786 if (a && aggregator_contains_as4(a->u.ptr))
1787 {
1788 bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
1789 bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
1790 }
1791 }
1792
1793 /*
1794 * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1795 * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1796 * should be checked in AF-specific hooks.
1797 */
1798
1799 /* Apply per-attribute export hooks for validatation and normalization */
1800 return bgp_export_attrs(&s, attrs);
1801 }
1802
1803 void
1804 bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
1805 {
1806 struct bgp_proto *p = (void *) P;
1807 struct bgp_channel *c = (void *) C;
1808 struct bgp_bucket *buck;
1809 struct bgp_prefix *px;
1810 u32 path;
1811
1812 if (new)
1813 {
1814 struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2);
1815
1816 /* If attributes are invalid, we fail back to withdraw */
1817 buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
1818 path = new->attrs->src->global_id;
1819
1820 lp_flush(bgp_linpool2);
1821 }
1822 else
1823 {
1824 buck = bgp_get_withdraw_bucket(c);
1825 path = old->attrs->src->global_id;
1826 }
1827
1828 px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
1829 add_tail(&buck->prefixes, &px->buck_node);
1830
1831 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1832 }
1833
1834
1835 static inline u32
1836 bgp_get_neighbor(rte *r)
1837 {
1838 eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1839 u32 as;
1840
1841 if (e && as_path_get_first_regular(e->u.ptr, &as))
1842 return as;
1843
1844 /* If AS_PATH is not defined, we treat rte as locally originated */
1845 struct bgp_proto *p = (void *) r->attrs->src->proto;
1846 return p->cf->confederation ?: p->local_as;
1847 }
1848
1849 static inline int
1850 rte_stale(rte *r)
1851 {
1852 if (r->u.bgp.stale < 0)
1853 {
1854 /* If staleness is unknown, compute and cache it */
1855 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
1856 r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
1857 }
1858
1859 return r->u.bgp.stale;
1860 }
1861
1862 int
1863 bgp_rte_better(rte *new, rte *old)
1864 {
1865 struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
1866 struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
1867 eattr *x, *y;
1868 u32 n, o;
1869
1870 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1871 n = new->u.bgp.suppressed;
1872 o = old->u.bgp.suppressed;
1873 if (n > o)
1874 return 0;
1875 if (n < o)
1876 return 1;
1877
1878 /* RFC 4271 9.1.2.1. Route resolvability test */
1879 n = rte_resolvable(new);
1880 o = rte_resolvable(old);
1881 if (n > o)
1882 return 1;
1883 if (n < o)
1884 return 0;
1885
1886 /* LLGR draft - depreference stale routes */
1887 n = rte_stale(new);
1888 o = rte_stale(old);
1889 if (n > o)
1890 return 0;
1891 if (n < o)
1892 return 1;
1893
1894 /* Start with local preferences */
1895 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1896 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1897 n = x ? x->u.data : new_bgp->cf->default_local_pref;
1898 o = y ? y->u.data : old_bgp->cf->default_local_pref;
1899 if (n > o)
1900 return 1;
1901 if (n < o)
1902 return 0;
1903
1904 /* RFC 7311 4.1 - Apply AIGP metric */
1905 u64 n2 = bgp_total_aigp_metric(new);
1906 u64 o2 = bgp_total_aigp_metric(old);
1907 if (n2 < o2)
1908 return 1;
1909 if (n2 > o2)
1910 return 0;
1911
1912 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1913 if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
1914 {
1915 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1916 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1917 n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1918 o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1919 if (n < o)
1920 return 1;
1921 if (n > o)
1922 return 0;
1923 }
1924
1925 /* RFC 4271 9.1.2.2. b) Use origins */
1926 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1927 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1928 n = x ? x->u.data : ORIGIN_INCOMPLETE;
1929 o = y ? y->u.data : ORIGIN_INCOMPLETE;
1930 if (n < o)
1931 return 1;
1932 if (n > o)
1933 return 0;
1934
1935 /* RFC 4271 9.1.2.2. c) Compare MED's */
1936 /* Proper RFC 4271 path selection cannot be interpreted as finding
1937 * the best path in some ordering. It is implemented partially in
1938 * bgp_rte_recalculate() when deterministic_med option is
1939 * active. Without that option, the behavior is just an
1940 * approximation, which in specific situations may lead to
1941 * persistent routing loops, because it is nondeterministic - it
1942 * depends on the order in which routes appeared. But it is also the
1943 * same behavior as used by default in Cisco routers, so it is
1944 * probably not a big issue.
1945 */
1946 if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1947 (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
1948 {
1949 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1950 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1951 n = x ? x->u.data : new_bgp->cf->default_med;
1952 o = y ? y->u.data : old_bgp->cf->default_med;
1953 if (n < o)
1954 return 1;
1955 if (n > o)
1956 return 0;
1957 }
1958
1959 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1960 if (new_bgp->is_interior > old_bgp->is_interior)
1961 return 0;
1962 if (new_bgp->is_interior < old_bgp->is_interior)
1963 return 1;
1964
1965 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1966 n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1967 o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1968 if (n < o)
1969 return 1;
1970 if (n > o)
1971 return 0;
1972
1973 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1974 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1975 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1976 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1977 n = x ? x->u.data : new_bgp->remote_id;
1978 o = y ? y->u.data : old_bgp->remote_id;
1979
1980 /* RFC 5004 - prefer older routes */
1981 /* (if both are external and from different peer) */
1982 if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1983 !new_bgp->is_internal && n != o)
1984 return 0;
1985
1986 /* rest of RFC 4271 9.1.2.2. f) */
1987 if (n < o)
1988 return 1;
1989 if (n > o)
1990 return 0;
1991
1992 /* RFC 4456 9. b) Compare cluster list lengths */
1993 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1994 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1995 n = x ? int_set_get_size(x->u.ptr) : 0;
1996 o = y ? int_set_get_size(y->u.ptr) : 0;
1997 if (n < o)
1998 return 1;
1999 if (n > o)
2000 return 0;
2001
2002 /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
2003 return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
2004 }
2005
2006
2007 int
2008 bgp_rte_mergable(rte *pri, rte *sec)
2009 {
2010 struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
2011 struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
2012 eattr *x, *y;
2013 u32 p, s;
2014
2015 /* Skip suppressed routes (see bgp_rte_recalculate()) */
2016 if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
2017 return 0;
2018
2019 /* RFC 4271 9.1.2.1. Route resolvability test */
2020 if (rte_resolvable(pri) != rte_resolvable(sec))
2021 return 0;
2022
2023 /* LLGR draft - depreference stale routes */
2024 if (rte_stale(pri) != rte_stale(sec))
2025 return 0;
2026
2027 /* Start with local preferences */
2028 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2029 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2030 p = x ? x->u.data : pri_bgp->cf->default_local_pref;
2031 s = y ? y->u.data : sec_bgp->cf->default_local_pref;
2032 if (p != s)
2033 return 0;
2034
2035 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
2036 if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
2037 {
2038 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2039 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2040 p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
2041 s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
2042
2043 if (p != s)
2044 return 0;
2045
2046 // if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
2047 // return 0;
2048 }
2049
2050 /* RFC 4271 9.1.2.2. b) Use origins */
2051 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2052 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2053 p = x ? x->u.data : ORIGIN_INCOMPLETE;
2054 s = y ? y->u.data : ORIGIN_INCOMPLETE;
2055 if (p != s)
2056 return 0;
2057
2058 /* RFC 4271 9.1.2.2. c) Compare MED's */
2059 if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
2060 (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
2061 {
2062 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2063 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2064 p = x ? x->u.data : pri_bgp->cf->default_med;
2065 s = y ? y->u.data : sec_bgp->cf->default_med;
2066 if (p != s)
2067 return 0;
2068 }
2069
2070 /* RFC 4271 9.1.2.2. d) Prefer external peers */
2071 if (pri_bgp->is_interior != sec_bgp->is_interior)
2072 return 0;
2073
2074 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
2075 p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
2076 s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
2077 if (p != s)
2078 return 0;
2079
2080 /* Remaining criteria are ignored */
2081
2082 return 1;
2083 }
2084
2085
2086 static inline int
2087 same_group(rte *r, u32 lpref, u32 lasn)
2088 {
2089 return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
2090 }
2091
2092 static inline int
2093 use_deterministic_med(rte *r)
2094 {
2095 struct proto *P = r->attrs->src->proto;
2096 return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
2097 }
2098
2099 int
2100 bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
2101 {
2102 rte *r, *s;
2103 rte *key = new ? new : old;
2104 u32 lpref = key->pref;
2105 u32 lasn = bgp_get_neighbor(key);
2106 int old_suppressed = old ? old->u.bgp.suppressed : 0;
2107
2108 /*
2109 * Proper RFC 4271 path selection is a bit complicated, it cannot be
2110 * implemented just by rte_better(), because it is not a linear
2111 * ordering. But it can be splitted to two levels, where the lower
2112 * level chooses the best routes in each group of routes from the
2113 * same neighboring AS and higher level chooses the best route (with
2114 * a slightly different ordering) between the best-in-group routes.
2115 *
2116 * When deterministic_med is disabled, we just ignore this issue and
2117 * choose the best route by bgp_rte_better() alone. If enabled, the
2118 * lower level of the route selection is done here (for the group
2119 * to which the changed route belongs), all routes in group are
2120 * marked as suppressed, just chosen best-in-group is not.
2121 *
2122 * Global best route selection then implements higher level by
2123 * choosing between non-suppressed routes (as they are always
2124 * preferred over suppressed routes). Routes from BGP protocols
2125 * that do not set deterministic_med are just never suppressed. As
2126 * they do not participate in the lower level selection, it is OK
2127 * that this fn is not called for them.
2128 *
2129 * The idea is simple, the implementation is more problematic,
2130 * mostly because of optimizations in rte_recalculate() that
2131 * avoids full recalculation in most cases.
2132 *
2133 * We can assume that at least one of new, old is non-NULL and both
2134 * are from the same protocol with enabled deterministic_med. We
2135 * group routes by both neighbor AS (lasn) and preference (lpref),
2136 * because bgp_rte_better() does not handle preference itself.
2137 */
2138
2139 /* If new and old are from different groups, we just process that
2140 as two independent events */
2141 if (new && old && !same_group(old, lpref, lasn))
2142 {
2143 int i1, i2;
2144 i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
2145 i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
2146 return i1 || i2;
2147 }
2148
2149 /*
2150 * We could find the best-in-group and then make some shortcuts like
2151 * in rte_recalculate, but as we would have to walk through all
2152 * net->routes just to find it, it is probably not worth. So we
2153 * just have one simple fast case that use just the old route.
2154 * We also set suppressed flag to avoid using it in bgp_rte_better().
2155 */
2156
2157 if (new)
2158 new->u.bgp.suppressed = 1;
2159
2160 if (old)
2161 {
2162 old->u.bgp.suppressed = 1;
2163
2164 /* The fast case - replace not best with worse (or remove not best) */
2165 if (old_suppressed && !(new && bgp_rte_better(new, old)))
2166 return 0;
2167 }
2168
2169 /* The default case - find a new best-in-group route */
2170 r = new; /* new may not be in the list */
2171 for (s=net->routes; rte_is_valid(s); s=s->next)
2172 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2173 {
2174 s->u.bgp.suppressed = 1;
2175 if (!r || bgp_rte_better(s, r))
2176 r = s;
2177 }
2178
2179 /* Simple case - the last route in group disappears */
2180 if (!r)
2181 return 0;
2182
2183 /* Found if new is mergable with best-in-group */
2184 if (new && (new != r) && bgp_rte_mergable(r, new))
2185 new->u.bgp.suppressed = 0;
2186
2187 /* Found all existing routes mergable with best-in-group */
2188 for (s=net->routes; rte_is_valid(s); s=s->next)
2189 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2190 if ((s != r) && bgp_rte_mergable(r, s))
2191 s->u.bgp.suppressed = 0;
2192
2193 /* Found best-in-group */
2194 r->u.bgp.suppressed = 0;
2195
2196 /*
2197 * There are generally two reasons why we have to force
2198 * recalculation (return 1): First, the new route may be wrongfully
2199 * chosen to be the best in the first case check in
2200 * rte_recalculate(), this may happen only if old_best is from the
2201 * same group. Second, another (different than new route)
2202 * best-in-group is chosen and that may be the proper best (although
2203 * rte_recalculate() would otherwise ignore that possibility).
2204 *
2205 * There are three possible cases according to whether the old route
2206 * was the best in group (OBG, i.e. !old_suppressed) and whether the
2207 * new route is the best in group (NBG, tested by r == new). These
2208 * cases work even if old or new is NULL.
2209 *
2210 * NBG -> new is a possible candidate for the best route, so we just
2211 * check for the first reason using same_group().
2212 *
2213 * !NBG && OBG -> Second reason applies, return 1
2214 *
2215 * !NBG && !OBG -> Best in group does not change, old != old_best,
2216 * rte_better(new, old_best) is false and therefore
2217 * the first reason does not apply, return 0
2218 */
2219
2220 if (r == new)
2221 return old_best && same_group(old_best, lpref, lasn);
2222 else
2223 return !old_suppressed;
2224 }
2225
2226 struct rte *
2227 bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
2228 {
2229 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
2230 const struct adata *ad = a ? a->u.ptr : NULL;
2231 uint flags = a ? a->flags : BAF_PARTIAL;
2232
2233 if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
2234 return NULL;
2235
2236 if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
2237 return r;
2238
2239 r = rte_cow_rta(r, pool);
2240 bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
2241 int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
2242 r->u.bgp.stale = 1;
2243
2244 return r;
2245 }
2246
2247
2248 /*
2249 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
2250 */
2251 static void
2252 bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
2253 {
2254 eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
2255 eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
2256 eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
2257 eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
2258
2259 /* First, unset AS4_* attributes */
2260 if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
2261 if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
2262
2263 /* Handle AGGREGATOR attribute */
2264 if (a2 && a4)
2265 {
2266 u32 a2_asn = get_u32(a2->u.ptr->data);
2267
2268 /* If routes were aggregated by an old router, then AS4_PATH and
2269 AS4_AGGREGATOR are invalid. In that case we give up. */
2270 if (a2_asn != AS_TRANS)
2271 return;
2272
2273 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
2274 a2->u.ptr = a4->u.ptr;
2275 }
2276
2277 /* Handle AS_PATH attribute */
2278 if (p2 && p4)
2279 {
2280 /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
2281 int p2_len = as_path_getlen(p2->u.ptr);
2282 int p4_len = as_path_getlen(p4->u.ptr);
2283
2284 /* AS_PATH is too short, give up */
2285 if (p2_len < p4_len)
2286 return;
2287
2288 /* Merge AS_PATH and AS4_PATH */
2289 struct adata *apc = as_path_cut(pool, p2->u.ptr, p2_len - p4_len);
2290 p2->u.ptr = as_path_merge(pool, apc, p4->u.ptr);
2291 }
2292 }
2293
2294 int
2295 bgp_get_attr(eattr *a, byte *buf, int buflen)
2296 {
2297 uint i = EA_ID(a->id);
2298 const struct bgp_attr_desc *d;
2299 int len;
2300
2301 if (bgp_attr_known(i))
2302 {
2303 d = &bgp_attr_table[i];
2304 len = bsprintf(buf, "%s", d->name);
2305 buf += len;
2306 if (d->format)
2307 {
2308 *buf++ = ':';
2309 *buf++ = ' ';
2310 d->format(a, buf, buflen - len - 2);
2311 return GA_FULL;
2312 }
2313 return GA_NAME;
2314 }
2315
2316 bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
2317 return GA_NAME;
2318 }
2319
2320 void
2321 bgp_get_route_info(rte *e, byte *buf)
2322 {
2323 eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2324 eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2325 u32 origas;
2326
2327 buf += bsprintf(buf, " (%d", e->pref);
2328
2329 if (e->u.bgp.suppressed)
2330 buf += bsprintf(buf, "-");
2331
2332 if (rte_stale(e))
2333 buf += bsprintf(buf, "s");
2334
2335 u64 metric = bgp_total_aigp_metric(e);
2336 if (metric < BGP_AIGP_MAX)
2337 {
2338 buf += bsprintf(buf, "/%lu", metric);
2339 }
2340 else if (e->attrs->igp_metric)
2341 {
2342 if (!rte_resolvable(e))
2343 buf += bsprintf(buf, "/-");
2344 else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
2345 buf += bsprintf(buf, "/?");
2346 else
2347 buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
2348 }
2349 buf += bsprintf(buf, ") [");
2350
2351 if (p && as_path_get_last(p->u.ptr, &origas))
2352 buf += bsprintf(buf, "AS%u", origas);
2353 if (o)
2354 buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2355 strcpy(buf, "]");
2356 }