]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/attrs.c
BGP: Do not prepend ASN in export from non-RS EBGP to RS EBGP
[thirdparty/bird.git] / proto / bgp / attrs.c
1 /*
2 * BIRD -- BGP Attributes
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 #undef LOCAL_DEBUG
12
13 #include <stdlib.h>
14
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "conf/conf.h"
21 #include "lib/resource.h"
22 #include "lib/string.h"
23 #include "lib/unaligned.h"
24
25 #include "bgp.h"
26
27 /*
28 * UPDATE message error handling
29 *
30 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32 * - Checks of some optional attribute values are missing.
33 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34 * are probably inadequate.
35 *
36 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37 * 4271 does not explicitly specify the behavior in that case.
38 *
39 * Loop detection related to route reflection (based on ORIGINATOR_ID
40 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41 * specifies that such updates should be ignored, but that is generally
42 * a bad idea.
43 *
44 * BGP attribute table has several hooks:
45 *
46 * export - Hook that validates and normalizes attribute during export phase.
47 * Receives eattr, may modify it (e.g., sort community lists for canonical
48 * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
49 * necessary. May assume that eattr has value valid w.r.t. its type, but may be
50 * invalid w.r.t. BGP constraints. Optional.
51 *
52 * encode - Hook that converts internal representation to external one during
53 * packet writing. Receives eattr and puts it in the buffer (including attribute
54 * header). Returns number of bytes, or -1 if not enough space. May assume that
55 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
56 * for all known attributes that exist internally after export phase (i.e., all
57 * except pseudoattributes MP_(UN)REACH_NLRI).
58 *
59 * decode - Hook that converts external representation to internal one during
60 * packet parsing. Receives attribute data in buffer, validates it and adds
61 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
62 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
63 *
64 * format - Optional hook that converts eattr to textual representation.
65 */
66
67
68 struct bgp_attr_desc {
69 const char *name;
70 uint type;
71 uint flags;
72 void (*export)(struct bgp_export_state *s, eattr *a);
73 int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
74 void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
75 void (*format)(eattr *ea, byte *buf, uint size);
76 };
77
78 static const struct bgp_attr_desc bgp_attr_table[];
79
80 static inline int bgp_attr_known(uint code);
81
82 eattr *
83 bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
84 {
85 ASSERT(bgp_attr_known(code));
86
87 return ea_set_attr(
88 attrs,
89 pool,
90 EA_CODE(PROTOCOL_BGP, code),
91 flags,
92 bgp_attr_table[code].type,
93 val
94 );
95 }
96
97
98
/*
 * Helpers for attribute hooks. All of them expect a variable named `s`
 * (parse or export state) in the calling scope. DISCARD(), WITHDRAW() and
 * UNSET() contain a `return` statement, so they leave the calling function.
 */

/* Log a message prefixed with the protocol instance name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log a message and return from the caller */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log a message, set s->err_withdraw and return from the caller */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark the attribute as undefined and return from the caller.
 * The argument is parenthesized so that any pointer expression is safe. */
#define UNSET(a) \
  ({ (a)->type = EAF_TYPE_UNDEF; return; })

/* Message templates shared by the hooks */
#define NEW_BGP		"Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP	"Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH	"Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE	"Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY	"Missing mandatory %s attribute"
116
117
118 static inline int
119 bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
120 {
121 *buf++ = flags;
122 *buf++ = code;
123 *buf++ = len;
124 return 3;
125 }
126
127 static inline int
128 bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
129 {
130 *buf++ = flags | BAF_EXT_LEN;
131 *buf++ = code;
132 put_u16(buf, len);
133 return 4;
134 }
135
136 static inline int
137 bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
138 {
139 if (len < 256)
140 return bgp_put_attr_hdr3(buf, code, flags, len);
141 else
142 return bgp_put_attr_hdr4(buf, code, flags, len);
143 }
144
145 static int
146 bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
147 {
148 if (size < (3+1))
149 return -1;
150
151 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
152 buf[3] = a->u.data;
153
154 return 3+1;
155 }
156
157 static int
158 bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
159 {
160 if (size < (3+4))
161 return -1;
162
163 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
164 put_u32(buf+3, a->u.data);
165
166 return 3+4;
167 }
168
169 static int
170 bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
171 {
172 uint len = a->u.ptr->length;
173
174 if (size < (4+len))
175 return -1;
176
177 uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
178 put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
179
180 return hdr + len;
181 }
182
183 static int
184 bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len)
185 {
186 if (size < (4+len))
187 return -1;
188
189 uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
190 memcpy(buf + hdr, data, len);
191
192 return hdr + len;
193 }
194
195 static int
196 bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
197 {
198 return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
199 }
200
201
202 /*
203 * Attribute hooks
204 */
205
206 static void
207 bgp_export_origin(struct bgp_export_state *s, eattr *a)
208 {
209 if (a->u.data > 2)
210 WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
211 }
212
213 static void
214 bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
215 {
216 if (len != 1)
217 WITHDRAW(BAD_LENGTH, "ORIGIN", len);
218
219 if (data[0] > 2)
220 WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
221
222 bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
223 }
224
225 static void
226 bgp_format_origin(eattr *a, byte *buf, uint size UNUSED)
227 {
228 static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
229
230 bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
231 }
232
233
234 static int
235 bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
236 {
237 byte *data = a->u.ptr->data;
238 uint len = a->u.ptr->length;
239
240 if (!s->as4_session)
241 {
242 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
243 byte *src = data;
244 data = alloca(len);
245 len = as_path_32to16(data, src, len);
246 }
247
248 return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
249 }
250
251 static void
252 bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
253 {
254 struct bgp_proto *p = s->proto;
255 int as_length = s->as4_session ? 4 : 2;
256 int as_confed = p->cf->confederation && p->is_interior;
257 char err[128];
258
259 if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err)))
260 WITHDRAW("Malformed AS_PATH attribute - %s", err);
261
262 /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
263 if (p->is_interior && !p->is_internal &&
264 ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
265 WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
266
267 if (!s->as4_session)
268 {
269 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
270 byte *src = data;
271 data = alloca(2*len);
272 len = as_path_16to32(data, src, len);
273 }
274
275 bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
276 }
277
278
279 static int
280 bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
281 {
282 /*
283 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
284 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
285 * store it and encode it later by AFI-specific hooks.
286 */
287
288 if (!s->mp_reach)
289 {
290 // ASSERT(a->u.ptr->length == sizeof(ip_addr));
291
292 /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
293 ip_addr *addr = (void *) a->u.ptr->data;
294 if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
295 return 0;
296
297 if (size < (3+4))
298 return -1;
299
300 bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
301 put_ip4(buf+3, ipa_to_ip4(*addr));
302
303 return 3+4;
304 }
305 else
306 {
307 s->mp_next_hop = a;
308 return 0;
309 }
310 }
311
312 static void
313 bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
314 {
315 if (len != 4)
316 WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
317
318 /* Semantic checks are done later */
319 s->ip_next_hop_len = len;
320 s->ip_next_hop_data = data;
321 }
322
323 /* TODO: This function should use AF-specific hook */
324 static void
325 bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED)
326 {
327 ip_addr *nh = (void *) a->u.ptr->data;
328 uint len = a->u.ptr->length;
329
330 ASSERT((len == 16) || (len == 32));
331
332 /* in IPv6, we may have two addresses in NEXT HOP */
333 if ((len == 16) || ipa_zero(nh[1]))
334 bsprintf(buf, "%I", nh[0]);
335 else
336 bsprintf(buf, "%I %I", nh[0], nh[1]);
337 }
338
339
340 static void
341 bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
342 {
343 if (len != 4)
344 WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
345
346 u32 val = get_u32(data);
347 bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
348 }
349
350
351 static void
352 bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
353 {
354 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
355 UNSET(a);
356 }
357
358 static void
359 bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
360 {
361 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
362 DISCARD(BAD_EBGP, "LOCAL_PREF");
363
364 if (len != 4)
365 WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
366
367 u32 val = get_u32(data);
368 bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
369 }
370
371
372 static void
373 bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
374 {
375 if (len != 0)
376 DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
377
378 bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
379 }
380
381 static int
382 bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
383 {
384 byte *data = a->u.ptr->data;
385 uint len = a->u.ptr->length;
386
387 if (!s->as4_session)
388 {
389 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
390 byte *src = data;
391 data = alloca(6);
392 len = aggregator_32to16(data, src);
393 }
394
395 return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
396 }
397
398 static void
399 bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
400 {
401 if (len != (s->as4_session ? 8 : 6))
402 DISCARD(BAD_LENGTH, "AGGREGATOR", len);
403
404 if (!s->as4_session)
405 {
406 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
407 byte *src = data;
408 data = alloca(8);
409 len = aggregator_16to32(data, src);
410 }
411
412 bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
413 }
414
415 static void
416 bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED)
417 {
418 byte *data = a->u.ptr->data;
419
420 bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
421 }
422
423
424 static void
425 bgp_export_community(struct bgp_export_state *s, eattr *a)
426 {
427 if (a->u.ptr->length == 0)
428 UNSET(a);
429
430 a->u.ptr = int_set_sort(s->pool, a->u.ptr);
431 }
432
433 static void
434 bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
435 {
436 if (!len || (len % 4))
437 WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
438
439 struct adata *ad = lp_alloc_adata(s->pool, len);
440 get_u32s(data, (u32 *) ad->data, len / 4);
441 bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
442 }
443
444
445 static void
446 bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
447 {
448 if (!s->proto->is_internal)
449 UNSET(a);
450 }
451
452 static void
453 bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
454 {
455 if (!s->proto->is_internal)
456 DISCARD(BAD_EBGP, "ORIGINATOR_ID");
457
458 if (len != 4)
459 WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
460
461 u32 val = get_u32(data);
462 bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
463 }
464
465
466 static void
467 bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
468 {
469 if (!s->proto->is_internal)
470 UNSET(a);
471
472 if (a->u.ptr->length == 0)
473 UNSET(a);
474 }
475
476 static void
477 bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
478 {
479 if (!s->proto->is_internal)
480 DISCARD(BAD_EBGP, "CLUSTER_LIST");
481
482 if (!len || (len % 4))
483 WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
484
485 struct adata *ad = lp_alloc_adata(s->pool, len);
486 get_u32s(data, (u32 *) ad->data, len / 4);
487 bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
488 }
489
490 static void
491 bgp_format_cluster_list(eattr *a, byte *buf, uint size)
492 {
493 /* Truncates cluster lists larger than buflen, probably not a problem */
494 int_set_format(a->u.ptr, 0, -1, buf, size);
495 }
496
497
498 static inline u32
499 get_af3(byte *buf)
500 {
501 return (get_u16(buf) << 16) | buf[2];
502 }
503
504 static void
505 bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
506 {
507 /*
508 * 2 B MP_REACH_NLRI data - Address Family Identifier
509 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
510 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
511 * var MP_REACH_NLRI data - Network Address of Next Hop
512 * 1 B MP_REACH_NLRI data - Reserved (zero)
513 * var MP_REACH_NLRI data - Network Layer Reachability Information
514 */
515
516 if ((len < 5) || (len < (5 + (uint) data[3])))
517 bgp_parse_error(s, 9);
518
519 s->mp_reach_af = get_af3(data);
520 s->mp_next_hop_len = data[3];
521 s->mp_next_hop_data = data + 4;
522 s->mp_reach_len = len - 5 - s->mp_next_hop_len;
523 s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
524 }
525
526
527 static void
528 bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
529 {
530 /*
531 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
532 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
533 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
534 */
535
536 if (len < 3)
537 bgp_parse_error(s, 9);
538
539 s->mp_unreach_af = get_af3(data);
540 s->mp_unreach_len = len - 3;
541 s->mp_unreach_nlri = data + 3;
542 }
543
544
545 static void
546 bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
547 {
548 a->u.ptr = ec_set_del_nontrans(s->pool, a->u.ptr);
549
550 if (a->u.ptr->length == 0)
551 UNSET(a);
552
553 ec_set_sort_x(a->u.ptr);
554 }
555
556 static void
557 bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
558 {
559 if (!len || (len % 8))
560 WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
561
562 struct adata *ad = lp_alloc_adata(s->pool, len);
563 get_u32s(data, (u32 *) ad->data, len / 4);
564 bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
565 }
566
567
568 static void
569 bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
570 {
571 if (s->as4_session)
572 DISCARD(NEW_BGP, "AS4_AGGREGATOR");
573
574 if (len != 8)
575 DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
576
577 bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
578 }
579
580 static void
581 bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
582 {
583 char err[128];
584
585 if (s->as4_session)
586 DISCARD(NEW_BGP, "AS4_PATH");
587
588 if (len < 6)
589 DISCARD(BAD_LENGTH, "AS4_PATH", len);
590
591 if (!as_path_valid(data, len, 4, 1, err, sizeof(err)))
592 DISCARD("Malformed AS4_PATH attribute - %s", err);
593
594 struct adata *a = lp_alloc_adata(s->pool, len);
595 memcpy(a->data, data, len);
596
597 /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
598 if (as_path_contains_confed(a))
599 {
600 REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
601 a = as_path_strip_confed(s->pool, a);
602 }
603
604 bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
605 }
606
607 static void
608 bgp_export_large_community(struct bgp_export_state *s, eattr *a)
609 {
610 if (a->u.ptr->length == 0)
611 UNSET(a);
612
613 a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
614 }
615
616 static void
617 bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
618 {
619 if (!len || (len % 12))
620 WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
621
622 struct adata *ad = lp_alloc_adata(s->pool, len);
623 get_u32s(data, (u32 *) ad->data, len / 4);
624 bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
625 }
626
627 static void
628 bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
629 {
630 net_addr *n = s->route->net->n.addr;
631 u32 *labels = (u32 *) a->u.ptr->data;
632 uint lnum = a->u.ptr->length / 4;
633
634 /* Perhaps we should just ignore it? */
635 if (!s->mpls)
636 WITHDRAW("Unexpected MPLS stack");
637
638 /* Empty MPLS stack is not allowed */
639 if (!lnum)
640 WITHDRAW("Malformed MPLS stack - empty");
641
642 /* This is ugly, but we must ensure that labels fit into NLRI field */
643 if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
644 WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
645
646 for (uint i = 0; i < lnum; i++)
647 {
648 if (labels[i] > 0xfffff)
649 WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
650
651 /* TODO: Check for special-purpose label values? */
652 }
653 }
654
655 static int
656 bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
657 {
658 /*
659 * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
660 * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
661 */
662
663 s->mpls_labels = a->u.ptr;
664 return 0;
665 }
666
667 static void
668 bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
669 {
670 DISCARD("Discarding received attribute #0");
671 }
672
673 static void
674 bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
675 {
676 u32 *labels = (u32 *) a->u.ptr->data;
677 uint lnum = a->u.ptr->length / 4;
678 char *pos = buf;
679
680 for (uint i = 0; i < lnum; i++)
681 {
682 if (size < 20)
683 {
684 bsprintf(pos, "...");
685 return;
686 }
687
688 uint l = bsprintf(pos, "%d/", labels[i]);
689 ADVANCE(pos, size, l);
690 }
691
692 /* Clear last slash or terminate empty string */
693 pos[lnum ? -1 : 0] = 0;
694 }
695
696 static inline void
697 bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
698 {
699 /* Cannot use bgp_set_attr_data() as it works on known attributes only */
700 ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
701 }
702
703
704 /*
705 * Attribute table
706 */
707
708 static const struct bgp_attr_desc bgp_attr_table[] = {
709 [BA_ORIGIN] = {
710 .name = "origin",
711 .type = EAF_TYPE_INT,
712 .flags = BAF_TRANSITIVE,
713 .export = bgp_export_origin,
714 .encode = bgp_encode_u8,
715 .decode = bgp_decode_origin,
716 .format = bgp_format_origin,
717 },
718 [BA_AS_PATH] = {
719 .name = "as_path",
720 .type = EAF_TYPE_AS_PATH,
721 .flags = BAF_TRANSITIVE,
722 .encode = bgp_encode_as_path,
723 .decode = bgp_decode_as_path,
724 },
725 [BA_NEXT_HOP] = {
726 .name = "next_hop",
727 .type = EAF_TYPE_IP_ADDRESS,
728 .flags = BAF_TRANSITIVE,
729 .encode = bgp_encode_next_hop,
730 .decode = bgp_decode_next_hop,
731 .format = bgp_format_next_hop,
732 },
733 [BA_MULTI_EXIT_DISC] = {
734 .name = "med",
735 .type = EAF_TYPE_INT,
736 .flags = BAF_OPTIONAL,
737 .encode = bgp_encode_u32,
738 .decode = bgp_decode_med,
739 },
740 [BA_LOCAL_PREF] = {
741 .name = "local_pref",
742 .type = EAF_TYPE_INT,
743 .flags = BAF_TRANSITIVE,
744 .export = bgp_export_local_pref,
745 .encode = bgp_encode_u32,
746 .decode = bgp_decode_local_pref,
747 },
748 [BA_ATOMIC_AGGR] = {
749 .name = "atomic_aggr",
750 .type = EAF_TYPE_OPAQUE,
751 .flags = BAF_TRANSITIVE,
752 .encode = bgp_encode_raw,
753 .decode = bgp_decode_atomic_aggr,
754 },
755 [BA_AGGREGATOR] = {
756 .name = "aggregator",
757 .type = EAF_TYPE_OPAQUE,
758 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
759 .encode = bgp_encode_aggregator,
760 .decode = bgp_decode_aggregator,
761 .format = bgp_format_aggregator,
762 },
763 [BA_COMMUNITY] = {
764 .name = "community",
765 .type = EAF_TYPE_INT_SET,
766 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
767 .export = bgp_export_community,
768 .encode = bgp_encode_u32s,
769 .decode = bgp_decode_community,
770 },
771 [BA_ORIGINATOR_ID] = {
772 .name = "originator_id",
773 .type = EAF_TYPE_ROUTER_ID,
774 .flags = BAF_OPTIONAL,
775 .export = bgp_export_originator_id,
776 .encode = bgp_encode_u32,
777 .decode = bgp_decode_originator_id,
778 },
779 [BA_CLUSTER_LIST] = {
780 .name = "cluster_list",
781 .type = EAF_TYPE_INT_SET,
782 .flags = BAF_OPTIONAL,
783 .export = bgp_export_cluster_list,
784 .encode = bgp_encode_u32s,
785 .decode = bgp_decode_cluster_list,
786 .format = bgp_format_cluster_list,
787 },
788 [BA_MP_REACH_NLRI] = {
789 .name = "mp_reach_nlri",
790 .type = EAF_TYPE_OPAQUE,
791 .flags = BAF_OPTIONAL,
792 .decode = bgp_decode_mp_reach_nlri,
793 },
794 [BA_MP_UNREACH_NLRI] = {
795 .name = "mp_unreach_nlri",
796 .type = EAF_TYPE_OPAQUE,
797 .flags = BAF_OPTIONAL,
798 .decode = bgp_decode_mp_unreach_nlri,
799 },
800 [BA_EXT_COMMUNITY] = {
801 .name = "ext_community",
802 .type = EAF_TYPE_EC_SET,
803 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
804 .export = bgp_export_ext_community,
805 .encode = bgp_encode_u32s,
806 .decode = bgp_decode_ext_community,
807 },
808 [BA_AS4_PATH] = {
809 .name = "as4_path",
810 .type = EAF_TYPE_AS_PATH,
811 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
812 .encode = bgp_encode_raw,
813 .decode = bgp_decode_as4_path,
814 },
815 [BA_AS4_AGGREGATOR] = {
816 .name = "as4_aggregator",
817 .type = EAF_TYPE_OPAQUE,
818 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
819 .encode = bgp_encode_raw,
820 .decode = bgp_decode_as4_aggregator,
821 .format = bgp_format_aggregator,
822 },
823 [BA_LARGE_COMMUNITY] = {
824 .name = "large_community",
825 .type = EAF_TYPE_LC_SET,
826 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
827 .export = bgp_export_large_community,
828 .encode = bgp_encode_u32s,
829 .decode = bgp_decode_large_community,
830 },
831 [BA_MPLS_LABEL_STACK] = {
832 .name = "mpls_label_stack",
833 .type = EAF_TYPE_INT_SET,
834 .export = bgp_export_mpls_label_stack,
835 .encode = bgp_encode_mpls_label_stack,
836 .decode = bgp_decode_mpls_label_stack,
837 .format = bgp_format_mpls_label_stack,
838 },
839 };
840
841 static inline int
842 bgp_attr_known(uint code)
843 {
844 return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
845 }
846
847
848 /*
849 * Attribute export
850 */
851
852 static inline void
853 bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
854 {
855 if (EA_PROTO(a->id) != PROTOCOL_BGP)
856 return;
857
858 uint code = EA_ID(a->id);
859
860 if (bgp_attr_known(code))
861 {
862 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
863
864 /* The flags might have been zero if the attr was added by filters */
865 a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
866
867 /* Set partial bit if new opt-trans attribute is attached to non-local route */
868 if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
869 (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
870 a->flags |= BAF_PARTIAL;
871
872 /* Call specific hook */
873 CALL(desc->export, s, a);
874
875 /* Attribute might become undefined in hook */
876 if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
877 return;
878 }
879 else
880 {
881 /* Don't re-export unknown non-transitive attributes */
882 if (!(a->flags & BAF_TRANSITIVE))
883 return;
884
885 a->flags |= BAF_PARTIAL;
886 }
887
888 /* Append updated attribute */
889 to->attrs[to->count++] = *a;
890 }
891
892 /**
893 * bgp_export_attrs - export BGP attributes
894 * @s: BGP export state
895 * @attrs: a list of extended attributes
896 *
897 * The bgp_export_attrs() function takes a list of attributes and merges it to
898 * one newly allocated and sorted segment. Attributes are validated and
899 * normalized by type-specific export hooks and attribute flags are updated.
900 * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
901 * empty community sets).
902 *
903 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
904 */
905 static inline ea_list *
906 bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
907 {
908 /* Merge the attribute list */
909 ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
910 ea_merge(attrs, new);
911 ea_sort(new);
912
913 uint i, count;
914 count = new->count;
915 new->count = 0;
916
917 /* Export each attribute */
918 for (i = 0; i < count; i++)
919 bgp_export_attr(s, &new->attrs[i], new);
920
921 if (s->err_withdraw)
922 return NULL;
923
924 return new;
925 }
926
927
928 /*
929 * Attribute encoding
930 */
931
932 static inline int
933 bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
934 {
935 ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
936
937 uint code = EA_ID(a->id);
938
939 if (bgp_attr_known(code))
940 return bgp_attr_table[code].encode(s, a, buf, size);
941 else
942 return bgp_encode_raw(s, a, buf, size);
943 }
944
945 /**
946 * bgp_encode_attrs - encode BGP attributes
947 * @s: BGP write state
948 * @attrs: a list of extended attributes
949 * @buf: buffer
950 * @end: buffer end
951 *
952 * The bgp_encode_attrs() function takes a list of extended attributes
953 * and converts it to its BGP representation (a part of an Update message).
954 * BGP write state may be fake when called from MRT protocol.
955 *
956 * Result: Length of the attribute block generated or -1 if not enough space.
957 */
958 int
959 bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
960 {
961 byte *pos = buf;
962 int i, len;
963
964 for (i = 0; i < attrs->count; i++)
965 {
966 len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
967
968 if (len < 0)
969 return -1;
970
971 pos += len;
972 }
973
974 return pos - buf;
975 }
976
977
978 /*
979 * Attribute decoding
980 */
981
982 static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
983
984 static inline int
985 bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
986 {
987 eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
988 int num = p->cf->allow_local_as + 1;
989 return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
990 }
991
992 static inline int
993 bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
994 {
995 eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
996 return (e && (e->u.data == p->local_id));
997 }
998
999 static inline int
1000 bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
1001 {
1002 eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
1003 return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
1004 }
1005
1006 static inline void
1007 bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
1008 {
1009 /* Handle duplicate attributes; RFC 7606 3 (g) */
1010 if (BIT32_TEST(s->attrs_seen, code))
1011 {
1012 if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
1013 bgp_parse_error(s, 1);
1014 else
1015 DISCARD("Discarding duplicate attribute (code %u)", code);
1016 }
1017 BIT32_SET(s->attrs_seen, code);
1018
1019 if (bgp_attr_known(code))
1020 {
1021 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1022
1023 /* Handle conflicting flags; RFC 7606 3 (c) */
1024 if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
1025 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
1026
1027 desc->decode(s, code, flags, data, len, to);
1028 }
1029 else /* Unknown attribute */
1030 {
1031 if (!(flags & BAF_OPTIONAL))
1032 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
1033
1034 bgp_decode_unknown(s, code, flags, data, len, to);
1035 }
1036 }
1037
1038 /**
1039 * bgp_decode_attrs - check and decode BGP attributes
1040 * @s: BGP parse state
1041 * @data: start of attribute block
1042 * @len: length of attribute block
1043 *
1044 * This function takes a BGP attribute block (a part of an Update message), checks
1045 * its consistency and converts it to a list of BIRD route attributes represented
1046 * by an (uncached) &rta.
1047 */
1048 ea_list *
1049 bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
1050 {
1051 struct bgp_proto *p = s->proto;
1052 ea_list *attrs = NULL;
1053 uint code, flags, alen;
1054 byte *pos = data;
1055
1056 /* Parse the attributes */
1057 while (len)
1058 {
1059 alen = 0;
1060
1061 /* Read attribute type */
1062 if (len < 2)
1063 goto framing_error;
1064 flags = pos[0];
1065 code = pos[1];
1066 ADVANCE(pos, len, 2);
1067
1068 /* Read attribute length */
1069 if (flags & BAF_EXT_LEN)
1070 {
1071 if (len < 2)
1072 goto framing_error;
1073 alen = get_u16(pos);
1074 ADVANCE(pos, len, 2);
1075 }
1076 else
1077 {
1078 if (len < 1)
1079 goto framing_error;
1080 alen = *pos;
1081 ADVANCE(pos, len, 1);
1082 }
1083
1084 if (alen > len)
1085 goto framing_error;
1086
1087 DBG("Attr %02x %02x %u\n", code, flags, alen);
1088
1089 bgp_decode_attr(s, code, flags, pos, alen, &attrs);
1090 ADVANCE(pos, len, alen);
1091 }
1092
1093 if (s->err_withdraw)
1094 goto withdraw;
1095
1096 /* If there is no reachability NLRI, we are finished */
1097 if (!s->ip_reach_len && !s->mp_reach_len)
1098 return NULL;
1099
1100
1101 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1102 if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
1103 { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
1104
1105 if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
1106 { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
1107
1108 if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
1109 { REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
1110
1111 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1112 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1113 if (!p->as4_session)
1114 bgp_process_as4_attrs(&attrs, s->pool);
1115
1116 /* Reject routes with our ASN in AS_PATH attribute */
1117 if (bgp_as_path_loopy(p, attrs, p->local_as))
1118 goto withdraw;
1119
1120 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
1121 if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
1122 goto withdraw;
1123
1124 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1125 if (p->is_internal && bgp_originator_id_loopy(p, attrs))
1126 goto withdraw;
1127
1128 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1129 if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
1130 goto withdraw;
1131
1132 /* If there is no local preference, define one */
1133 if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
1134 bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1135
1136 return attrs;
1137
1138
1139 framing_error:
1140 /* RFC 7606 4 - handle attribute framing errors */
1141 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1142 alen, len, (int) (pos - s->attrs));
1143
1144 withdraw:
1145 /* RFC 7606 5.2 - handle missing NLRI during errors */
1146 if (!s->ip_reach_len && !s->mp_reach_len)
1147 bgp_parse_error(s, 1);
1148
1149 s->err_withdraw = 1;
1150 return NULL;
1151 }
1152
1153
1154 /*
1155 * Route bucket hash table
1156 */
1157
/*
 * Parameter set of the bucket hash. The generic HASH_* macros are given the
 * prefix RBH and pick these definitions up by name. A bucket is keyed by its
 * attribute list together with the stored hash of that list.
 */
1158 #define RBH_KEY(b) b->eattrs, b->hash
1159 #define RBH_NEXT(b) b->next
/* Compare the stored hashes first, the attribute lists only when they match */
1160 #define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
/* The hash value is part of the key, so the hash function just returns it */
1161 #define RBH_FN(a,h) h
1162
1163 #define RBH_REHASH bgp_rbh_rehash
/* NOTE(review): resize tuning for the HASH_* macros; meaning of the individual
   fields is defined by the hash library, not visible here - confirm there */
1164 #define RBH_PARAMS /8, *2, 2, 2, 8, 20
1165
1166
/* Presumably emits the rehash function named by RBH_REHASH - TODO confirm */
1167 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
1168
1169 void
1170 bgp_init_bucket_table(struct bgp_channel *c)
1171 {
1172 HASH_INIT(c->bucket_hash, c->pool, 8);
1173
1174 init_list(&c->bucket_queue);
1175 c->withdraw_bucket = NULL;
1176 }
1177
1178 void
1179 bgp_free_bucket_table(struct bgp_channel *c)
1180 {
1181 HASH_FREE(c->bucket_hash);
1182
1183 struct bgp_bucket *b;
1184 WALK_LIST_FIRST(b, c->bucket_queue)
1185 {
1186 rem_node(&b->send_node);
1187 mb_free(b);
1188 }
1189
1190 mb_free(c->withdraw_bucket);
1191 c->withdraw_bucket = NULL;
1192 }
1193
1194 static struct bgp_bucket *
1195 bgp_get_bucket(struct bgp_channel *c, ea_list *new)
1196 {
1197 /* Hash and lookup */
1198 u32 hash = ea_hash(new);
1199 struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
1200
1201 if (b)
1202 return b;
1203
1204 uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
1205 uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
1206 uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
1207 uint i;
1208 byte *dest;
1209
1210 /* Gather total size of non-inline attributes */
1211 for (i = 0; i < new->count; i++)
1212 {
1213 eattr *a = &new->attrs[i];
1214
1215 if (!(a->type & EAF_EMBEDDED))
1216 size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
1217 }
1218
1219 /* Create the bucket */
1220 b = mb_alloc(c->pool, size);
1221 init_list(&b->prefixes);
1222 b->hash = hash;
1223
1224 /* Copy list of extended attributes */
1225 memcpy(b->eattrs, new, ea_size);
1226 dest = ((byte *) b->eattrs) + ea_size_aligned;
1227
1228 /* Copy values of non-inline attributes */
1229 for (i = 0; i < new->count; i++)
1230 {
1231 eattr *a = &b->eattrs->attrs[i];
1232
1233 if (!(a->type & EAF_EMBEDDED))
1234 {
1235 struct adata *oa = a->u.ptr;
1236 struct adata *na = (struct adata *) dest;
1237 memcpy(na, oa, sizeof(struct adata) + oa->length);
1238 a->u.ptr = na;
1239 dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
1240 }
1241 }
1242
1243 /* Insert the bucket to send queue and bucket hash */
1244 add_tail(&c->bucket_queue, &b->send_node);
1245 HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
1246
1247 return b;
1248 }
1249
1250 static struct bgp_bucket *
1251 bgp_get_withdraw_bucket(struct bgp_channel *c)
1252 {
1253 if (!c->withdraw_bucket)
1254 {
1255 c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
1256 init_list(&c->withdraw_bucket->prefixes);
1257 }
1258
1259 return c->withdraw_bucket;
1260 }
1261
1262 void
1263 bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1264 {
1265 rem_node(&b->send_node);
1266 HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
1267 mb_free(b);
1268 }
1269
1270 void
1271 bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1272 {
1273 rem_node(&b->send_node);
1274 add_tail(&c->bucket_queue, &b->send_node);
1275 }
1276
1277 void
1278 bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1279 {
1280 struct bgp_proto *p = (void *) c->c.proto;
1281 struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
1282
1283 log(L_ERR "%s: Attribute list too long", p->p.name);
1284 while (!EMPTY_LIST(b->prefixes))
1285 {
1286 struct bgp_prefix *px = HEAD(b->prefixes);
1287
1288 log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
1289 rem_node(&px->buck_node);
1290 add_tail(&wb->prefixes, &px->buck_node);
1291 }
1292 }
1293
1294
1295 /*
1296 * Prefix hash table
1297 */
1298
/*
 * Parameter set of the prefix hash. The generic HASH_* macros are given the
 * prefix PXH and pick these definitions up by name. A prefix is keyed by its
 * network, its path ID and the stored hash value.
 */
1299 #define PXH_KEY(px) px->net, px->path_id, px->hash
1300 #define PXH_NEXT(px) px->next
/* Cheap integer comparisons (hash, path ID) go before the network compare */
1301 #define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
/* The hash value is part of the key, so the hash function just returns it */
1302 #define PXH_FN(n,i,h) h
1303
1304 #define PXH_REHASH bgp_pxh_rehash
/* NOTE(review): resize tuning for the HASH_* macros; meaning of the individual
   fields is defined by the hash library, not visible here - confirm there */
1305 #define PXH_PARAMS /8, *2, 2, 2, 8, 20
1306
1307
/* Presumably emits the rehash function named by PXH_REHASH - TODO confirm */
1308 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
1309
1310 void
1311 bgp_init_prefix_table(struct bgp_channel *c)
1312 {
1313 HASH_INIT(c->prefix_hash, c->pool, 8);
1314
1315 uint alen = net_addr_length[c->c.net_type];
1316 c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
1317 }
1318
1319 void
1320 bgp_free_prefix_table(struct bgp_channel *c)
1321 {
1322 HASH_FREE(c->prefix_hash);
1323
1324 rfree(c->prefix_slab);
1325 c->prefix_slab = NULL;
1326 }
1327
1328 static struct bgp_prefix *
1329 bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
1330 {
1331 u32 hash = net_hash(net) ^ u32_hash(path_id);
1332 struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
1333
1334 if (px)
1335 {
1336 rem_node(&px->buck_node);
1337 return px;
1338 }
1339
1340 if (c->prefix_slab)
1341 px = sl_alloc(c->prefix_slab);
1342 else
1343 px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
1344
1345 px->buck_node.next = NULL;
1346 px->buck_node.prev = NULL;
1347 px->hash = hash;
1348 px->path_id = path_id;
1349 net_copy(px->net, net);
1350
1351 HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
1352
1353 return px;
1354 }
1355
1356 void
1357 bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
1358 {
1359 rem_node(&px->buck_node);
1360 HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
1361
1362 if (c->prefix_slab)
1363 sl_free(c->prefix_slab, px);
1364 else
1365 mb_free(px);
1366 }
1367
1368
1369 /*
1370 * BGP protocol glue
1371 */
1372
1373 int
1374 bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED)
1375 {
1376 rte *e = *new;
1377 struct proto *SRC = e->attrs->src->proto;
1378 struct bgp_proto *p = (struct bgp_proto *) P;
1379 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
1380
1381 /* Reject our routes */
1382 if (src == p)
1383 return -1;
1384
1385 /* Accept non-BGP routes */
1386 if (src == NULL)
1387 return 0;
1388
1389 // XXXX: Check next hop AF
1390
1391 /* IBGP route reflection, RFC 4456 */
1392 if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
1393 {
1394 /* Rejected unless configured as route reflector */
1395 if (!p->rr_client && !src->rr_client)
1396 return -1;
1397
1398 /* Generally, this should be handled when path is received, but we check it
1399 also here as rr_cluster_id may be undefined or different in src. */
1400 if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
1401 return -1;
1402 }
1403
1404 /* Handle well-known communities, RFC 1997 */
1405 struct eattr *c;
1406 if (p->cf->interpret_communities &&
1407 (c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
1408 {
1409 struct adata *d = c->u.ptr;
1410
1411 /* Do not export anywhere */
1412 if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1413 return -1;
1414
1415 /* Do not export outside of AS (or member-AS) */
1416 if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
1417 return -1;
1418
1419 /* Do not export outside of AS (or confederation) */
1420 if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
1421 return -1;
1422
1423 /* Do not export LLGR_STALE routes to LLGR-ignorant peers */
1424 if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
1425 return -1;
1426 }
1427
1428 return 0;
1429 }
1430
1431
/* Shared zero-initialized attribute value; bgp_update_attrs() uses it as an
   empty AS_PATH when the route carries none */
1432 static adata null_adata; /* adata of length 0 */
1433
1434 static ea_list *
1435 bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
1436 {
1437 struct proto *SRC = e->attrs->src->proto;
1438 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
1439 struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
1440 ea_list *attrs = attrs0;
1441 eattr *a;
1442 adata *ad;
1443
1444 /* ORIGIN attribute - mandatory, attach if missing */
1445 if (! bgp_find_attr(attrs0, BA_ORIGIN))
1446 bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
1447
1448 /* AS_PATH attribute - mandatory */
1449 a = bgp_find_attr(attrs0, BA_AS_PATH);
1450 ad = a ? a->u.ptr : &null_adata;
1451
1452 /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1453 if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
1454 ad = as_path_strip_confed(pool, ad);
1455
1456 /* AS_PATH attribute - keep or prepend ASN */
1457 if (p->is_internal || p->rs_client)
1458 {
1459 /* IBGP or route server -> just ensure there is one */
1460 if (!a)
1461 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
1462 }
1463 else if (p->is_interior)
1464 {
1465 /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1466 ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
1467 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1468 }
1469 else /* Regular EBGP (no RS, no confederation) */
1470 {
1471 /* Regular EBGP -> prepend ASN as regular sequence */
1472 ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
1473 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1474
1475 /* MULTI_EXIT_DESC attribute - accept only if set in export filter */
1476 a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
1477 if (a && !(a->type & EAF_FRESH))
1478 bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
1479 }
1480
1481 /* NEXT_HOP attribute - delegated to AF-specific hook */
1482 a = bgp_find_attr(attrs0, BA_NEXT_HOP);
1483 bgp_update_next_hop(&s, a, &attrs);
1484
1485 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
1486 if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
1487 bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1488
1489 /* IBGP route reflection, RFC 4456 */
1490 if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
1491 {
1492 /* ORIGINATOR_ID attribute - attach if not already set */
1493 if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
1494 bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
1495
1496 /* CLUSTER_LIST attribute - prepend cluster ID */
1497 a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
1498 ad = a ? a->u.ptr : NULL;
1499
1500 /* Prepend src cluster ID */
1501 if (src->rr_cluster_id)
1502 ad = int_set_prepend(pool, ad, src->rr_cluster_id);
1503
1504 /* Prepend dst cluster ID if src and dst clusters are different */
1505 if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
1506 ad = int_set_prepend(pool, ad, p->rr_cluster_id);
1507
1508 /* Should be at least one prepended cluster ID */
1509 bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
1510 }
1511
1512 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1513 if (! p->as4_session)
1514 {
1515 a = bgp_find_attr(attrs, BA_AS_PATH);
1516 if (a && as_path_contains_as4(a->u.ptr))
1517 {
1518 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
1519 bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
1520 }
1521
1522 a = bgp_find_attr(attrs, BA_AGGREGATOR);
1523 if (a && aggregator_contains_as4(a->u.ptr))
1524 {
1525 bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
1526 bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
1527 }
1528 }
1529
1530 /*
1531 * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1532 * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1533 * should be checked in AF-specific hooks.
1534 */
1535
1536 /* Apply per-attribute export hooks for validatation and normalization */
1537 return bgp_export_attrs(&s, attrs);
1538 }
1539
1540 void
1541 bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
1542 {
1543 struct bgp_proto *p = (void *) P;
1544 struct bgp_channel *c = (void *) C;
1545 struct bgp_bucket *buck;
1546 struct bgp_prefix *px;
1547 u32 path;
1548
1549 if (new)
1550 {
1551 struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2);
1552
1553 /* If attributes are invalid, we fail back to withdraw */
1554 buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
1555 path = new->attrs->src->global_id;
1556
1557 lp_flush(bgp_linpool2);
1558 }
1559 else
1560 {
1561 buck = bgp_get_withdraw_bucket(c);
1562 path = old->attrs->src->global_id;
1563 }
1564
1565 px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
1566 add_tail(&buck->prefixes, &px->buck_node);
1567
1568 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1569 }
1570
1571
1572 static inline u32
1573 bgp_get_neighbor(rte *r)
1574 {
1575 eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1576 u32 as;
1577
1578 if (e && as_path_get_first_regular(e->u.ptr, &as))
1579 return as;
1580
1581 /* If AS_PATH is not defined, we treat rte as locally originated */
1582 struct bgp_proto *p = (void *) r->attrs->src->proto;
1583 return p->cf->confederation ?: p->local_as;
1584 }
1585
1586 static inline int
1587 rte_resolvable(rte *rt)
1588 {
1589 return rt->attrs->dest == RTD_UNICAST;
1590 }
1591
1592 static inline int
1593 rte_stale(rte *r)
1594 {
1595 if (r->u.bgp.stale < 0)
1596 {
1597 /* If staleness is unknown, compute and cache it */
1598 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
1599 r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
1600 }
1601
1602 return r->u.bgp.stale;
1603 }
1604
1605 int
1606 bgp_rte_better(rte *new, rte *old)
1607 {
1608 struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
1609 struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
1610 eattr *x, *y;
1611 u32 n, o;
1612
1613 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1614 n = new->u.bgp.suppressed;
1615 o = old->u.bgp.suppressed;
1616 if (n > o)
1617 return 0;
1618 if (n < o)
1619 return 1;
1620
1621 /* RFC 4271 9.1.2.1. Route resolvability test */
1622 n = rte_resolvable(new);
1623 o = rte_resolvable(old);
1624 if (n > o)
1625 return 1;
1626 if (n < o)
1627 return 0;
1628
1629 /* LLGR draft - depreference stale routes */
1630 n = rte_stale(new);
1631 o = rte_stale(old);
1632 if (n > o)
1633 return 0;
1634 if (n < o)
1635 return 1;
1636
1637 /* Start with local preferences */
1638 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1639 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1640 n = x ? x->u.data : new_bgp->cf->default_local_pref;
1641 o = y ? y->u.data : old_bgp->cf->default_local_pref;
1642 if (n > o)
1643 return 1;
1644 if (n < o)
1645 return 0;
1646
1647 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1648 if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
1649 {
1650 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1651 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1652 n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1653 o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1654 if (n < o)
1655 return 1;
1656 if (n > o)
1657 return 0;
1658 }
1659
1660 /* RFC 4271 9.1.2.2. b) Use origins */
1661 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1662 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1663 n = x ? x->u.data : ORIGIN_INCOMPLETE;
1664 o = y ? y->u.data : ORIGIN_INCOMPLETE;
1665 if (n < o)
1666 return 1;
1667 if (n > o)
1668 return 0;
1669
1670 /* RFC 4271 9.1.2.2. c) Compare MED's */
1671 /* Proper RFC 4271 path selection cannot be interpreted as finding
1672 * the best path in some ordering. It is implemented partially in
1673 * bgp_rte_recalculate() when deterministic_med option is
1674 * active. Without that option, the behavior is just an
1675 * approximation, which in specific situations may lead to
1676 * persistent routing loops, because it is nondeterministic - it
1677 * depends on the order in which routes appeared. But it is also the
1678 * same behavior as used by default in Cisco routers, so it is
1679 * probably not a big issue.
1680 */
1681 if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1682 (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
1683 {
1684 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1685 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1686 n = x ? x->u.data : new_bgp->cf->default_med;
1687 o = y ? y->u.data : old_bgp->cf->default_med;
1688 if (n < o)
1689 return 1;
1690 if (n > o)
1691 return 0;
1692 }
1693
1694 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1695 if (new_bgp->is_interior > old_bgp->is_interior)
1696 return 0;
1697 if (new_bgp->is_interior < old_bgp->is_interior)
1698 return 1;
1699
1700 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1701 n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1702 o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1703 if (n < o)
1704 return 1;
1705 if (n > o)
1706 return 0;
1707
1708 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1709 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1710 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1711 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1712 n = x ? x->u.data : new_bgp->remote_id;
1713 o = y ? y->u.data : old_bgp->remote_id;
1714
1715 /* RFC 5004 - prefer older routes */
1716 /* (if both are external and from different peer) */
1717 if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1718 !new_bgp->is_internal && n != o)
1719 return 0;
1720
1721 /* rest of RFC 4271 9.1.2.2. f) */
1722 if (n < o)
1723 return 1;
1724 if (n > o)
1725 return 0;
1726
1727 /* RFC 4456 9. b) Compare cluster list lengths */
1728 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1729 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1730 n = x ? int_set_get_size(x->u.ptr) : 0;
1731 o = y ? int_set_get_size(y->u.ptr) : 0;
1732 if (n < o)
1733 return 1;
1734 if (n > o)
1735 return 0;
1736
1737 /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
1738 return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
1739 }
1740
1741
1742 int
1743 bgp_rte_mergable(rte *pri, rte *sec)
1744 {
1745 struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
1746 struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
1747 eattr *x, *y;
1748 u32 p, s;
1749
1750 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1751 if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
1752 return 0;
1753
1754 /* RFC 4271 9.1.2.1. Route resolvability test */
1755 if (!rte_resolvable(sec))
1756 return 0;
1757
1758 /* LLGR draft - depreference stale routes */
1759 if (rte_stale(pri) != rte_stale(sec))
1760 return 0;
1761
1762 /* Start with local preferences */
1763 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1764 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1765 p = x ? x->u.data : pri_bgp->cf->default_local_pref;
1766 s = y ? y->u.data : sec_bgp->cf->default_local_pref;
1767 if (p != s)
1768 return 0;
1769
1770 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1771 if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
1772 {
1773 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1774 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1775 p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1776 s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1777
1778 if (p != s)
1779 return 0;
1780
1781 // if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
1782 // return 0;
1783 }
1784
1785 /* RFC 4271 9.1.2.2. b) Use origins */
1786 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1787 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1788 p = x ? x->u.data : ORIGIN_INCOMPLETE;
1789 s = y ? y->u.data : ORIGIN_INCOMPLETE;
1790 if (p != s)
1791 return 0;
1792
1793 /* RFC 4271 9.1.2.2. c) Compare MED's */
1794 if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
1795 (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
1796 {
1797 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1798 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1799 p = x ? x->u.data : pri_bgp->cf->default_med;
1800 s = y ? y->u.data : sec_bgp->cf->default_med;
1801 if (p != s)
1802 return 0;
1803 }
1804
1805 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1806 if (pri_bgp->is_interior != sec_bgp->is_interior)
1807 return 0;
1808
1809 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1810 p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
1811 s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
1812 if (p != s)
1813 return 0;
1814
1815 /* Remaining criteria are ignored */
1816
1817 return 1;
1818 }
1819
1820
1821 static inline int
1822 same_group(rte *r, u32 lpref, u32 lasn)
1823 {
1824 return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
1825 }
1826
1827 static inline int
1828 use_deterministic_med(rte *r)
1829 {
1830 struct proto *P = r->attrs->src->proto;
1831 return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
1832 }
1833
1834 int
1835 bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
1836 {
1837 rte *r, *s;
1838 rte *key = new ? new : old;
1839 u32 lpref = key->pref;
1840 u32 lasn = bgp_get_neighbor(key);
1841 int old_is_group_best = 0;
1842
1843 /*
1844 * Proper RFC 4271 path selection is a bit complicated, it cannot be
1845 * implemented just by rte_better(), because it is not a linear
1846 * ordering. But it can be splitted to two levels, where the lower
1847 * level chooses the best routes in each group of routes from the
1848 * same neighboring AS and higher level chooses the best route (with
1849 * a slightly different ordering) between the best-in-group routes.
1850 *
1851 * When deterministic_med is disabled, we just ignore this issue and
1852 * choose the best route by bgp_rte_better() alone. If enabled, the
1853 * lower level of the route selection is done here (for the group
1854 * to which the changed route belongs), all routes in group are
1855 * marked as suppressed, just chosen best-in-group is not.
1856 *
1857 * Global best route selection then implements higher level by
1858 * choosing between non-suppressed routes (as they are always
1859 * preferred over suppressed routes). Routes from BGP protocols
1860 * that do not set deterministic_med are just never suppressed. As
1861 * they do not participate in the lower level selection, it is OK
1862 * that this fn is not called for them.
1863 *
1864 * The idea is simple, the implementation is more problematic,
1865 * mostly because of optimizations in rte_recalculate() that
1866 * avoids full recalculation in most cases.
1867 *
1868 * We can assume that at least one of new, old is non-NULL and both
1869 * are from the same protocol with enabled deterministic_med. We
1870 * group routes by both neighbor AS (lasn) and preference (lpref),
1871 * because bgp_rte_better() does not handle preference itself.
1872 */
1873
1874 /* If new and old are from different groups, we just process that
1875 as two independent events */
1876 if (new && old && !same_group(old, lpref, lasn))
1877 {
1878 int i1, i2;
1879 i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
1880 i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
1881 return i1 || i2;
1882 }
1883
1884 /*
1885 * We could find the best-in-group and then make some shortcuts like
1886 * in rte_recalculate, but as we would have to walk through all
1887 * net->routes just to find it, it is probably not worth. So we
1888 * just have two simpler fast cases that use just the old route.
1889 * We also set suppressed flag to avoid using it in bgp_rte_better().
1890 */
1891
1892 if (new)
1893 new->u.bgp.suppressed = 1;
1894
1895 if (old)
1896 {
1897 old_is_group_best = !old->u.bgp.suppressed;
1898 old->u.bgp.suppressed = 1;
1899 int new_is_better = new && bgp_rte_better(new, old);
1900
1901 /* The first case - replace not best with worse (or remove not best) */
1902 if (!old_is_group_best && !new_is_better)
1903 return 0;
1904
1905 /* The second case - replace the best with better */
1906 if (old_is_group_best && new_is_better)
1907 {
1908 /* new is best-in-group, the see discussion below - this is
1909 a special variant of NBG && OBG. From OBG we can deduce
1910 that same_group(old_best) iff (old == old_best) */
1911 new->u.bgp.suppressed = 0;
1912 return (old == old_best);
1913 }
1914 }
1915
1916 /* The default case - find a new best-in-group route */
1917 r = new; /* new may not be in the list */
1918 for (s=net->routes; rte_is_valid(s); s=s->next)
1919 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
1920 {
1921 s->u.bgp.suppressed = 1;
1922 if (!r || bgp_rte_better(s, r))
1923 r = s;
1924 }
1925
1926 /* Simple case - the last route in group disappears */
1927 if (!r)
1928 return 0;
1929
1930 /* Found best-in-group */
1931 r->u.bgp.suppressed = 0;
1932
1933 /*
1934 * There are generally two reasons why we have to force
1935 * recalculation (return 1): First, the new route may be wrongfully
1936 * chosen to be the best in the first case check in
1937 * rte_recalculate(), this may happen only if old_best is from the
1938 * same group. Second, another (different than new route)
1939 * best-in-group is chosen and that may be the proper best (although
1940 * rte_recalculate() without ignore that possibility).
1941 *
1942 * There are three possible cases according to whether the old route
1943 * was the best in group (OBG, stored in old_is_group_best) and
1944 * whether the new route is the best in group (NBG, tested by r == new).
1945 * These cases work even if old or new is NULL.
1946 *
1947 * NBG -> new is a possible candidate for the best route, so we just
1948 * check for the first reason using same_group().
1949 *
1950 * !NBG && OBG -> Second reason applies, return 1
1951 *
1952 * !NBG && !OBG -> Best in group does not change, old != old_best,
1953 * rte_better(new, old_best) is false and therefore
1954 * the first reason does not apply, return 0
1955 */
1956
1957 if (r == new)
1958 return old_best && same_group(old_best, lpref, lasn);
1959 else
1960 return old_is_group_best;
1961 }
1962
1963 struct rte *
1964 bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
1965 {
1966 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
1967 struct adata *ad = a ? a->u.ptr : NULL;
1968 uint flags = a ? a->flags : BAF_PARTIAL;
1969
1970 if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
1971 return NULL;
1972
1973 if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
1974 return r;
1975
1976 r = rte_cow_rta(r, pool);
1977 bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
1978 int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
1979 r->u.bgp.stale = 1;
1980
1981 return r;
1982 }
1983
1984
1985 /*
1986 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
1987 */
1988 static void
1989 bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
1990 {
1991 eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
1992 eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
1993 eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
1994 eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
1995
1996 /* First, unset AS4_* attributes */
1997 if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
1998 if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
1999
2000 /* Handle AGGREGATOR attribute */
2001 if (a2 && a4)
2002 {
2003 u32 a2_asn = get_u32(a2->u.ptr->data);
2004
2005 /* If routes were aggregated by an old router, then AS4_PATH and
2006 AS4_AGGREGATOR are invalid. In that case we give up. */
2007 if (a2_asn != AS_TRANS)
2008 return;
2009
2010 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
2011 a2->u.ptr = a4->u.ptr;
2012 }
2013
2014 /* Handle AS_PATH attribute */
2015 if (p2 && p4)
2016 {
2017 /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
2018 int p2_len = as_path_getlen(p2->u.ptr);
2019 int p4_len = as_path_getlen(p4->u.ptr);
2020
2021 /* AS_PATH is too short, give up */
2022 if (p2_len < p4_len)
2023 return;
2024
2025 /* Merge AS_PATH and AS4_PATH */
2026 as_path_cut(p2->u.ptr, p2_len - p4_len);
2027 p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr);
2028 }
2029 }
2030
2031 int
2032 bgp_get_attr(eattr *a, byte *buf, int buflen)
2033 {
2034 uint i = EA_ID(a->id);
2035 const struct bgp_attr_desc *d;
2036 int len;
2037
2038 if (bgp_attr_known(i))
2039 {
2040 d = &bgp_attr_table[i];
2041 len = bsprintf(buf, "%s", d->name);
2042 buf += len;
2043 if (d->format)
2044 {
2045 *buf++ = ':';
2046 *buf++ = ' ';
2047 d->format(a, buf, buflen - len - 2);
2048 return GA_FULL;
2049 }
2050 return GA_NAME;
2051 }
2052
2053 bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
2054 return GA_NAME;
2055 }
2056
2057 void
2058 bgp_get_route_info(rte *e, byte *buf)
2059 {
2060 eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2061 eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2062 u32 origas;
2063
2064 buf += bsprintf(buf, " (%d", e->pref);
2065
2066 if (e->u.bgp.suppressed)
2067 buf += bsprintf(buf, "-");
2068
2069 if (rte_stale(e))
2070 buf += bsprintf(buf, "s");
2071
2072 if (e->attrs->hostentry)
2073 {
2074 if (!rte_resolvable(e))
2075 buf += bsprintf(buf, "/-");
2076 else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
2077 buf += bsprintf(buf, "/?");
2078 else
2079 buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
2080 }
2081 buf += bsprintf(buf, ") [");
2082
2083 if (p && as_path_get_last(p->u.ptr, &origas))
2084 buf += bsprintf(buf, "AS%u", origas);
2085 if (o)
2086 buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2087 strcpy(buf, "]");
2088 }