]> git.ipfire.org Git - thirdparty/bird.git/blob - proto/bgp/attrs.c
BGP: Attribute set function merged with its common counterpart
[thirdparty/bird.git] / proto / bgp / attrs.c
1 /*
2 * BIRD -- BGP Attributes
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
11 #undef LOCAL_DEBUG
12
13 #include <stdlib.h>
14
15 #include "nest/bird.h"
16 #include "nest/iface.h"
17 #include "nest/protocol.h"
18 #include "nest/route.h"
19 #include "nest/attrs.h"
20 #include "conf/conf.h"
21 #include "lib/resource.h"
22 #include "lib/string.h"
23 #include "lib/unaligned.h"
24
25 #include "bgp.h"
26
27 /*
28 * UPDATE message error handling
29 *
30 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32 * - Checks of some optional attribute values are missing.
33 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34 * are probably inadequate.
35 *
36 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37 * 4271 does not explicitly specify the behavior in that case.
38 *
39 * Loop detection related to route reflection (based on ORIGINATOR_ID
40 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41 * specifies that such updates should be ignored, but that is generally
42 * a bad idea.
43 *
44 * BGP attribute table has several hooks:
45 *
46 * export - Hook that validates and normalizes attribute during export phase.
47 * Receives eattr, may modify it (e.g., sort community lists for canonical
48 * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
49 * necessary. May assume that eattr has value valid w.r.t. its type, but may be
50 * invalid w.r.t. BGP constraints. Optional.
51 *
52 * encode - Hook that converts internal representation to external one during
53 * packet writing. Receives eattr and puts it in the buffer (including attribute
54 * header). Returns number of bytes, or -1 if not enough space. May assume that
55 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
56 * for all known attributes that exist internally after export phase (i.e., all
57 * except pseudoattributes MP_(UN)REACH_NLRI).
58 *
59 * decode - Hook that converts external representation to internal one during
60 * packet parsing. Receives attribute data in buffer, validates it and adds
61 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
62 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
63 *
64 * format - Optional hook that converts eattr to textual representation.
65 */
66
67
68 struct bgp_attr_desc {
69 const char *name;
70 uint type;
71 uint flags;
72 void (*export)(struct bgp_export_state *s, eattr *a);
73 int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
74 void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
75 void (*format)(eattr *ea, byte *buf, uint size);
76 };
77
78 static const struct bgp_attr_desc bgp_attr_table[];
79
80 static inline int bgp_attr_known(uint code);
81
82 eattr *
83 bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
84 {
85 ASSERT(bgp_attr_known(code));
86
87 return ea_set_attr(
88 attrs,
89 pool,
90 EA_CODE(PROTOCOL_BGP, code),
91 flags,
92 bgp_attr_table[code].type,
93 val
94 );
95 }
96
97
98
/*
 * Helper macros for attribute hooks. All of them expect a variable named `s`
 * (with s->proto) in the calling scope, and DISCARD / WITHDRAW / UNSET return
 * from the enclosing function, so they may be used only in void functions.
 */

/* Log a message about the remote side, prefixed by the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Report a problem and leave the hook without any further action */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Report a problem, flag the update for withdraw handling and leave the hook */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark attribute as undefined and leave the hook; argument is parenthesized
   so that any pointer expression can be passed safely */
#define UNSET(a) \
  ({ (a)->type = EAF_TYPE_UNDEF; return; })

/* Common message formats used with the macros above */
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY "Missing mandatory %s attribute"
116
117
118 static inline int
119 bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
120 {
121 *buf++ = flags;
122 *buf++ = code;
123 *buf++ = len;
124 return 3;
125 }
126
127 static inline int
128 bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
129 {
130 *buf++ = flags | BAF_EXT_LEN;
131 *buf++ = code;
132 put_u16(buf, len);
133 return 4;
134 }
135
136 static inline int
137 bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
138 {
139 if (len < 256)
140 return bgp_put_attr_hdr3(buf, code, flags, len);
141 else
142 return bgp_put_attr_hdr4(buf, code, flags, len);
143 }
144
145 static int
146 bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
147 {
148 if (size < (3+1))
149 return -1;
150
151 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
152 buf[3] = a->u.data;
153
154 return 3+1;
155 }
156
157 static int
158 bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
159 {
160 if (size < (3+4))
161 return -1;
162
163 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
164 put_u32(buf+3, a->u.data);
165
166 return 3+4;
167 }
168
169 static int
170 bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
171 {
172 uint len = a->u.ptr->length;
173
174 if (size < (4+len))
175 return -1;
176
177 uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
178 put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
179
180 return hdr + len;
181 }
182
183 static int
184 bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len)
185 {
186 if (size < (4+len))
187 return -1;
188
189 uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
190 memcpy(buf + hdr, data, len);
191
192 return hdr + len;
193 }
194
195 static int
196 bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
197 {
198 return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
199 }
200
201
202 /*
203 * Attribute hooks
204 */
205
206 static void
207 bgp_export_origin(struct bgp_export_state *s, eattr *a)
208 {
209 if (a->u.data > 2)
210 WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
211 }
212
213 static void
214 bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
215 {
216 if (len != 1)
217 WITHDRAW(BAD_LENGTH, "ORIGIN", len);
218
219 if (data[0] > 2)
220 WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
221
222 bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
223 }
224
225 static void
226 bgp_format_origin(eattr *a, byte *buf, uint size UNUSED)
227 {
228 static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
229
230 bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
231 }
232
233
234 static int
235 bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
236 {
237 byte *data = a->u.ptr->data;
238 uint len = a->u.ptr->length;
239
240 if (!s->as4_session)
241 {
242 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
243 byte *src = data;
244 data = alloca(len);
245 len = as_path_32to16(data, src, len);
246 }
247
248 return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
249 }
250
251 static void
252 bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
253 {
254 struct bgp_proto *p = s->proto;
255 int as_length = s->as4_session ? 4 : 2;
256 int as_confed = p->cf->confederation && p->is_interior;
257 char err[128];
258
259 if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err)))
260 WITHDRAW("Malformed AS_PATH attribute - %s", err);
261
262 /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
263 if (p->is_interior && !p->is_internal &&
264 ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
265 WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
266
267 if (!s->as4_session)
268 {
269 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
270 byte *src = data;
271 data = alloca(2*len);
272 len = as_path_16to32(data, src, len);
273 }
274
275 bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
276 }
277
278
279 static int
280 bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
281 {
282 /*
283 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
284 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
285 * store it and encode it later by AFI-specific hooks.
286 */
287
288 if ((s->channel->afi == BGP_AF_IPV4) && !s->channel->ext_next_hop)
289 {
290 ASSERT(a->u.ptr->length == sizeof(ip_addr));
291
292 if (size < (3+4))
293 return -1;
294
295 bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
296 put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
297
298 return 3+4;
299 }
300 else
301 {
302 s->mp_next_hop = a;
303 return 0;
304 }
305 }
306
307 static void
308 bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
309 {
310 if (len != 4)
311 WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
312
313 /* Semantic checks are done later */
314 s->ip_next_hop_len = len;
315 s->ip_next_hop_data = data;
316 }
317
318 /* TODO: This function should use AF-specific hook */
319 static void
320 bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED)
321 {
322 ip_addr *nh = (void *) a->u.ptr->data;
323 uint len = a->u.ptr->length;
324
325 ASSERT((len == 16) || (len == 32));
326
327 /* in IPv6, we may have two addresses in NEXT HOP */
328 if ((len == 16) || ipa_zero(nh[1]))
329 bsprintf(buf, "%I", nh[0]);
330 else
331 bsprintf(buf, "%I %I", nh[0], nh[1]);
332 }
333
334
335 static void
336 bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
337 {
338 if (len != 4)
339 WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
340
341 u32 val = get_u32(data);
342 bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
343 }
344
345
346 static void
347 bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
348 {
349 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
350 UNSET(a);
351 }
352
353 static void
354 bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
355 {
356 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
357 DISCARD(BAD_EBGP, "LOCAL_PREF");
358
359 if (len != 4)
360 WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
361
362 u32 val = get_u32(data);
363 bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
364 }
365
366
367 static void
368 bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
369 {
370 if (len != 0)
371 DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
372
373 bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
374 }
375
376 static int
377 bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
378 {
379 byte *data = a->u.ptr->data;
380 uint len = a->u.ptr->length;
381
382 if (!s->as4_session)
383 {
384 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
385 byte *src = data;
386 data = alloca(6);
387 len = aggregator_32to16(data, src);
388 }
389
390 return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
391 }
392
393 static void
394 bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
395 {
396 if (len != (s->as4_session ? 8 : 6))
397 DISCARD(BAD_LENGTH, "AGGREGATOR", len);
398
399 if (!s->as4_session)
400 {
401 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
402 byte *src = data;
403 data = alloca(8);
404 len = aggregator_16to32(data, src);
405 }
406
407 bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
408 }
409
410 static void
411 bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED)
412 {
413 byte *data = a->u.ptr->data;
414
415 bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
416 }
417
418
419 static void
420 bgp_export_community(struct bgp_export_state *s, eattr *a)
421 {
422 if (a->u.ptr->length == 0)
423 UNSET(a);
424
425 a->u.ptr = int_set_sort(s->pool, a->u.ptr);
426 }
427
428 static void
429 bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
430 {
431 if (!len || (len % 4))
432 WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
433
434 struct adata *ad = lp_alloc_adata(s->pool, len);
435 get_u32s(data, (u32 *) ad->data, len / 4);
436 bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
437 }
438
439
440 static void
441 bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
442 {
443 if (!s->proto->is_internal)
444 UNSET(a);
445 }
446
447 static void
448 bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
449 {
450 if (!s->proto->is_internal)
451 DISCARD(BAD_EBGP, "ORIGINATOR_ID");
452
453 if (len != 4)
454 WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
455
456 u32 val = get_u32(data);
457 bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
458 }
459
460
461 static void
462 bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
463 {
464 if (!s->proto->is_internal)
465 UNSET(a);
466
467 if (a->u.ptr->length == 0)
468 UNSET(a);
469 }
470
471 static void
472 bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
473 {
474 if (!s->proto->is_internal)
475 DISCARD(BAD_EBGP, "CLUSTER_LIST");
476
477 if (!len || (len % 4))
478 WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
479
480 struct adata *ad = lp_alloc_adata(s->pool, len);
481 get_u32s(data, (u32 *) ad->data, len / 4);
482 bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
483 }
484
485 static void
486 bgp_format_cluster_list(eattr *a, byte *buf, uint size)
487 {
488 /* Truncates cluster lists larger than buflen, probably not a problem */
489 int_set_format(a->u.ptr, 0, -1, buf, size);
490 }
491
492
493 static inline u32
494 get_af3(byte *buf)
495 {
496 return (get_u16(buf) << 16) | buf[2];
497 }
498
499 static void
500 bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
501 {
502 /*
503 * 2 B MP_REACH_NLRI data - Address Family Identifier
504 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
505 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
506 * var MP_REACH_NLRI data - Network Address of Next Hop
507 * 1 B MP_REACH_NLRI data - Reserved (zero)
508 * var MP_REACH_NLRI data - Network Layer Reachability Information
509 */
510
511 if ((len < 5) || (len < (5 + (uint) data[3])))
512 bgp_parse_error(s, 9);
513
514 s->mp_reach_af = get_af3(data);
515 s->mp_next_hop_len = data[3];
516 s->mp_next_hop_data = data + 4;
517 s->mp_reach_len = len - 5 - s->mp_next_hop_len;
518 s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
519 }
520
521
522 static void
523 bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
524 {
525 /*
526 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
527 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
528 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
529 */
530
531 if (len < 3)
532 bgp_parse_error(s, 9);
533
534 s->mp_unreach_af = get_af3(data);
535 s->mp_unreach_len = len - 3;
536 s->mp_unreach_nlri = data + 3;
537 }
538
539
540 static void
541 bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
542 {
543 a->u.ptr = ec_set_del_nontrans(s->pool, a->u.ptr);
544
545 if (a->u.ptr->length == 0)
546 UNSET(a);
547
548 ec_set_sort_x(a->u.ptr);
549 }
550
551 static void
552 bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
553 {
554 if (!len || (len % 8))
555 WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
556
557 struct adata *ad = lp_alloc_adata(s->pool, len);
558 get_u32s(data, (u32 *) ad->data, len / 4);
559 bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
560 }
561
562
563 static void
564 bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
565 {
566 if (s->as4_session)
567 DISCARD(NEW_BGP, "AS4_AGGREGATOR");
568
569 if (len != 8)
570 DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
571
572 bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
573 }
574
575 static void
576 bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
577 {
578 char err[128];
579
580 if (s->as4_session)
581 DISCARD(NEW_BGP, "AS4_PATH");
582
583 if (len < 6)
584 DISCARD(BAD_LENGTH, "AS4_PATH", len);
585
586 if (!as_path_valid(data, len, 4, 1, err, sizeof(err)))
587 DISCARD("Malformed AS4_PATH attribute - %s", err);
588
589 struct adata *a = lp_alloc_adata(s->pool, len);
590 memcpy(a->data, data, len);
591
592 /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
593 if (as_path_contains_confed(a))
594 {
595 REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
596 a = as_path_strip_confed(s->pool, a);
597 }
598
599 bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
600 }
601
602 static void
603 bgp_export_large_community(struct bgp_export_state *s, eattr *a)
604 {
605 if (a->u.ptr->length == 0)
606 UNSET(a);
607
608 a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
609 }
610
611 static void
612 bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
613 {
614 if (!len || (len % 12))
615 WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
616
617 struct adata *ad = lp_alloc_adata(s->pool, len);
618 get_u32s(data, (u32 *) ad->data, len / 4);
619 bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
620 }
621
622 static void
623 bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
624 {
625 net_addr *n = s->route->net->n.addr;
626 u32 *labels = (u32 *) a->u.ptr->data;
627 uint lnum = a->u.ptr->length / 4;
628
629 /* Perhaps we should just ignore it? */
630 if (!s->mpls)
631 WITHDRAW("Unexpected MPLS stack");
632
633 /* Empty MPLS stack is not allowed */
634 if (!lnum)
635 WITHDRAW("Malformed MPLS stack - empty");
636
637 /* This is ugly, but we must ensure that labels fit into NLRI field */
638 if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
639 WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
640
641 for (uint i = 0; i < lnum; i++)
642 {
643 if (labels[i] > 0xfffff)
644 WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
645
646 /* TODO: Check for special-purpose label values? */
647 }
648 }
649
650 static int
651 bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
652 {
653 /*
654 * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
655 * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
656 */
657
658 s->mpls_labels = a->u.ptr;
659 return 0;
660 }
661
662 static void
663 bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
664 {
665 DISCARD("Discarding received attribute #0");
666 }
667
668 static void
669 bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
670 {
671 u32 *labels = (u32 *) a->u.ptr->data;
672 uint lnum = a->u.ptr->length / 4;
673 char *pos = buf;
674
675 for (uint i = 0; i < lnum; i++)
676 {
677 if (size < 20)
678 {
679 bsprintf(pos, "...");
680 return;
681 }
682
683 uint l = bsprintf(pos, "%d/", labels[i]);
684 ADVANCE(pos, size, l);
685 }
686
687 /* Clear last slash or terminate empty string */
688 pos[lnum ? -1 : 0] = 0;
689 }
690
691 static inline void
692 bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
693 {
694 /* Cannot use bgp_set_attr_data() as it works on known attributes only */
695 ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
696 }
697
698
699 /*
700 * Attribute table
701 */
702
703 static const struct bgp_attr_desc bgp_attr_table[] = {
704 [BA_ORIGIN] = {
705 .name = "origin",
706 .type = EAF_TYPE_INT,
707 .flags = BAF_TRANSITIVE,
708 .export = bgp_export_origin,
709 .encode = bgp_encode_u8,
710 .decode = bgp_decode_origin,
711 .format = bgp_format_origin,
712 },
713 [BA_AS_PATH] = {
714 .name = "as_path",
715 .type = EAF_TYPE_AS_PATH,
716 .flags = BAF_TRANSITIVE,
717 .encode = bgp_encode_as_path,
718 .decode = bgp_decode_as_path,
719 },
720 [BA_NEXT_HOP] = {
721 .name = "next_hop",
722 .type = EAF_TYPE_IP_ADDRESS,
723 .flags = BAF_TRANSITIVE,
724 .encode = bgp_encode_next_hop,
725 .decode = bgp_decode_next_hop,
726 .format = bgp_format_next_hop,
727 },
728 [BA_MULTI_EXIT_DISC] = {
729 .name = "med",
730 .type = EAF_TYPE_INT,
731 .flags = BAF_OPTIONAL,
732 .encode = bgp_encode_u32,
733 .decode = bgp_decode_med,
734 },
735 [BA_LOCAL_PREF] = {
736 .name = "local_pref",
737 .type = EAF_TYPE_INT,
738 .flags = BAF_TRANSITIVE,
739 .export = bgp_export_local_pref,
740 .encode = bgp_encode_u32,
741 .decode = bgp_decode_local_pref,
742 },
743 [BA_ATOMIC_AGGR] = {
744 .name = "atomic_aggr",
745 .type = EAF_TYPE_OPAQUE,
746 .flags = BAF_TRANSITIVE,
747 .encode = bgp_encode_raw,
748 .decode = bgp_decode_atomic_aggr,
749 },
750 [BA_AGGREGATOR] = {
751 .name = "aggregator",
752 .type = EAF_TYPE_OPAQUE,
753 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
754 .encode = bgp_encode_aggregator,
755 .decode = bgp_decode_aggregator,
756 .format = bgp_format_aggregator,
757 },
758 [BA_COMMUNITY] = {
759 .name = "community",
760 .type = EAF_TYPE_INT_SET,
761 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
762 .export = bgp_export_community,
763 .encode = bgp_encode_u32s,
764 .decode = bgp_decode_community,
765 },
766 [BA_ORIGINATOR_ID] = {
767 .name = "originator_id",
768 .type = EAF_TYPE_ROUTER_ID,
769 .flags = BAF_OPTIONAL,
770 .export = bgp_export_originator_id,
771 .encode = bgp_encode_u32,
772 .decode = bgp_decode_originator_id,
773 },
774 [BA_CLUSTER_LIST] = {
775 .name = "cluster_list",
776 .type = EAF_TYPE_INT_SET,
777 .flags = BAF_OPTIONAL,
778 .export = bgp_export_cluster_list,
779 .encode = bgp_encode_u32s,
780 .decode = bgp_decode_cluster_list,
781 .format = bgp_format_cluster_list,
782 },
783 [BA_MP_REACH_NLRI] = {
784 .name = "mp_reach_nlri",
785 .type = EAF_TYPE_OPAQUE,
786 .flags = BAF_OPTIONAL,
787 .decode = bgp_decode_mp_reach_nlri,
788 },
789 [BA_MP_UNREACH_NLRI] = {
790 .name = "mp_unreach_nlri",
791 .type = EAF_TYPE_OPAQUE,
792 .flags = BAF_OPTIONAL,
793 .decode = bgp_decode_mp_unreach_nlri,
794 },
795 [BA_EXT_COMMUNITY] = {
796 .name = "ext_community",
797 .type = EAF_TYPE_EC_SET,
798 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
799 .export = bgp_export_ext_community,
800 .encode = bgp_encode_u32s,
801 .decode = bgp_decode_ext_community,
802 },
803 [BA_AS4_PATH] = {
804 .name = "as4_path",
805 .type = EAF_TYPE_AS_PATH,
806 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
807 .encode = bgp_encode_raw,
808 .decode = bgp_decode_as4_path,
809 },
810 [BA_AS4_AGGREGATOR] = {
811 .name = "as4_aggregator",
812 .type = EAF_TYPE_OPAQUE,
813 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
814 .encode = bgp_encode_raw,
815 .decode = bgp_decode_as4_aggregator,
816 .format = bgp_format_aggregator,
817 },
818 [BA_LARGE_COMMUNITY] = {
819 .name = "large_community",
820 .type = EAF_TYPE_LC_SET,
821 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
822 .export = bgp_export_large_community,
823 .encode = bgp_encode_u32s,
824 .decode = bgp_decode_large_community,
825 },
826 [BA_MPLS_LABEL_STACK] = {
827 .name = "mpls_label_stack",
828 .type = EAF_TYPE_INT_SET,
829 .export = bgp_export_mpls_label_stack,
830 .encode = bgp_encode_mpls_label_stack,
831 .decode = bgp_decode_mpls_label_stack,
832 .format = bgp_format_mpls_label_stack,
833 },
834 };
835
836 static inline int
837 bgp_attr_known(uint code)
838 {
839 return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
840 }
841
842
843 /*
844 * Attribute export
845 */
846
847 static inline void
848 bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
849 {
850 if (EA_PROTO(a->id) != PROTOCOL_BGP)
851 return;
852
853 uint code = EA_ID(a->id);
854
855 if (bgp_attr_known(code))
856 {
857 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
858
859 /* The flags might have been zero if the attr was added by filters */
860 a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
861
862 /* Set partial bit if new opt-trans attribute is attached to non-local route */
863 if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
864 (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
865 a->flags |= BAF_PARTIAL;
866
867 /* Call specific hook */
868 CALL(desc->export, s, a);
869
870 /* Attribute might become undefined in hook */
871 if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
872 return;
873 }
874 else
875 {
876 /* Don't re-export unknown non-transitive attributes */
877 if (!(a->flags & BAF_TRANSITIVE))
878 return;
879
880 a->flags |= BAF_PARTIAL;
881 }
882
883 /* Append updated attribute */
884 to->attrs[to->count++] = *a;
885 }
886
887 /**
888 * bgp_export_attrs - export BGP attributes
889 * @s: BGP export state
890 * @attrs: a list of extended attributes
891 *
892 * The bgp_export_attrs() function takes a list of attributes and merges it to
893 * one newly allocated and sorted segment. Attributes are validated and
894 * normalized by type-specific export hooks and attribute flags are updated.
895 * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
896 * empty community sets).
897 *
898 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
899 */
900 static inline ea_list *
901 bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
902 {
903 /* Merge the attribute list */
904 ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
905 ea_merge(attrs, new);
906 ea_sort(new);
907
908 uint i, count;
909 count = new->count;
910 new->count = 0;
911
912 /* Export each attribute */
913 for (i = 0; i < count; i++)
914 bgp_export_attr(s, &new->attrs[i], new);
915
916 if (s->err_withdraw)
917 return NULL;
918
919 return new;
920 }
921
922
923 /*
924 * Attribute encoding
925 */
926
927 static inline int
928 bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
929 {
930 ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
931
932 uint code = EA_ID(a->id);
933
934 if (bgp_attr_known(code))
935 return bgp_attr_table[code].encode(s, a, buf, size);
936 else
937 return bgp_encode_raw(s, a, buf, size);
938 }
939
940 /**
941 * bgp_encode_attrs - encode BGP attributes
942 * @s: BGP write state
943 * @attrs: a list of extended attributes
944 * @buf: buffer
945 * @end: buffer end
946 *
947 * The bgp_encode_attrs() function takes a list of extended attributes
948 * and converts it to its BGP representation (a part of an Update message).
949 *
950 * Result: Length of the attribute block generated or -1 if not enough space.
951 */
952 int
953 bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
954 {
955 byte *pos = buf;
956 int i, len;
957
958 for (i = 0; i < attrs->count; i++)
959 {
960 len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
961
962 if (len < 0)
963 return -1;
964
965 pos += len;
966 }
967
968 return pos - buf;
969 }
970
971
972 /*
973 * Attribute decoding
974 */
975
976 static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
977
978 static inline int
979 bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
980 {
981 eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
982 int num = p->cf->allow_local_as + 1;
983 return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
984 }
985
986 static inline int
987 bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
988 {
989 eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
990 return (e && (e->u.data == p->local_id));
991 }
992
993 static inline int
994 bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
995 {
996 eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
997 return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
998 }
999
1000 static inline void
1001 bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
1002 {
1003 /* Handle duplicate attributes; RFC 7606 3 (g) */
1004 if (BIT32_TEST(s->attrs_seen, code))
1005 {
1006 if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
1007 bgp_parse_error(s, 1);
1008 else
1009 DISCARD("Discarding duplicate attribute (code %u)", code);
1010 }
1011 BIT32_SET(s->attrs_seen, code);
1012
1013 if (bgp_attr_known(code))
1014 {
1015 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1016
1017 /* Handle conflicting flags; RFC 7606 3 (c) */
1018 if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
1019 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
1020
1021 desc->decode(s, code, flags, data, len, to);
1022 }
1023 else /* Unknown attribute */
1024 {
1025 if (!(flags & BAF_OPTIONAL))
1026 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
1027
1028 bgp_decode_unknown(s, code, flags, data, len, to);
1029 }
1030 }
1031
1032 /**
1033 * bgp_decode_attrs - check and decode BGP attributes
1034 * @s: BGP parse state
1035 * @data: start of attribute block
1036 * @len: length of attribute block
1037 *
1038 * This function takes a BGP attribute block (a part of an Update message), checks
1039 * its consistency and converts it to a list of BIRD route attributes represented
1040 * by an (uncached) &rta.
1041 */
1042 ea_list *
1043 bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
1044 {
/*
 * Returns the decoded attribute list on success. Returns NULL both when the
 * update carries no reachability NLRI and when the update has to be treated
 * as a withdraw (in the latter case s->err_withdraw is set to 1).
 */
1045 struct bgp_proto *p = s->proto;
1046 ea_list *attrs = NULL;
1047 uint code, flags, alen;
1048 byte *pos = data;
1049
1050 /* Parse the attributes; each iteration consumes one attribute header and its value */
1051 while (len)
1052 {
/* Reset, so a framing error detected before the length field is read
   reports 0 instead of the length of the previous attribute */
1053 alen = 0;
1054
1055 /* Read attribute type: one byte of flags followed by one byte of type code */
1056 if (len < 2)
1057 goto framing_error;
1058 flags = pos[0];
1059 code = pos[1];
1060 ADVANCE(pos, len, 2);
1061
1062 /* Read attribute length: two bytes if BAF_EXT_LEN is set, one byte otherwise */
1063 if (flags & BAF_EXT_LEN)
1064 {
1065 if (len < 2)
1066 goto framing_error;
1067 alen = get_u16(pos);
1068 ADVANCE(pos, len, 2);
1069 }
1070 else
1071 {
1072 if (len < 1)
1073 goto framing_error;
1074 alen = *pos;
1075 ADVANCE(pos, len, 1);
1076 }
1077
/* The declared value length must fit into the remaining data */
1078 if (alen > len)
1079 goto framing_error;
1080
1081 DBG("Attr %02x %02x %u\n", code, flags, alen);
1082
1083 bgp_decode_attr(s, code, flags, pos, alen, &attrs);
1084 ADVANCE(pos, len, alen);
1085 }
1086
/* NOTE(review): err_withdraw is presumably raised by bgp_decode_attr() for
   attribute errors handled as treat-as-withdraw - confirm in its callers */
1087 if (s->err_withdraw)
1088 goto withdraw;
1089
1090 /* If there is no reachability NLRI, we are finished */
1091 if (!s->ip_reach_len && !s->mp_reach_len)
1092 return NULL;
1093
1094
1095 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1096 if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
1097 { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
1098
1099 if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
1100 { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
1101
/* NEXT_HOP is required only when prefixes are carried in the plain NLRI field */
1102 if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
1103 { REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
1104
1105 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1106 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1107 if (!p->as4_session)
1108 bgp_process_as4_attrs(&attrs, s->pool);
1109
1110 /* Reject routes with our ASN in AS_PATH attribute */
1111 if (bgp_as_path_loopy(p, attrs, p->local_as))
1112 goto withdraw;
1113
1114 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
1115 if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
1116 goto withdraw;
1117
1118 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1119 if (p->is_internal && bgp_originator_id_loopy(p, attrs))
1120 goto withdraw;
1121
1122 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1123 if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
1124 goto withdraw;
1125
1126 /* If there is no local preference, define one */
1127 if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
1128 bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1129
1130 return attrs;
1131
1132
1133 framing_error:
1134 /* RFC 7606 4 - handle attribute framing errors */
1135 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1136 alen, len, (int) (pos - s->attrs));
/* Intentionally falls through: a framing error is handled as a withdraw */
1137
1138 withdraw:
1139 /* RFC 7606 5.2 - handle missing NLRI during errors */
1140 if (!s->ip_reach_len && !s->mp_reach_len)
1141 bgp_parse_error(s, 1);
1142
1143 s->err_withdraw = 1;
1144 return NULL;
1145 }
1146
1147
1148 /*
1149 * Route bucket hash table
1150 */
1151
/*
 * Parameter macros consumed by the generic HASH_*() macros under the RBH
 * prefix. A bucket is keyed by its attribute list together with a hash
 * value that is computed once and stored in the bucket.
 */
/* Key of a bucket: the embedded attribute list and its stored hash */
1152 #define RBH_KEY(b) b->eattrs, b->hash
/* Link to the next bucket in the same hash chain */
1153 #define RBH_NEXT(b) b->next
/* Compare the cheap stored hashes first, fall back to ea_same() only on a match */
1154 #define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
/* The hash is supplied as part of the key, so the hash function just returns it */
1155 #define RBH_FN(a,h) h
1156
/* NOTE(review): rehash function name and resize tuning values; their exact
   meaning is defined by the generic hash macros - confirm there */
1157 #define RBH_REHASH bgp_rbh_rehash
1158 #define RBH_PARAMS /8, *2, 2, 2, 8, 20
1159
1160
/* Presumably emits the definition of bgp_rbh_rehash() named above - TODO confirm */
1161 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
1162
1163 void
1164 bgp_init_bucket_table(struct bgp_channel *c)
1165 {
1166 HASH_INIT(c->bucket_hash, c->pool, 8);
1167
1168 init_list(&c->bucket_queue);
1169 c->withdraw_bucket = NULL;
1170 }
1171
1172 void
1173 bgp_free_bucket_table(struct bgp_channel *c)
1174 {
1175 HASH_FREE(c->bucket_hash);
1176
1177 struct bgp_bucket *b;
1178 WALK_LIST_FIRST(b, c->bucket_queue)
1179 {
1180 rem_node(&b->send_node);
1181 mb_free(b);
1182 }
1183
1184 mb_free(c->withdraw_bucket);
1185 c->withdraw_bucket = NULL;
1186 }
1187
1188 static struct bgp_bucket *
1189 bgp_get_bucket(struct bgp_channel *c, ea_list *new)
1190 {
1191 /* Hash and lookup */
1192 u32 hash = ea_hash(new);
1193 struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
1194
1195 if (b)
1196 return b;
1197
1198 uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
1199 uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
1200 uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
1201 uint i;
1202 byte *dest;
1203
1204 /* Gather total size of non-inline attributes */
1205 for (i = 0; i < new->count; i++)
1206 {
1207 eattr *a = &new->attrs[i];
1208
1209 if (!(a->type & EAF_EMBEDDED))
1210 size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
1211 }
1212
1213 /* Create the bucket */
1214 b = mb_alloc(c->pool, size);
1215 init_list(&b->prefixes);
1216 b->hash = hash;
1217
1218 /* Copy list of extended attributes */
1219 memcpy(b->eattrs, new, ea_size);
1220 dest = ((byte *) b->eattrs) + ea_size_aligned;
1221
1222 /* Copy values of non-inline attributes */
1223 for (i = 0; i < new->count; i++)
1224 {
1225 eattr *a = &b->eattrs->attrs[i];
1226
1227 if (!(a->type & EAF_EMBEDDED))
1228 {
1229 struct adata *oa = a->u.ptr;
1230 struct adata *na = (struct adata *) dest;
1231 memcpy(na, oa, sizeof(struct adata) + oa->length);
1232 a->u.ptr = na;
1233 dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
1234 }
1235 }
1236
1237 /* Insert the bucket to send queue and bucket hash */
1238 add_tail(&c->bucket_queue, &b->send_node);
1239 HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
1240
1241 return b;
1242 }
1243
1244 static struct bgp_bucket *
1245 bgp_get_withdraw_bucket(struct bgp_channel *c)
1246 {
1247 if (!c->withdraw_bucket)
1248 {
1249 c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
1250 init_list(&c->withdraw_bucket->prefixes);
1251 }
1252
1253 return c->withdraw_bucket;
1254 }
1255
1256 void
1257 bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1258 {
1259 rem_node(&b->send_node);
1260 HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
1261 mb_free(b);
1262 }
1263
1264 void
1265 bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1266 {
1267 rem_node(&b->send_node);
1268 add_tail(&c->bucket_queue, &b->send_node);
1269 }
1270
1271 void
1272 bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1273 {
1274 struct bgp_proto *p = (void *) c->c.proto;
1275 struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
1276
1277 log(L_ERR "%s: Attribute list too long", p->p.name);
1278 while (!EMPTY_LIST(b->prefixes))
1279 {
1280 struct bgp_prefix *px = HEAD(b->prefixes);
1281
1282 log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
1283 rem_node(&px->buck_node);
1284 add_tail(&wb->prefixes, &px->buck_node);
1285 }
1286 }
1287
1288
1289 /*
1290 * Prefix hash table
1291 */
1292
/*
 * Parameter macros consumed by the generic HASH_*() macros under the PXH
 * prefix. A prefix is keyed by its network, its path ID and a hash value
 * that is computed once and stored in the prefix.
 */
/* Key of a prefix: network address, path ID and the stored hash */
1293 #define PXH_KEY(px) px->net, px->path_id, px->hash
/* Link to the next prefix in the same hash chain */
1294 #define PXH_NEXT(px) px->next
/* Cheap integer comparisons first, net_equal() only when both match */
1295 #define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
/* The hash is supplied as part of the key, so the hash function just returns it */
1296 #define PXH_FN(n,i,h) h
1297
/* NOTE(review): rehash function name and resize tuning values; their exact
   meaning is defined by the generic hash macros - confirm there */
1298 #define PXH_REHASH bgp_pxh_rehash
1299 #define PXH_PARAMS /8, *2, 2, 2, 8, 20
1300
1301
/* Presumably emits the definition of bgp_pxh_rehash() named above - TODO confirm */
1302 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
1303
1304 void
1305 bgp_init_prefix_table(struct bgp_channel *c)
1306 {
1307 HASH_INIT(c->prefix_hash, c->pool, 8);
1308
1309 uint alen = net_addr_length[c->c.net_type];
1310 c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
1311 }
1312
1313 void
1314 bgp_free_prefix_table(struct bgp_channel *c)
1315 {
1316 HASH_FREE(c->prefix_hash);
1317
1318 rfree(c->prefix_slab);
1319 c->prefix_slab = NULL;
1320 }
1321
1322 static struct bgp_prefix *
1323 bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
1324 {
1325 u32 hash = net_hash(net) ^ u32_hash(path_id);
1326 struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
1327
1328 if (px)
1329 {
1330 rem_node(&px->buck_node);
1331 return px;
1332 }
1333
1334 if (c->prefix_slab)
1335 px = sl_alloc(c->prefix_slab);
1336 else
1337 px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
1338
1339 px->buck_node.next = NULL;
1340 px->buck_node.prev = NULL;
1341 px->hash = hash;
1342 px->path_id = path_id;
1343 net_copy(px->net, net);
1344
1345 HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
1346
1347 return px;
1348 }
1349
1350 void
1351 bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
1352 {
1353 rem_node(&px->buck_node);
1354 HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
1355
1356 if (c->prefix_slab)
1357 sl_free(c->prefix_slab, px);
1358 else
1359 mb_free(px);
1360 }
1361
1362
1363 /*
1364 * BGP protocol glue
1365 */
1366
1367 int
1368 bgp_import_control(struct proto *P, rte **new, struct linpool *pool UNUSED)
1369 {
1370 rte *e = *new;
1371 struct proto *SRC = e->attrs->src->proto;
1372 struct bgp_proto *p = (struct bgp_proto *) P;
1373 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
1374
1375 /* Reject our routes */
1376 if (src == p)
1377 return -1;
1378
1379 /* Accept non-BGP routes */
1380 if (src == NULL)
1381 return 0;
1382
1383 // XXXX: Check next hop AF
1384
1385 /* IBGP route reflection, RFC 4456 */
1386 if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
1387 {
1388 /* Rejected unless configured as route reflector */
1389 if (!p->rr_client && !src->rr_client)
1390 return -1;
1391
1392 /* Generally, this should be handled when path is received, but we check it
1393 also here as rr_cluster_id may be undefined or different in src. */
1394 if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
1395 return -1;
1396 }
1397
1398 /* Handle well-known communities, RFC 1997 */
1399 struct eattr *c;
1400 if (p->cf->interpret_communities &&
1401 (c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
1402 {
1403 struct adata *d = c->u.ptr;
1404
1405 /* Do not export anywhere */
1406 if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1407 return -1;
1408
1409 /* Do not export outside of AS (or member-AS) */
1410 if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
1411 return -1;
1412
1413 /* Do not export outside of AS (or confederation) */
1414 if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
1415 return -1;
1416 }
1417
1418 return 0;
1419 }
1420
1421
1422 static adata null_adata; /* adata of length 0 */
1423
/*
 * Prepare the attribute list that is sent to the neighbor of @p for route
 * @e on channel @c.
 *
 * @attrs0 is the list the route carries; it is only read. All changes are
 * collected in the local list attrs, which starts as @attrs0. Most lookups
 * deliberately use @attrs0, so decisions depend on what the route originally
 * carried and not on values set earlier in this function. The AS4 section
 * at the end looks into attrs because it has to see the AS_PATH produced
 * above. Temporary data is allocated from @pool.
 *
 * Returns whatever bgp_export_attrs() returns for the resulting list.
 */
1424 static ea_list *
1425 bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
1426 {
1427 struct proto *SRC = e->attrs->src->proto;
/* src is the BGP instance the route was learned from, NULL for non-BGP routes */
1428 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
1429 struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
1430 ea_list *attrs = attrs0;
1431 eattr *a;
1432 adata *ad;
1433
1434 /* ORIGIN attribute - mandatory, attach if missing */
1435 if (! bgp_find_attr(attrs0, BA_ORIGIN))
1436 bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
1437
1438 /* AS_PATH attribute - mandatory */
1439 a = bgp_find_attr(attrs0, BA_AS_PATH);
1440 ad = a ? a->u.ptr : &null_adata;
1441
1442 /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1443 if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
1444 ad = as_path_strip_confed(pool, ad);
1445
1446 /* AS_PATH attribute - keep or prepend ASN */
1447 if (p->is_internal ||
1448 (p->rs_client && src && src->rs_client))
1449 {
1450 /* IBGP or route server -> just ensure there is one */
/* NOTE(review): a path stripped above is not stored in this branch - confirm this is intended */
1451 if (!a)
1452 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
1453 }
1454 else if (p->is_interior)
1455 {
1456 /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1457 ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
1458 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1459 }
1460 else /* Regular EBGP (no RS, no confederation) */
1461 {
1462 /* Regular EBGP -> prepend ASN as regular sequence */
1463 ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
1464 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1465
1466 /* MULTI_EXIT_DISC attribute - accept only if set in export filter */
/* An attribute without EAF_FRESH is dropped; presumably that flag marks
   values set by the export filter - TODO confirm */
1467 a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
1468 if (a && !(a->type & EAF_FRESH))
1469 bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
1470 }
1471
1472 /* NEXT_HOP attribute - delegated to AF-specific hook */
1473 a = bgp_find_attr(attrs0, BA_NEXT_HOP);
1474 bgp_update_next_hop(&s, a, &attrs);
1475
1476 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
1477 if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
1478 bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1479
1480 /* IBGP route reflection, RFC 4456 */
1481 if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
1482 {
1483 /* ORIGINATOR_ID attribute - attach if not already set */
1484 if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
1485 bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
1486
1487 /* CLUSTER_LIST attribute - prepend cluster ID */
1488 a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
1489 ad = a ? a->u.ptr : NULL;
1490
1491 /* Prepend src cluster ID */
1492 if (src->rr_cluster_id)
1493 ad = int_set_prepend(pool, ad, src->rr_cluster_id);
1494
1495 /* Prepend dst cluster ID if src and dst clusters are different */
1496 if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
1497 ad = int_set_prepend(pool, ad, p->rr_cluster_id);
1498
1499 /* Should be at least one prepended cluster ID */
1500 bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
1501 }
1502
1503 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1504 if (! p->as4_session)
1505 {
/* Both conversions below read a->u.ptr, i.e. the data found before the
   first bgp_set_attr_ptr() call of each pair */
1506 a = bgp_find_attr(attrs, BA_AS_PATH);
1507 if (a && as_path_contains_as4(a->u.ptr))
1508 {
1509 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
1510 bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
1511 }
1512
1513 a = bgp_find_attr(attrs, BA_AGGREGATOR);
1514 if (a && aggregator_contains_as4(a->u.ptr))
1515 {
1516 bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
1517 bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
1518 }
1519 }
1520
1521 /*
1522 * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1523 * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1524 * should be checked in AF-specific hooks.
1525 */
1526
1527 /* Apply per-attribute export hooks for validation and normalization */
1528 return bgp_export_attrs(&s, attrs);
1529 }
1530
1531 void
1532 bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
1533 {
1534 struct bgp_proto *p = (void *) P;
1535 struct bgp_channel *c = (void *) C;
1536 struct bgp_bucket *buck;
1537 struct bgp_prefix *px;
1538 u32 path;
1539
1540 if (new)
1541 {
1542 struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2);
1543
1544 /* If attributes are invalid, we fail back to withdraw */
1545 buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
1546 path = new->attrs->src->global_id;
1547
1548 lp_flush(bgp_linpool2);
1549 }
1550 else
1551 {
1552 buck = bgp_get_withdraw_bucket(c);
1553 path = old->attrs->src->global_id;
1554 }
1555
1556 px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
1557 add_tail(&buck->prefixes, &px->buck_node);
1558
1559 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1560 }
1561
1562
1563 static inline u32
1564 bgp_get_neighbor(rte *r)
1565 {
1566 eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1567 u32 as;
1568
1569 if (e && as_path_get_first_regular(e->u.ptr, &as))
1570 return as;
1571
1572 /* If AS_PATH is not defined, we treat rte as locally originated */
1573 struct bgp_proto *p = (void *) r->attrs->src->proto;
1574 return p->cf->confederation ?: p->local_as;
1575 }
1576
1577 static inline int
1578 rte_resolvable(rte *rt)
1579 {
1580 return rt->attrs->dest == RTD_UNICAST;
1581 }
1582
1583 int
1584 bgp_rte_better(rte *new, rte *old)
1585 {
1586 struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
1587 struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
1588 eattr *x, *y;
1589 u32 n, o;
1590
1591 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1592 n = new->u.bgp.suppressed;
1593 o = old->u.bgp.suppressed;
1594 if (n > o)
1595 return 0;
1596 if (n < o)
1597 return 1;
1598
1599 /* RFC 4271 9.1.2.1. Route resolvability test */
1600 n = rte_resolvable(new);
1601 o = rte_resolvable(old);
1602 if (n > o)
1603 return 1;
1604 if (n < o)
1605 return 0;
1606
1607 /* Start with local preferences */
1608 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1609 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1610 n = x ? x->u.data : new_bgp->cf->default_local_pref;
1611 o = y ? y->u.data : old_bgp->cf->default_local_pref;
1612 if (n > o)
1613 return 1;
1614 if (n < o)
1615 return 0;
1616
1617 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1618 if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
1619 {
1620 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1621 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1622 n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1623 o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1624 if (n < o)
1625 return 1;
1626 if (n > o)
1627 return 0;
1628 }
1629
1630 /* RFC 4271 9.1.2.2. b) Use origins */
1631 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1632 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1633 n = x ? x->u.data : ORIGIN_INCOMPLETE;
1634 o = y ? y->u.data : ORIGIN_INCOMPLETE;
1635 if (n < o)
1636 return 1;
1637 if (n > o)
1638 return 0;
1639
1640 /* RFC 4271 9.1.2.2. c) Compare MED's */
1641 /* Proper RFC 4271 path selection cannot be interpreted as finding
1642 * the best path in some ordering. It is implemented partially in
1643 * bgp_rte_recalculate() when deterministic_med option is
1644 * active. Without that option, the behavior is just an
1645 * approximation, which in specific situations may lead to
1646 * persistent routing loops, because it is nondeterministic - it
1647 * depends on the order in which routes appeared. But it is also the
1648 * same behavior as used by default in Cisco routers, so it is
1649 * probably not a big issue.
1650 */
1651 if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1652 (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
1653 {
1654 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1655 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1656 n = x ? x->u.data : new_bgp->cf->default_med;
1657 o = y ? y->u.data : old_bgp->cf->default_med;
1658 if (n < o)
1659 return 1;
1660 if (n > o)
1661 return 0;
1662 }
1663
1664 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1665 if (new_bgp->is_interior > old_bgp->is_interior)
1666 return 0;
1667 if (new_bgp->is_interior < old_bgp->is_interior)
1668 return 1;
1669
1670 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1671 n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1672 o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1673 if (n < o)
1674 return 1;
1675 if (n > o)
1676 return 0;
1677
1678 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1679 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1680 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1681 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1682 n = x ? x->u.data : new_bgp->remote_id;
1683 o = y ? y->u.data : old_bgp->remote_id;
1684
1685 /* RFC 5004 - prefer older routes */
1686 /* (if both are external and from different peer) */
1687 if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1688 !new_bgp->is_internal && n != o)
1689 return 0;
1690
1691 /* rest of RFC 4271 9.1.2.2. f) */
1692 if (n < o)
1693 return 1;
1694 if (n > o)
1695 return 0;
1696
1697 /* RFC 4456 9. b) Compare cluster list lengths */
1698 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1699 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1700 n = x ? int_set_get_size(x->u.ptr) : 0;
1701 o = y ? int_set_get_size(y->u.ptr) : 0;
1702 if (n < o)
1703 return 1;
1704 if (n > o)
1705 return 0;
1706
1707 /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
1708 return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
1709 }
1710
1711
1712 int
1713 bgp_rte_mergable(rte *pri, rte *sec)
1714 {
1715 struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
1716 struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
1717 eattr *x, *y;
1718 u32 p, s;
1719
1720 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1721 if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
1722 return 0;
1723
1724 /* RFC 4271 9.1.2.1. Route resolvability test */
1725 if (!rte_resolvable(sec))
1726 return 0;
1727
1728 /* Start with local preferences */
1729 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1730 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1731 p = x ? x->u.data : pri_bgp->cf->default_local_pref;
1732 s = y ? y->u.data : sec_bgp->cf->default_local_pref;
1733 if (p != s)
1734 return 0;
1735
1736 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1737 if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
1738 {
1739 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1740 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1741 p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1742 s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1743
1744 if (p != s)
1745 return 0;
1746
1747 // if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
1748 // return 0;
1749 }
1750
1751 /* RFC 4271 9.1.2.2. b) Use origins */
1752 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1753 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1754 p = x ? x->u.data : ORIGIN_INCOMPLETE;
1755 s = y ? y->u.data : ORIGIN_INCOMPLETE;
1756 if (p != s)
1757 return 0;
1758
1759 /* RFC 4271 9.1.2.2. c) Compare MED's */
1760 if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
1761 (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
1762 {
1763 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1764 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1765 p = x ? x->u.data : pri_bgp->cf->default_med;
1766 s = y ? y->u.data : sec_bgp->cf->default_med;
1767 if (p != s)
1768 return 0;
1769 }
1770
1771 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1772 if (pri_bgp->is_interior != sec_bgp->is_interior)
1773 return 0;
1774
1775 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1776 p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
1777 s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
1778 if (p != s)
1779 return 0;
1780
1781 /* Remaining criteria are ignored */
1782
1783 return 1;
1784 }
1785
1786
1787 static inline int
1788 same_group(rte *r, u32 lpref, u32 lasn)
1789 {
1790 return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
1791 }
1792
1793 static inline int
1794 use_deterministic_med(rte *r)
1795 {
1796 struct proto *P = r->attrs->src->proto;
1797 return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
1798 }
1799
/*
 * Lower level of deterministic-MED route selection for one network.
 *
 * Called when route @old of @net is replaced by @new (either may be NULL,
 * but not both). Updates the suppressed flags of the routes in the affected
 * group so that only the best-in-group route is left unsuppressed. The
 * return value is non-zero when the caller has to run a full best route
 * recalculation; see the comments in the body for the exact reasoning.
 */
1800 int
1801 bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
1802 {
1803 rte *r, *s;
1804 rte *key = new ? new : old;
1805 u32 lpref = key->pref;
1806 u32 lasn = bgp_get_neighbor(key);
1807 int old_is_group_best = 0;
1808
1809 /*
1810 * Proper RFC 4271 path selection is a bit complicated, it cannot be
1811 * implemented just by rte_better(), because it is not a linear
1812 * ordering. But it can be split to two levels, where the lower
1813 * level chooses the best routes in each group of routes from the
1814 * same neighboring AS and higher level chooses the best route (with
1815 * a slightly different ordering) between the best-in-group routes.
1816 *
1817 * When deterministic_med is disabled, we just ignore this issue and
1818 * choose the best route by bgp_rte_better() alone. If enabled, the
1819 * lower level of the route selection is done here (for the group
1820 * to which the changed route belongs), all routes in group are
1821 * marked as suppressed, just chosen best-in-group is not.
1822 *
1823 * Global best route selection then implements higher level by
1824 * choosing between non-suppressed routes (as they are always
1825 * preferred over suppressed routes). Routes from BGP protocols
1826 * that do not set deterministic_med are just never suppressed. As
1827 * they do not participate in the lower level selection, it is OK
1828 * that this fn is not called for them.
1829 *
1830 * The idea is simple, the implementation is more problematic,
1831 * mostly because of optimizations in rte_recalculate() that
1832 * avoids full recalculation in most cases.
1833 *
1834 * We can assume that at least one of new, old is non-NULL and both
1835 * are from the same protocol with enabled deterministic_med. We
1836 * group routes by both neighbor AS (lasn) and preference (lpref),
1837 * because bgp_rte_better() does not handle preference itself.
1838 */
1839
1840 /* If new and old are from different groups, we just process that
1841 as two independent events */
1842 if (new && old && !same_group(old, lpref, lasn))
1843 {
1844 int i1, i2;
1845 i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
1846 i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
1847 return i1 || i2;
1848 }
1849
1850 /*
1851 * We could find the best-in-group and then make some shortcuts like
1852 * in rte_recalculate, but as we would have to walk through all
1853 * net->routes just to find it, it is probably not worth. So we
1854 * just have two simpler fast cases that use just the old route.
1855 * We also set suppressed flag to avoid using it in bgp_rte_better().
1856 */
1857
1858 if (new)
1859 new->u.bgp.suppressed = 1;
1860
1861 if (old)
1862 {
/* Remember the old state first; with both flags set to 1 the suppressed
   test in bgp_rte_better() ties and the real criteria decide */
1863 old_is_group_best = !old->u.bgp.suppressed;
1864 old->u.bgp.suppressed = 1;
1865 int new_is_better = new && bgp_rte_better(new, old);
1866
1867 /* The first case - replace not best with worse (or remove not best) */
1868 if (!old_is_group_best && !new_is_better)
1869 return 0;
1870
1871 /* The second case - replace the best with better */
1872 if (old_is_group_best && new_is_better)
1873 {
1874 /* new is best-in-group, see the discussion below - this is
1875 a special variant of NBG && OBG. From OBG we can deduce
1876 that same_group(old_best) iff (old == old_best) */
1877 new->u.bgp.suppressed = 0;
1878 return (old == old_best);
1879 }
1880 }
1881
1882 /* The default case - find a new best-in-group route */
1883 r = new; /* new may not be in the list */
1884 for (s=net->routes; rte_is_valid(s); s=s->next)
1885 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
1886 {
/* Suppress every group member; the winner is unsuppressed after the loop */
1887 s->u.bgp.suppressed = 1;
1888 if (!r || bgp_rte_better(s, r))
1889 r = s;
1890 }
1891
1892 /* Simple case - the last route in group disappears */
1893 if (!r)
1894 return 0;
1895
1896 /* Found best-in-group */
1897 r->u.bgp.suppressed = 0;
1898
1899 /*
1900 * There are generally two reasons why we have to force
1901 * recalculation (return 1): First, the new route may be wrongfully
1902 * chosen to be the best in the first case check in
1903 * rte_recalculate(), this may happen only if old_best is from the
1904 * same group. Second, another (different than new route)
1905 * best-in-group is chosen and that may be the proper best (although
1906 * rte_recalculate() would ignore that possibility).
1907 *
1908 * There are three possible cases according to whether the old route
1909 * was the best in group (OBG, stored in old_is_group_best) and
1910 * whether the new route is the best in group (NBG, tested by r == new).
1911 * These cases work even if old or new is NULL.
1912 *
1913 * NBG -> new is a possible candidate for the best route, so we just
1914 * check for the first reason using same_group().
1915 *
1916 * !NBG && OBG -> Second reason applies, return 1
1917 *
1918 * !NBG && !OBG -> Best in group does not change, old != old_best,
1919 * rte_better(new, old_best) is false and therefore
1920 * the first reason does not apply, return 0
1921 */
1922
1923 if (r == new)
1924 return old_best && same_group(old_best, lpref, lasn);
1925 else
1926 return old_is_group_best;
1927 }
1928
1929
1930 /*
1931 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
1932 */
1933 static void
1934 bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
1935 {
1936 eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
1937 eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
1938 eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
1939 eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
1940
1941 /* First, unset AS4_* attributes */
1942 if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
1943 if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
1944
1945 /* Handle AGGREGATOR attribute */
1946 if (a2 && a4)
1947 {
1948 u32 a2_asn = get_u32(a2->u.ptr->data);
1949
1950 /* If routes were aggregated by an old router, then AS4_PATH and
1951 AS4_AGGREGATOR are invalid. In that case we give up. */
1952 if (a2_asn != AS_TRANS)
1953 return;
1954
1955 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
1956 a2->u.ptr = a4->u.ptr;
1957 }
1958
1959 /* Handle AS_PATH attribute */
1960 if (p2 && p4)
1961 {
1962 /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
1963 int p2_len = as_path_getlen(p2->u.ptr);
1964 int p4_len = as_path_getlen(p4->u.ptr);
1965
1966 /* AS_PATH is too short, give up */
1967 if (p2_len < p4_len)
1968 return;
1969
1970 /* Merge AS_PATH and AS4_PATH */
1971 as_path_cut(p2->u.ptr, p2_len - p4_len);
1972 p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr);
1973 }
1974 }
1975
1976 int
1977 bgp_get_attr(eattr *a, byte *buf, int buflen)
1978 {
1979 uint i = EA_ID(a->id);
1980 const struct bgp_attr_desc *d;
1981 int len;
1982
1983 if (bgp_attr_known(i))
1984 {
1985 d = &bgp_attr_table[i];
1986 len = bsprintf(buf, "%s", d->name);
1987 buf += len;
1988 if (d->format)
1989 {
1990 *buf++ = ':';
1991 *buf++ = ' ';
1992 d->format(a, buf, buflen - len - 2);
1993 return GA_FULL;
1994 }
1995 return GA_NAME;
1996 }
1997
1998 bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
1999 return GA_NAME;
2000 }
2001
2002 void
2003 bgp_get_route_info(rte *e, byte *buf)
2004 {
2005 eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2006 eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2007 u32 origas;
2008
2009 buf += bsprintf(buf, " (%d", e->pref);
2010
2011 if (e->u.bgp.suppressed)
2012 buf += bsprintf(buf, "-");
2013
2014 if (e->attrs->hostentry)
2015 {
2016 if (!rte_resolvable(e))
2017 buf += bsprintf(buf, "/-");
2018 else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
2019 buf += bsprintf(buf, "/?");
2020 else
2021 buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
2022 }
2023 buf += bsprintf(buf, ") [");
2024
2025 if (p && as_path_get_last(p->u.ptr, &origas))
2026 buf += bsprintf(buf, "AS%u", origas);
2027 if (o)
2028 buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2029 strcpy(buf, "]");
2030 }