]> git.ipfire.org Git - thirdparty/bird.git/blame - proto/bgp/attrs.c
Filter: macro for recursive interpretation of instructions
[thirdparty/bird.git] / proto / bgp / attrs.c
CommitLineData
c01e3741
MM
1/*
2 * BIRD -- BGP Attributes
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
d15b0b0a
OZ
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
c01e3741
MM
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
85368cd4 11#undef LOCAL_DEBUG
c00d31be 12
e3558ab1
MM
13#include <stdlib.h>
14
c01e3741
MM
15#include "nest/bird.h"
16#include "nest/iface.h"
17#include "nest/protocol.h"
18#include "nest/route.h"
c0668f36 19#include "nest/attrs.h"
c01e3741 20#include "conf/conf.h"
c00d31be
MM
21#include "lib/resource.h"
22#include "lib/string.h"
23#include "lib/unaligned.h"
c01e3741
MM
24
25#include "bgp.h"
c00d31be 26
06fb60c4
OZ
27/*
28 * UPDATE message error handling
29 *
30 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32 * - Checks of some optional attribute values are missing.
33 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34 * are probably inadequate.
35 *
36 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37 * 4271 does not explicitly specify the behavior in that case.
38 *
39 * Loop detection related to route reflection (based on ORIGINATOR_ID
40 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41 * specifies that such updates should be ignored, but that is generally
42 * a bad idea.
43 *
d15b0b0a
OZ
44 * BGP attribute table has several hooks:
45 *
46 * export - Hook that validates and normalizes attribute during export phase.
47 * Receives eattr, may modify it (e.g., sort community lists for canonical
48 * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
49 * necessary. May assume that eattr has value valid w.r.t. its type, but may be
50 * invalid w.r.t. BGP constraints. Optional.
51 *
52 * encode - Hook that converts internal representation to external one during
53 * packet writing. Receives eattr and puts it in the buffer (including attribute
54 * header). Returns number of bytes, or -1 if not enough space. May assume that
55 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
56 * for all known attributes that exist internally after export phase (i.e., all
57 * except pseudoattributes MP_(UN)REACH_NLRI).
58 *
59 * decode - Hook that converts external representation to internal one during
60 * packet parsing. Receives attribute data in buffer, validates it and adds
61 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
62 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
63 *
64 * format - Optional hook that converts eattr to textual representation.
06fb60c4
OZ
65 */
66
ae8f5584 67
d15b0b0a
OZ
/* Descriptor of one BGP attribute: its name, type, flags and per-attribute hooks */
struct bgp_attr_desc {
  const char *name;	/* Attribute name; bgp_attr_known() treats a NULL name as unknown code */
  uint type;		/* EAF_TYPE_* value copied into eattr type by bgp_set_attr() */
  uint flags;		/* BAF_* flags applied on export and compared with received flags on decode */
  void (*export)(struct bgp_export_state *s, eattr *a);	/* Optional export-phase hook */
  int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);	/* Packet writing hook */
  void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);	/* Packet parsing hook */
  void (*format)(eattr *ea, byte *buf, uint size);	/* Optional textual formatting hook */
};
77
d15b0b0a
OZ
78static const struct bgp_attr_desc bgp_attr_table[];
79
80static inline int bgp_attr_known(uint code);
81
82eattr *
83bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
84{
85 ASSERT(bgp_attr_known(code));
86
87 ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
88 eattr *e = &a->attrs[0];
89
90 a->flags = EALF_SORTED;
91 a->count = 1;
92 a->next = *attrs;
93 *attrs = a;
94
95 e->id = EA_CODE(EAP_BGP, code);
96 e->type = bgp_attr_table[code].type;
97 e->flags = flags;
98
99 if (e->type & EAF_EMBEDDED)
100 e->u.data = (u32) val;
101 else
102 e->u.ptr = (struct adata *) val;
103
104 return e;
105}
106
107
108
/*
 * Helper macros for attribute hooks. They expect a state pointer named 's' to
 * be in scope. DISCARD(), WITHDRAW() and UNSET() contain a return statement,
 * so they leave the enclosing (void) function.
 */

/* Log a remote-related message prefixed with the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Report the problem and return from the hook */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Report the problem, set the err_withdraw flag in the state and return */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark the attribute as undefined and return from the hook */
#define UNSET(a) \
  ({ a->type = EAF_TYPE_UNDEF; return; })

/* Message templates used with the macros above */
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY "Missing mandatory %s attribute"
126
127
128static inline int
129bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
130{
131 *buf++ = flags;
132 *buf++ = code;
133 *buf++ = len;
134 return 3;
135}
136
137static inline int
138bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
139{
140 *buf++ = flags | BAF_EXT_LEN;
141 *buf++ = code;
142 put_u16(buf, len);
143 return 4;
144}
145
146static inline int
147bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
148{
149 if (len < 256)
150 return bgp_put_attr_hdr3(buf, code, flags, len);
151 else
152 return bgp_put_attr_hdr4(buf, code, flags, len);
153}
06fb60c4 154
f421cfdd 155static int
d15b0b0a 156bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
f421cfdd 157{
d15b0b0a
OZ
158 if (size < (3+1))
159 return -1;
160
161 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
162 buf[3] = a->u.data;
163
164 return 3+1;
f421cfdd
MM
165}
166
d15b0b0a
OZ
167static int
168bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
f421cfdd 169{
d15b0b0a
OZ
170 if (size < (3+4))
171 return -1;
172
173 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
174 put_u32(buf+3, a->u.data);
f421cfdd 175
d15b0b0a 176 return 3+4;
f421cfdd
MM
177}
178
179static int
d15b0b0a 180bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
f421cfdd 181{
d15b0b0a 182 uint len = a->u.ptr->length;
29c430f8 183
d15b0b0a
OZ
184 if (size < (4+len))
185 return -1;
29c430f8 186
d15b0b0a
OZ
187 uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
188 put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
189
190 return hdr + len;
f421cfdd
MM
191}
192
11cb6202 193static int
d15b0b0a 194bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len)
29c430f8 195{
d15b0b0a
OZ
196 if (size < (4+len))
197 return -1;
29c430f8 198
d15b0b0a
OZ
199 uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
200 memcpy(buf + hdr, data, len);
29c430f8 201
d15b0b0a
OZ
202 return hdr + len;
203}
204
205static int
206bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
207{
208 return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
209}
210
211
212/*
213 * Attribute hooks
214 */
29c430f8 215
d15b0b0a
OZ
216static void
217bgp_export_origin(struct bgp_export_state *s, eattr *a)
218{
219 if (a->u.data > 2)
220 WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
29c430f8
OZ
221}
222
d15b0b0a
OZ
223static void
224bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
11cb6202 225{
d15b0b0a
OZ
226 if (len != 1)
227 WITHDRAW(BAD_LENGTH, "ORIGIN", len);
228
229 if (data[0] > 2)
230 WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
231
232 bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
29c430f8
OZ
233}
234
d15b0b0a
OZ
235static void
236bgp_format_origin(eattr *a, byte *buf, uint size UNUSED)
29c430f8 237{
d15b0b0a
OZ
238 static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
239
240 bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
11cb6202
OZ
241}
242
d15b0b0a 243
f421cfdd 244static int
d15b0b0a 245bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
f421cfdd 246{
d15b0b0a
OZ
247 byte *data = a->u.ptr->data;
248 uint len = a->u.ptr->length;
f421cfdd 249
d15b0b0a
OZ
250 if (!s->as4_session)
251 {
252 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
253 byte *src = data;
254 data = alloca(len);
255 len = as_path_32to16(data, src, len);
256 }
257
258 return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
259}
260
261static void
262bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
263{
5509e17d
OZ
264 struct bgp_proto *p = s->proto;
265 int as_length = s->as4_session ? 4 : 2;
266 int as_confed = p->cf->confederation && p->is_interior;
d15b0b0a
OZ
267 char err[128];
268
5509e17d 269 if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err)))
d15b0b0a
OZ
270 WITHDRAW("Malformed AS_PATH attribute - %s", err);
271
5509e17d
OZ
272 /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
273 if (p->is_interior && !p->is_internal &&
274 ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
275 WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
276
d15b0b0a
OZ
277 if (!s->as4_session)
278 {
279 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
280 byte *src = data;
281 data = alloca(2*len);
282 len = as_path_16to32(data, src, len);
283 }
284
285 bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
286}
287
288
289static int
290bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
291{
292 /*
293 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
294 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
295 * store it and encode it later by AFI-specific hooks.
296 */
297
f3a8cf05 298 if ((s->channel->afi == BGP_AF_IPV4) && !s->channel->ext_next_hop)
d15b0b0a
OZ
299 {
300 ASSERT(a->u.ptr->length == sizeof(ip_addr));
301
302 if (size < (3+4))
303 return -1;
304
305 bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
306 put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
307
308 return 3+4;
309 }
310 else
311 {
312 s->mp_next_hop = a;
f421cfdd 313 return 0;
d15b0b0a
OZ
314 }
315}
316
317static void
318bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
319{
320 if (len != 4)
321 WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
322
323 /* Semantic checks are done later */
324 s->ip_next_hop_len = len;
325 s->ip_next_hop_data = data;
326}
327
328/* TODO: This function should use AF-specific hook */
329static void
330bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED)
331{
332 ip_addr *nh = (void *) a->u.ptr->data;
333 uint len = a->u.ptr->length;
334
335 ASSERT((len == 16) || (len == 32));
336
337 /* in IPv6, we may have two addresses in NEXT HOP */
338 if ((len == 16) || ipa_zero(nh[1]))
339 bsprintf(buf, "%I", nh[0]);
f421cfdd 340 else
d15b0b0a 341 bsprintf(buf, "%I %I", nh[0], nh[1]);
1c1da87b
MM
342}
343
d15b0b0a 344
d0e2d6d1 345static void
d15b0b0a 346bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
d0e2d6d1 347{
d15b0b0a
OZ
348 if (len != 4)
349 WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
d0e2d6d1 350
d15b0b0a
OZ
351 u32 val = get_u32(data);
352 bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
d0e2d6d1
OZ
353}
354
d15b0b0a
OZ
355
356static void
357bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
11cb6202 358{
e919601a 359 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
d15b0b0a 360 UNSET(a);
11cb6202
OZ
361}
362
cd17c651 363static void
d15b0b0a 364bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
cd17c651 365{
e919601a 366 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
d15b0b0a 367 DISCARD(BAD_EBGP, "LOCAL_PREF");
cd17c651 368
d15b0b0a
OZ
369 if (len != 4)
370 WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
cd17c651 371
d15b0b0a
OZ
372 u32 val = get_u32(data);
373 bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
cd17c651
OZ
374}
375
d15b0b0a
OZ
376
377static void
378bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
06fb60c4 379{
d15b0b0a
OZ
380 if (len != 0)
381 DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
382
383 bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
06fb60c4
OZ
384}
385
4847a894 386static int
d15b0b0a
OZ
387bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
388{
389 byte *data = a->u.ptr->data;
390 uint len = a->u.ptr->length;
391
392 if (!s->as4_session)
393 {
394 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
395 byte *src = data;
396 data = alloca(6);
397 len = aggregator_32to16(data, src);
398 }
399
400 return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
401}
402
403static void
404bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
405{
406 if (len != (s->as4_session ? 8 : 6))
407 DISCARD(BAD_LENGTH, "AGGREGATOR", len);
408
409 if (!s->as4_session)
410 {
411 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
412 byte *src = data;
413 data = alloca(8);
414 len = aggregator_16to32(data, src);
415 }
416
417 bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
418}
419
420static void
421bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED)
422{
423 byte *data = a->u.ptr->data;
424
425 bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
426}
427
428
429static void
430bgp_export_community(struct bgp_export_state *s, eattr *a)
431{
432 if (a->u.ptr->length == 0)
433 UNSET(a);
434
435 a->u.ptr = int_set_sort(s->pool, a->u.ptr);
436}
437
438static void
439bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
440{
441 if (!len || (len % 4))
442 WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
443
444 struct adata *ad = lp_alloc_adata(s->pool, len);
445 get_u32s(data, (u32 *) ad->data, len / 4);
446 bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
447}
448
449
450static void
451bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
4847a894 452{
d15b0b0a
OZ
453 if (!s->proto->is_internal)
454 UNSET(a);
4847a894
OZ
455}
456
aebe06b4 457static void
d15b0b0a
OZ
458bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
459{
460 if (!s->proto->is_internal)
461 DISCARD(BAD_EBGP, "ORIGINATOR_ID");
462
463 if (len != 4)
464 WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
465
466 u32 val = get_u32(data);
467 bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
468}
469
470
471static void
472bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
473{
474 if (!s->proto->is_internal)
475 UNSET(a);
476
477 if (a->u.ptr->length == 0)
478 UNSET(a);
479}
480
481static void
482bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
483{
484 if (!s->proto->is_internal)
485 DISCARD(BAD_EBGP, "CLUSTER_LIST");
486
487 if (!len || (len % 4))
488 WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
489
490 struct adata *ad = lp_alloc_adata(s->pool, len);
491 get_u32s(data, (u32 *) ad->data, len / 4);
492 bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
493}
494
495static void
496bgp_format_cluster_list(eattr *a, byte *buf, uint size)
aebe06b4 497{
fdf16eb6 498 /* Truncates cluster lists larger than buflen, probably not a problem */
d15b0b0a 499 int_set_format(a->u.ptr, 0, -1, buf, size);
aebe06b4
OZ
500}
501
d15b0b0a
OZ
502
503static inline u32
504get_af3(byte *buf)
1c1da87b 505{
d15b0b0a 506 return (get_u16(buf) << 16) | buf[2];
1c1da87b
MM
507}
508
d15b0b0a
OZ
509static void
510bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
1c1da87b 511{
d15b0b0a
OZ
512 /*
513 * 2 B MP_REACH_NLRI data - Address Family Identifier
514 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
515 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
516 * var MP_REACH_NLRI data - Network Address of Next Hop
517 * 1 B MP_REACH_NLRI data - Reserved (zero)
518 * var MP_REACH_NLRI data - Network Layer Reachability Information
519 */
520
521 if ((len < 5) || (len < (5 + (uint) data[3])))
522 bgp_parse_error(s, 9);
523
524 s->mp_reach_af = get_af3(data);
525 s->mp_next_hop_len = data[3];
526 s->mp_next_hop_data = data + 4;
527 s->mp_reach_len = len - 5 - s->mp_next_hop_len;
528 s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
f421cfdd
MM
529}
530
d15b0b0a
OZ
531
532static void
533bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
42a0c054 534{
d15b0b0a
OZ
535 /*
536 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
537 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
538 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
539 */
540
541 if (len < 3)
542 bgp_parse_error(s, 9);
543
544 s->mp_unreach_af = get_af3(data);
545 s->mp_unreach_len = len - 3;
546 s->mp_unreach_nlri = data + 3;
42a0c054
OZ
547}
548
d15b0b0a
OZ
549
550static void
551bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
552{
d807ea08
OZ
553 a->u.ptr = ec_set_del_nontrans(s->pool, a->u.ptr);
554
d15b0b0a
OZ
555 if (a->u.ptr->length == 0)
556 UNSET(a);
557
d807ea08 558 ec_set_sort_x(a->u.ptr);
d15b0b0a
OZ
559}
560
561static void
562bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
563{
564 if (!len || (len % 8))
565 WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
566
567 struct adata *ad = lp_alloc_adata(s->pool, len);
568 get_u32s(data, (u32 *) ad->data, len / 4);
569 bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
570}
571
572
573static void
574bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
575{
576 if (s->as4_session)
577 DISCARD(NEW_BGP, "AS4_AGGREGATOR");
578
579 if (len != 8)
580 DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
581
582 bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
583}
584
585static void
586bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
587{
588 char err[128];
589
590 if (s->as4_session)
591 DISCARD(NEW_BGP, "AS4_PATH");
592
593 if (len < 6)
594 DISCARD(BAD_LENGTH, "AS4_PATH", len);
595
5509e17d 596 if (!as_path_valid(data, len, 4, 1, err, sizeof(err)))
d15b0b0a
OZ
597 DISCARD("Malformed AS4_PATH attribute - %s", err);
598
5509e17d
OZ
599 struct adata *a = lp_alloc_adata(s->pool, len);
600 memcpy(a->data, data, len);
601
602 /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
603 if (as_path_contains_confed(a))
604 {
605 REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
606 a = as_path_strip_confed(s->pool, a);
607 }
608
609 bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
d15b0b0a
OZ
610}
611
612static void
613bgp_export_large_community(struct bgp_export_state *s, eattr *a)
614{
615 if (a->u.ptr->length == 0)
616 UNSET(a);
617
618 a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
619}
620
621static void
622bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
623{
624 if (!len || (len % 12))
625 WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
626
627 struct adata *ad = lp_alloc_adata(s->pool, len);
628 get_u32s(data, (u32 *) ad->data, len / 4);
629 bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
630}
631
1e37e35c
OZ
632static void
633bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
634{
635 net_addr *n = s->route->net->n.addr;
636 u32 *labels = (u32 *) a->u.ptr->data;
637 uint lnum = a->u.ptr->length / 4;
638
639 /* Perhaps we should just ignore it? */
640 if (!s->mpls)
641 WITHDRAW("Unexpected MPLS stack");
642
643 /* Empty MPLS stack is not allowed */
644 if (!lnum)
645 WITHDRAW("Malformed MPLS stack - empty");
646
647 /* This is ugly, but we must ensure that labels fit into NLRI field */
648 if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
649 WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
650
651 for (uint i = 0; i < lnum; i++)
652 {
653 if (labels[i] > 0xfffff)
654 WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
655
656 /* TODO: Check for special-purpose label values? */
657 }
658}
659
660static int
661bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
662{
663 /*
664 * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
665 * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
666 */
667
668 s->mpls_labels = a->u.ptr;
669 return 0;
670}
671
672static void
673bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
674{
675 DISCARD("Discarding received attribute #0");
676}
677
678static void
679bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
680{
681 u32 *labels = (u32 *) a->u.ptr->data;
682 uint lnum = a->u.ptr->length / 4;
683 char *pos = buf;
684
685 for (uint i = 0; i < lnum; i++)
686 {
687 if (size < 20)
688 {
689 bsprintf(pos, "...");
690 return;
691 }
692
693 uint l = bsprintf(pos, "%d/", labels[i]);
694 ADVANCE(pos, size, l);
695 }
696
697 /* Clear last slash or terminate empty string */
698 pos[lnum ? -1 : 0] = 0;
699}
700
d15b0b0a
OZ
701static inline void
702bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
703{
d493d0f1
OZ
704 /* Cannot use bgp_set_attr_data() as it works on known attributes only */
705 ea_set_attr_data(to, s->pool, EA_CODE(EAP_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
d15b0b0a
OZ
706}
707
708
709/*
710 * Attribute table
711 */
712
/*
 * Table of known BGP attributes, indexed by attribute code. Codes without an
 * entry have NULL name and are treated as unknown by bgp_attr_known().
 */
static const struct bgp_attr_desc bgp_attr_table[] = {
  [BA_ORIGIN] = {
    .name = "origin",
    .type = EAF_TYPE_INT,
    .flags = BAF_TRANSITIVE,
    .export = bgp_export_origin,
    .encode = bgp_encode_u8,
    .decode = bgp_decode_origin,
    .format = bgp_format_origin,
  },
  [BA_AS_PATH] = {
    .name = "as_path",
    .type = EAF_TYPE_AS_PATH,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_as_path,
    .decode = bgp_decode_as_path,
  },
  [BA_NEXT_HOP] = {
    .name = "next_hop",
    .type = EAF_TYPE_IP_ADDRESS,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_next_hop,
    .decode = bgp_decode_next_hop,
    .format = bgp_format_next_hop,
  },
  [BA_MULTI_EXIT_DISC] = {
    .name = "med",
    .type = EAF_TYPE_INT,
    .flags = BAF_OPTIONAL,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_med,
  },
  [BA_LOCAL_PREF] = {
    .name = "local_pref",
    .type = EAF_TYPE_INT,
    .flags = BAF_TRANSITIVE,
    .export = bgp_export_local_pref,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_local_pref,
  },
  [BA_ATOMIC_AGGR] = {
    .name = "atomic_aggr",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_atomic_aggr,
  },
  [BA_AGGREGATOR] = {
    .name = "aggregator",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_aggregator,
    .decode = bgp_decode_aggregator,
    .format = bgp_format_aggregator,
  },
  [BA_COMMUNITY] = {
    .name = "community",
    .type = EAF_TYPE_INT_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_community,
  },
  [BA_ORIGINATOR_ID] = {
    .name = "originator_id",
    .type = EAF_TYPE_ROUTER_ID,
    .flags = BAF_OPTIONAL,
    .export = bgp_export_originator_id,
    .encode = bgp_encode_u32,
    .decode = bgp_decode_originator_id,
  },
  [BA_CLUSTER_LIST] = {
    .name = "cluster_list",
    .type = EAF_TYPE_INT_SET,
    .flags = BAF_OPTIONAL,
    .export = bgp_export_cluster_list,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_cluster_list,
    .format = bgp_format_cluster_list,
  },
  /* MP_(UN)REACH_NLRI are pseudoattributes with a decode hook only */
  [BA_MP_REACH_NLRI] = {
    .name = "mp_reach_nlri",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL,
    .decode = bgp_decode_mp_reach_nlri,
  },
  [BA_MP_UNREACH_NLRI] = {
    .name = "mp_unreach_nlri",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL,
    .decode = bgp_decode_mp_unreach_nlri,
  },
  [BA_EXT_COMMUNITY] = {
    .name = "ext_community",
    .type = EAF_TYPE_EC_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_ext_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_ext_community,
  },
  [BA_AS4_PATH] = {
    .name = "as4_path",
    .type = EAF_TYPE_AS_PATH,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_as4_path,
  },
  /* AS4_AGGREGATOR shares the format hook with AGGREGATOR */
  [BA_AS4_AGGREGATOR] = {
    .name = "as4_aggregator",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_as4_aggregator,
    .format = bgp_format_aggregator,
  },
  [BA_LARGE_COMMUNITY] = {
    .name = "large_community",
    .type = EAF_TYPE_LC_SET,
    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
    .export = bgp_export_large_community,
    .encode = bgp_encode_u32s,
    .decode = bgp_decode_large_community,
  },
  /* No .flags here; the encode hook writes nothing and the decode hook discards */
  [BA_MPLS_LABEL_STACK] = {
    .name = "mpls_label_stack",
    .type = EAF_TYPE_INT_SET,
    .export = bgp_export_mpls_label_stack,
    .encode = bgp_encode_mpls_label_stack,
    .decode = bgp_decode_mpls_label_stack,
    .format = bgp_format_mpls_label_stack,
  },
};
845
d15b0b0a
OZ
846static inline int
847bgp_attr_known(uint code)
848{
849 return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
850}
851
852
853/*
854 * Attribute export
11cb6202
OZ
855 */
856
d15b0b0a
OZ
857static inline void
858bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
859{
860 if (EA_PROTO(a->id) != EAP_BGP)
861 return;
862
863 uint code = EA_ID(a->id);
864
865 if (bgp_attr_known(code))
866 {
867 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
868
869 /* The flags might have been zero if the attr was added by filters */
870 a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
871
872 /* Set partial bit if new opt-trans attribute is attached to non-local route */
873 if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
874 (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
875 a->flags |= BAF_PARTIAL;
d1a74339 876
d15b0b0a
OZ
877 /* Call specific hook */
878 CALL(desc->export, s, a);
879
880 /* Attribute might become undefined in hook */
881 if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
882 return;
883 }
884 else
885 {
886 /* Don't re-export unknown non-transitive attributes */
887 if (!(a->flags & BAF_TRANSITIVE))
888 return;
889
890 a->flags |= BAF_PARTIAL;
891 }
892
893 /* Append updated attribute */
894 to->attrs[to->count++] = *a;
895}
896
897/**
898 * bgp_export_attrs - export BGP attributes
899 * @s: BGP export state
900 * @attrs: a list of extended attributes
901 *
902 * The bgp_export_attrs() function takes a list of attributes and merges it to
903 * one newly allocated and sorted segment. Attributes are validated and
904 * normalized by type-specific export hooks and attribute flags are updated.
905 * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
906 * empty community sets).
907 *
908 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
909 */
910static inline ea_list *
911bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
4847a894 912{
d15b0b0a
OZ
913 /* Merge the attribute list */
914 ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
915 ea_merge(attrs, new);
916 ea_sort(new);
917
918 uint i, count;
919 count = new->count;
920 new->count = 0;
921
922 /* Export each attribute */
923 for (i = 0; i < count; i++)
924 bgp_export_attr(s, &new->attrs[i], new);
925
926 if (s->err_withdraw)
927 return NULL;
928
929 return new;
4847a894
OZ
930}
931
d15b0b0a
OZ
932
933/*
934 * Attribute encoding
935 */
936
937static inline int
938bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
cf3d6470 939{
d15b0b0a
OZ
940 ASSERT(EA_PROTO(a->id) == EAP_BGP);
941
942 uint code = EA_ID(a->id);
943
944 if (bgp_attr_known(code))
945 return bgp_attr_table[code].encode(s, a, buf, size);
cf3d6470 946 else
d15b0b0a 947 return bgp_encode_raw(s, a, buf, size);
cf3d6470
MM
948}
949
d15b0b0a
OZ
950/**
951 * bgp_encode_attrs - encode BGP attributes
952 * @s: BGP write state
953 * @attrs: a list of extended attributes
954 * @buf: buffer
955 * @end: buffer end
956 *
957 * The bgp_encode_attrs() function takes a list of extended attributes
958 * and converts it to its BGP representation (a part of an Update message).
959 *
960 * Result: Length of the attribute block generated or -1 if not enough space.
961 */
962int
963bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
4847a894 964{
d15b0b0a
OZ
965 byte *pos = buf;
966 int i, len;
967
968 for (i = 0; i < attrs->count; i++)
969 {
970 len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
971
972 if (len < 0)
973 return -1;
974
975 pos += len;
976 }
977
978 return pos - buf;
4847a894
OZ
979}
980
d15b0b0a
OZ
981
982/*
983 * Attribute decoding
984 */
985
986static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
987
988static inline int
989bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
cf3d6470 990{
d15b0b0a
OZ
991 eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
992 int num = p->cf->allow_local_as + 1;
993 return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
4847a894
OZ
994}
995
d15b0b0a
OZ
996static inline int
997bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
4847a894 998{
d15b0b0a
OZ
999 eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
1000 return (e && (e->u.data == p->local_id));
cf3d6470
MM
1001}
1002
d15b0b0a
OZ
1003static inline int
1004bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
11cb6202 1005{
d15b0b0a
OZ
1006 eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
1007 return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
1008}
11cb6202 1009
d15b0b0a
OZ
1010static inline void
1011bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
1012{
1013 /* Handle duplicate attributes; RFC 7606 3 (g) */
1014 if (BIT32_TEST(s->attrs_seen, code))
1015 {
1016 if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
1017 bgp_parse_error(s, 1);
1018 else
1019 DISCARD("Discarding duplicate attribute (code %u)", code);
1020 }
1021 BIT32_SET(s->attrs_seen, code);
11cb6202 1022
d15b0b0a
OZ
1023 if (bgp_attr_known(code))
1024 {
1025 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1026
1027 /* Handle conflicting flags; RFC 7606 3 (c) */
1028 if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
1029 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
11cb6202 1030
d15b0b0a
OZ
1031 desc->decode(s, code, flags, data, len, to);
1032 }
1033 else /* Unknown attribute */
1034 {
1035 if (!(flags & BAF_OPTIONAL))
1036 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
1037
1038 bgp_decode_unknown(s, code, flags, data, len, to);
1039 }
11cb6202
OZ
1040}
1041
d15b0b0a
OZ
1042/**
1043 * bgp_decode_attrs - check and decode BGP attributes
1044 * @s: BGP parse state
1045 * @data: start of attribute block
1046 * @len: length of attribute block
1047 *
1048 * This function takes a BGP attribute block (a part of an Update message), checks
1049 * its consistency and converts it to a list of BIRD route attributes represented
1050 * by an (uncached) &rta.
1051 */
1052ea_list *
1053bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
11cb6202 1054{
d15b0b0a
OZ
1055 struct bgp_proto *p = s->proto;
1056 ea_list *attrs = NULL;
1057 uint code, flags, alen;
1058 byte *pos = data;
11cb6202 1059
d15b0b0a
OZ
1060 /* Parse the attributes */
1061 while (len)
1062 {
1063 alen = 0;
1064
1065 /* Read attribute type */
1066 if (len < 2)
1067 goto framing_error;
1068 flags = pos[0];
1069 code = pos[1];
1070 ADVANCE(pos, len, 2);
1071
1072 /* Read attribute length */
1073 if (flags & BAF_EXT_LEN)
11cb6202 1074 {
d15b0b0a
OZ
1075 if (len < 2)
1076 goto framing_error;
1077 alen = get_u16(pos);
1078 ADVANCE(pos, len, 2);
11cb6202 1079 }
d15b0b0a
OZ
1080 else
1081 {
1082 if (len < 1)
1083 goto framing_error;
1084 alen = *pos;
1085 ADVANCE(pos, len, 1);
1086 }
1087
1088 if (alen > len)
1089 goto framing_error;
1090
1091 DBG("Attr %02x %02x %u\n", code, flags, alen);
1092
1093 bgp_decode_attr(s, code, flags, pos, alen, &attrs);
1094 ADVANCE(pos, len, alen);
1095 }
1096
1097 if (s->err_withdraw)
1098 goto withdraw;
1099
1100 /* If there is no reachability NLRI, we are finished */
1101 if (!s->ip_reach_len && !s->mp_reach_len)
1102 return NULL;
1103
1104
1105 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1106 if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
1107 { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
1108
1109 if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
1110 { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
11cb6202 1111
d15b0b0a
OZ
1112 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1113 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1114 if (!p->as4_session)
1115 bgp_process_as4_attrs(&attrs, s->pool);
11cb6202 1116
d15b0b0a
OZ
1117 /* Reject routes with our ASN in AS_PATH attribute */
1118 if (bgp_as_path_loopy(p, attrs, p->local_as))
1119 goto withdraw;
11cb6202 1120
5509e17d 1121 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
d15b0b0a
OZ
1122 if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
1123 goto withdraw;
11cb6202 1124
d15b0b0a
OZ
1125 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1126 if (p->is_internal && bgp_originator_id_loopy(p, attrs))
1127 goto withdraw;
11cb6202 1128
d15b0b0a
OZ
1129 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1130 if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
1131 goto withdraw;
11cb6202 1132
d15b0b0a
OZ
1133 /* If there is no local preference, define one */
1134 if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
1135 bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
11cb6202 1136
d15b0b0a 1137 return attrs;
f421cfdd 1138
11cb6202 1139
d15b0b0a
OZ
1140framing_error:
1141 /* RFC 7606 4 - handle attribute framing errors */
1142 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1143 alen, len, (int) (pos - s->attrs));
ae8f5584 1144
d15b0b0a
OZ
1145withdraw:
1146 /* RFC 7606 5.2 - handle missing NLRI during errors */
1147 if (!s->ip_reach_len && !s->mp_reach_len)
1148 bgp_parse_error(s, 1);
c2b28c99 1149
d15b0b0a
OZ
1150 s->err_withdraw = 1;
1151 return NULL;
e3558ab1
MM
1152}
1153
ae8f5584 1154
d15b0b0a
OZ
1155/*
1156 * Route bucket hash table
1157 */
42a0c054 1158
d15b0b0a
OZ
1159#define RBH_KEY(b) b->eattrs, b->hash
1160#define RBH_NEXT(b) b->next
1161#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
1162#define RBH_FN(a,h) h
42a0c054 1163
d15b0b0a
OZ
1164#define RBH_REHASH bgp_rbh_rehash
1165#define RBH_PARAMS /8, *2, 2, 2, 8, 20
42a0c054 1166
42a0c054 1167
d15b0b0a 1168HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
66dbdbd9 1169
d15b0b0a
OZ
1170void
1171bgp_init_bucket_table(struct bgp_channel *c)
66dbdbd9 1172{
d15b0b0a 1173 HASH_INIT(c->bucket_hash, c->pool, 8);
66dbdbd9 1174
d15b0b0a
OZ
1175 init_list(&c->bucket_queue);
1176 c->withdraw_bucket = NULL;
ae8f5584
MM
1177}
1178
7fc55925
OZ
1179void
1180bgp_free_bucket_table(struct bgp_channel *c)
1181{
1182 HASH_FREE(c->bucket_hash);
1183
1184 struct bgp_bucket *b;
1185 WALK_LIST_FIRST(b, c->bucket_queue)
1186 {
1187 rem_node(&b->send_node);
1188 mb_free(b);
1189 }
1190
1191 mb_free(c->withdraw_bucket);
1192 c->withdraw_bucket = NULL;
1193}
1194
ae8f5584 1195static struct bgp_bucket *
d15b0b0a 1196bgp_get_bucket(struct bgp_channel *c, ea_list *new)
ae8f5584 1197{
d15b0b0a
OZ
1198 /* Hash and lookup */
1199 u32 hash = ea_hash(new);
1200 struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
1201
1202 if (b)
1203 return b;
1204
1205 uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
1206 uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
1207 uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
1208 uint i;
ae8f5584 1209 byte *dest;
ae8f5584
MM
1210
1211 /* Gather total size of non-inline attributes */
d15b0b0a
OZ
1212 for (i = 0; i < new->count; i++)
1213 {
1214 eattr *a = &new->attrs[i];
ae8f5584 1215
d15b0b0a
OZ
1216 if (!(a->type & EAF_EMBEDDED))
1217 size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
1218 }
1219
1220 /* Create the bucket */
1221 b = mb_alloc(c->pool, size);
f421cfdd 1222 init_list(&b->prefixes);
d15b0b0a
OZ
1223 b->hash = hash;
1224
1225 /* Copy list of extended attributes */
ae8f5584 1226 memcpy(b->eattrs, new, ea_size);
d15b0b0a 1227 dest = ((byte *) b->eattrs) + ea_size_aligned;
ae8f5584
MM
1228
1229 /* Copy values of non-inline attributes */
d15b0b0a
OZ
1230 for (i = 0; i < new->count; i++)
1231 {
1232 eattr *a = &b->eattrs->attrs[i];
1233
1234 if (!(a->type & EAF_EMBEDDED))
ae8f5584 1235 {
d15b0b0a
OZ
1236 struct adata *oa = a->u.ptr;
1237 struct adata *na = (struct adata *) dest;
1238 memcpy(na, oa, sizeof(struct adata) + oa->length);
1239 a->u.ptr = na;
1240 dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
ae8f5584 1241 }
d15b0b0a 1242 }
ae8f5584 1243
d15b0b0a
OZ
1244 /* Insert the bucket to send queue and bucket hash */
1245 add_tail(&c->bucket_queue, &b->send_node);
1246 HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
ae8f5584
MM
1247
1248 return b;
1249}
1250
1251static struct bgp_bucket *
d15b0b0a 1252bgp_get_withdraw_bucket(struct bgp_channel *c)
ae8f5584 1253{
d15b0b0a
OZ
1254 if (!c->withdraw_bucket)
1255 {
1256 c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
1257 init_list(&c->withdraw_bucket->prefixes);
1258 }
ae8f5584 1259
d15b0b0a
OZ
1260 return c->withdraw_bucket;
1261}
ae8f5584 1262
d15b0b0a
OZ
1263void
1264bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1265{
1266 rem_node(&b->send_node);
1267 HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
1268 mb_free(b);
1269}
f421cfdd 1270
d15b0b0a
OZ
1271void
1272bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1273{
1274 rem_node(&b->send_node);
1275 add_tail(&c->bucket_queue, &b->send_node);
ae8f5584
MM
1276}
1277
f421cfdd 1278void
d15b0b0a 1279bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
f421cfdd 1280{
d15b0b0a
OZ
1281 struct bgp_proto *p = (void *) c->c.proto;
1282 struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
1283
1284 log(L_ERR "%s: Attribute list too long", p->p.name);
1285 while (!EMPTY_LIST(b->prefixes))
1286 {
1287 struct bgp_prefix *px = HEAD(b->prefixes);
1288
1289 log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
1290 rem_node(&px->buck_node);
1291 add_tail(&wb->prefixes, &px->buck_node);
1292 }
f421cfdd
MM
1293}
1294
094d2bdb 1295
d15b0b0a
OZ
1296/*
1297 * Prefix hash table
1298 */
094d2bdb 1299
d15b0b0a
OZ
1300#define PXH_KEY(px) px->net, px->path_id, px->hash
1301#define PXH_NEXT(px) px->next
1302#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
1303#define PXH_FN(n,i,h) h
e7d2ac44
OZ
1304
1305#define PXH_REHASH bgp_pxh_rehash
1306#define PXH_PARAMS /8, *2, 2, 2, 8, 20
1307
094d2bdb 1308
e7d2ac44 1309HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
094d2bdb
OZ
1310
1311void
d15b0b0a 1312bgp_init_prefix_table(struct bgp_channel *c)
094d2bdb 1313{
d15b0b0a 1314 HASH_INIT(c->prefix_hash, c->pool, 8);
094d2bdb 1315
ac3ad139
OZ
1316 uint alen = net_addr_length[c->c.net_type];
1317 c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
094d2bdb
OZ
1318}
1319
ed1a908e 1320void
c259669f 1321bgp_free_prefix_table(struct bgp_channel *c)
ed1a908e 1322{
c259669f 1323 HASH_FREE(c->prefix_hash);
ed1a908e 1324
c259669f
OZ
1325 rfree(c->prefix_slab);
1326 c->prefix_slab = NULL;
094d2bdb
OZ
1327}
1328
1329static struct bgp_prefix *
d15b0b0a 1330bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
094d2bdb 1331{
d15b0b0a
OZ
1332 u32 hash = net_hash(net) ^ u32_hash(path_id);
1333 struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
094d2bdb 1334
d15b0b0a
OZ
1335 if (px)
1336 {
1337 rem_node(&px->buck_node);
1338 return px;
1339 }
094d2bdb 1340
ac3ad139
OZ
1341 if (c->prefix_slab)
1342 px = sl_alloc(c->prefix_slab);
1343 else
1344 px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
1345
d15b0b0a
OZ
1346 px->buck_node.next = NULL;
1347 px->buck_node.prev = NULL;
1348 px->hash = hash;
1349 px->path_id = path_id;
1350 net_copy(px->net, net);
094d2bdb 1351
d15b0b0a 1352 HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
094d2bdb 1353
d15b0b0a 1354 return px;
094d2bdb
OZ
1355}
1356
1357void
d15b0b0a 1358bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
094d2bdb 1359{
d15b0b0a
OZ
1360 rem_node(&px->buck_node);
1361 HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
ac3ad139
OZ
1362
1363 if (c->prefix_slab)
1364 sl_free(c->prefix_slab, px);
1365 else
1366 mb_free(px);
094d2bdb
OZ
1367}
1368
1369
d15b0b0a
OZ
1370/*
1371 * BGP protocol glue
1372 */
ef2c708d 1373
d15b0b0a
OZ
1374int
1375bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED)
ef2c708d 1376{
d15b0b0a
OZ
1377 rte *e = *new;
1378 struct proto *SRC = e->attrs->src->proto;
1379 struct bgp_proto *p = (struct bgp_proto *) P;
1380 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
ef2c708d 1381
d15b0b0a
OZ
1382 /* Reject our routes */
1383 if (src == p)
1384 return -1;
4847a894 1385
d15b0b0a
OZ
1386 /* Accept non-BGP routes */
1387 if (src == NULL)
1388 return 0;
4847a894 1389
d8022d26
OZ
1390 // XXXX: Check next hop AF
1391
d15b0b0a
OZ
1392 /* IBGP route reflection, RFC 4456 */
1393 if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
1394 {
1395 /* Rejected unless configured as route reflector */
1396 if (!p->rr_client && !src->rr_client)
1397 return -1;
1398
1399 /* Generally, this should be handled when path is received, but we check it
1400 also here as rr_cluster_id may be undefined or different in src. */
1401 if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
1402 return -1;
1403 }
1404
1405 /* Handle well-known communities, RFC 1997 */
1406 struct eattr *c;
1407 if (p->cf->interpret_communities &&
1408 (c = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY))))
1409 {
1410 struct adata *d = c->u.ptr;
1411
1412 /* Do not export anywhere */
1413 if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1414 return -1;
1415
1416 /* Do not export outside of AS (or member-AS) */
1417 if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
1418 return -1;
1419
1420 /* Do not export outside of AS (or confederation) */
1421 if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
1422 return -1;
1423 }
4847a894 1424
d15b0b0a 1425 return 0;
4847a894
OZ
1426}
1427
1428
5509e17d 1429static adata null_adata; /* adata of length 0 */
ef2c708d 1430
d15b0b0a 1431static ea_list *
82f42ea0 1432bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
ef2c708d 1433{
d15b0b0a
OZ
1434 struct proto *SRC = e->attrs->src->proto;
1435 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
1e37e35c 1436 struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
82f42ea0 1437 ea_list *attrs = attrs0;
48e842cc 1438 eattr *a;
82f42ea0 1439 adata *ad;
48e842cc 1440
d15b0b0a 1441 /* ORIGIN attribute - mandatory, attach if missing */
82f42ea0 1442 if (! bgp_find_attr(attrs0, BA_ORIGIN))
d15b0b0a
OZ
1443 bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
1444
82f42ea0
OZ
1445 /* AS_PATH attribute - mandatory */
1446 a = bgp_find_attr(attrs0, BA_AS_PATH);
1447 ad = a ? a->u.ptr : &null_adata;
5509e17d
OZ
1448
1449 /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1450 if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
1451 ad = as_path_strip_confed(pool, ad);
1452
d15b0b0a
OZ
1453 /* AS_PATH attribute - keep or prepend ASN */
1454 if (p->is_internal ||
1455 (p->rs_client && src && src->rs_client))
1456 {
1457 /* IBGP or route server -> just ensure there is one */
5509e17d
OZ
1458 if (!a)
1459 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
d15b0b0a
OZ
1460 }
1461 else if (p->is_interior)
1462 {
5509e17d
OZ
1463 /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1464 ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
1465 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
d15b0b0a
OZ
1466 }
1467 else /* Regular EBGP (no RS, no confederation) */
1468 {
5509e17d
OZ
1469 /* Regular EBGP -> prepend ASN as regular sequence */
1470 ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
1471 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
d15b0b0a
OZ
1472
1473 /* MULTI_EXIT_DESC attribute - accept only if set in export filter */
82f42ea0 1474 a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
d15b0b0a
OZ
1475 if (a && !(a->type & EAF_FRESH))
1476 bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
1477 }
1478
1479 /* NEXT_HOP attribute - delegated to AF-specific hook */
82f42ea0 1480 a = bgp_find_attr(attrs0, BA_NEXT_HOP);
d15b0b0a
OZ
1481 bgp_update_next_hop(&s, a, &attrs);
1482
1483 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
82f42ea0 1484 if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
d15b0b0a
OZ
1485 bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1486
1487 /* IBGP route reflection, RFC 4456 */
1488 if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
1489 {
1490 /* ORIGINATOR_ID attribute - attach if not already set */
82f42ea0 1491 if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
d15b0b0a
OZ
1492 bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
1493
1494 /* CLUSTER_LIST attribute - prepend cluster ID */
82f42ea0
OZ
1495 a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
1496 ad = a ? a->u.ptr : NULL;
1497
1498 /* Prepend src cluster ID */
d15b0b0a 1499 if (src->rr_cluster_id)
c259669f 1500 ad = int_set_prepend(pool, ad, src->rr_cluster_id);
d15b0b0a 1501
82f42ea0 1502 /* Prepend dst cluster ID if src and dst clusters are different */
d15b0b0a 1503 if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
c259669f 1504 ad = int_set_prepend(pool, ad, p->rr_cluster_id);
82f42ea0
OZ
1505
1506 /* Should be at least one prepended cluster ID */
1507 bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
d15b0b0a
OZ
1508 }
1509
1510 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1511 if (! p->as4_session)
1512 {
1513 a = bgp_find_attr(attrs, BA_AS_PATH);
1514 if (a && as_path_contains_as4(a->u.ptr))
48e842cc 1515 {
d15b0b0a
OZ
1516 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
1517 bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
4847a894
OZ
1518 }
1519
d15b0b0a
OZ
1520 a = bgp_find_attr(attrs, BA_AGGREGATOR);
1521 if (a && aggregator_contains_as4(a->u.ptr))
4847a894 1522 {
d15b0b0a
OZ
1523 bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
1524 bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
48e842cc 1525 }
d15b0b0a 1526 }
ef2c708d 1527
82f42ea0
OZ
1528 /*
1529 * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1530 * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1531 * should be checked in AF-specific hooks.
1532 */
1533
d15b0b0a
OZ
1534 /* Apply per-attribute export hooks for validatation and normalization */
1535 return bgp_export_attrs(&s, attrs);
ef2c708d
MM
1536}
1537
d15b0b0a
OZ
1538void
1539bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs)
6cb8f742 1540{
d15b0b0a
OZ
1541 struct bgp_proto *p = (void *) P;
1542 struct bgp_channel *c = (void *) C;
1543 struct bgp_bucket *buck;
1544 struct bgp_prefix *px;
1545 u32 path;
6cb8f742 1546
d15b0b0a
OZ
1547 if (new)
1548 {
1e37e35c 1549 attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool2);
6cb8f742 1550
d15b0b0a
OZ
1551 /* If attributes are invalid, we fail back to withdraw */
1552 buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
1553 path = new->attrs->src->global_id;
ef2c708d 1554
1e37e35c 1555 lp_flush(bgp_linpool2);
d15b0b0a 1556 }
ef2c708d 1557 else
d15b0b0a
OZ
1558 {
1559 buck = bgp_get_withdraw_bucket(c);
1560 path = old->attrs->src->global_id;
1561 }
1562
1563 px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
1564 add_tail(&buck->prefixes, &px->buck_node);
1565
1566 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
ef2c708d
MM
1567}
1568
d15b0b0a 1569
b6bf284a
OZ
1570static inline u32
1571bgp_get_neighbor(rte *r)
1572{
1573 eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1574 u32 as;
1575
5509e17d 1576 if (e && as_path_get_first_regular(e->u.ptr, &as))
b6bf284a 1577 return as;
5509e17d
OZ
1578
1579 /* If AS_PATH is not defined, we treat rte as locally originated */
1580 struct bgp_proto *p = (void *) r->attrs->src->proto;
1581 return p->cf->confederation ?: p->local_as;
b6bf284a
OZ
1582}
1583
7e95c05d
OZ
1584static inline int
1585rte_resolvable(rte *rt)
1586{
62e64905 1587 return rt->attrs->dest == RTD_UNICAST;
7e95c05d
OZ
1588}
1589
ef2c708d
MM
1590int
1591bgp_rte_better(rte *new, rte *old)
1592{
094d2bdb
OZ
1593 struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
1594 struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
56a2bed4
MM
1595 eattr *x, *y;
1596 u32 n, o;
ef2c708d 1597
be4cd99a
OZ
1598 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1599 n = new->u.bgp.suppressed;
1600 o = old->u.bgp.suppressed;
1601 if (n > o)
1602 return 0;
1603 if (n < o)
1604 return 1;
1605
ac3ac49a 1606 /* RFC 4271 9.1.2.1. Route resolvability test */
7e95c05d
OZ
1607 n = rte_resolvable(new);
1608 o = rte_resolvable(old);
ac3ac49a
OZ
1609 if (n > o)
1610 return 1;
1611 if (n < o)
1612 return 0;
1613
ef2c708d 1614 /* Start with local preferences */
56a2bed4
MM
1615 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1616 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1617 n = x ? x->u.data : new_bgp->cf->default_local_pref;
1618 o = y ? y->u.data : old_bgp->cf->default_local_pref;
1619 if (n > o)
1620 return 1;
1621 if (n < o)
1622 return 0;
1623
4847a894 1624 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
56a2bed4 1625 if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
d15b0b0a
OZ
1626 {
1627 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1628 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1629 n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1630 o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1631 if (n < o)
1632 return 1;
1633 if (n > o)
1634 return 0;
1635 }
ef2c708d 1636
4847a894 1637 /* RFC 4271 9.1.2.2. b) Use origins */
56a2bed4
MM
1638 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1639 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
cea63664
MM
1640 n = x ? x->u.data : ORIGIN_INCOMPLETE;
1641 o = y ? y->u.data : ORIGIN_INCOMPLETE;
56a2bed4
MM
1642 if (n < o)
1643 return 1;
1644 if (n > o)
1645 return 0;
1646
4847a894 1647 /* RFC 4271 9.1.2.2. c) Compare MED's */
be4cd99a
OZ
1648 /* Proper RFC 4271 path selection cannot be interpreted as finding
1649 * the best path in some ordering. It is implemented partially in
1650 * bgp_rte_recalculate() when deterministic_med option is
1651 * active. Without that option, the behavior is just an
1652 * approximation, which in specific situations may lead to
1653 * persistent routing loops, because it is nondeterministic - it
1654 * depends on the order in which routes appeared. But it is also the
1655 * same behavior as used by default in Cisco routers, so it is
1656 * probably not a big issue.
73272f04
OZ
1657 */
1658 if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1659 (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
d15b0b0a
OZ
1660 {
1661 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1662 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1663 n = x ? x->u.data : new_bgp->cf->default_med;
1664 o = y ? y->u.data : old_bgp->cf->default_med;
1665 if (n < o)
1666 return 1;
1667 if (n > o)
1668 return 0;
1669 }
56a2bed4 1670
4847a894 1671 /* RFC 4271 9.1.2.2. d) Prefer external peers */
d15b0b0a 1672 if (new_bgp->is_interior > old_bgp->is_interior)
ef2c708d 1673 return 0;
d15b0b0a 1674 if (new_bgp->is_interior < old_bgp->is_interior)
ef2c708d 1675 return 1;
ef2c708d 1676
d1e146f2
OZ
1677 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1678 n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1679 o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1680 if (n < o)
1681 return 1;
1682 if (n > o)
1683 return 0;
4847a894 1684
4847a894 1685 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
d15b0b0a 1686 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
4847a894
OZ
1687 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1688 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1689 n = x ? x->u.data : new_bgp->remote_id;
1690 o = y ? y->u.data : old_bgp->remote_id;
3228c72c
OZ
1691
1692 /* RFC 5004 - prefer older routes */
1693 /* (if both are external and from different peer) */
1694 if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1695 !new_bgp->is_internal && n != o)
1696 return 0;
1697
1698 /* rest of RFC 4271 9.1.2.2. f) */
4847a894
OZ
1699 if (n < o)
1700 return 1;
1701 if (n > o)
1702 return 0;
11cb6202 1703
3075824d
OZ
1704 /* RFC 4456 9. b) Compare cluster list lengths */
1705 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
1706 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
1707 n = x ? int_set_get_size(x->u.ptr) : 0;
1708 o = y ? int_set_get_size(y->u.ptr) : 0;
1709 if (n < o)
1710 return 1;
1711 if (n > o)
1712 return 0;
1713
4847a894
OZ
1714 /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
1715 return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
1716}
1717
be4cd99a 1718
8d9eef17
OZ
1719int
1720bgp_rte_mergable(rte *pri, rte *sec)
1721{
1722 struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
1723 struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
1724 eattr *x, *y;
1725 u32 p, s;
1726
1727 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1728 if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
1729 return 0;
1730
1731 /* RFC 4271 9.1.2.1. Route resolvability test */
1732 if (!rte_resolvable(sec))
1733 return 0;
1734
1735 /* Start with local preferences */
1736 x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1737 y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1738 p = x ? x->u.data : pri_bgp->cf->default_local_pref;
1739 s = y ? y->u.data : sec_bgp->cf->default_local_pref;
1740 if (p != s)
1741 return 0;
1742
1743 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1744 if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
d15b0b0a
OZ
1745 {
1746 x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1747 y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1748 p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1749 s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
8d9eef17 1750
d15b0b0a
OZ
1751 if (p != s)
1752 return 0;
8d9eef17 1753
d15b0b0a
OZ
1754// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
1755// return 0;
1756 }
8d9eef17
OZ
1757
1758 /* RFC 4271 9.1.2.2. b) Use origins */
1759 x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1760 y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1761 p = x ? x->u.data : ORIGIN_INCOMPLETE;
1762 s = y ? y->u.data : ORIGIN_INCOMPLETE;
1763 if (p != s)
1764 return 0;
1765
1766 /* RFC 4271 9.1.2.2. c) Compare MED's */
1767 if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
1768 (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
d15b0b0a
OZ
1769 {
1770 x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1771 y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1772 p = x ? x->u.data : pri_bgp->cf->default_med;
1773 s = y ? y->u.data : sec_bgp->cf->default_med;
1774 if (p != s)
1775 return 0;
1776 }
8d9eef17
OZ
1777
1778 /* RFC 4271 9.1.2.2. d) Prefer external peers */
5509e17d 1779 if (pri_bgp->is_interior != sec_bgp->is_interior)
8d9eef17
OZ
1780 return 0;
1781
1782 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1783 p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
1784 s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
1785 if (p != s)
1786 return 0;
1787
1788 /* Remaining criteria are ignored */
1789
1790 return 1;
1791}
1792
1793
be4cd99a
OZ
1794static inline int
1795same_group(rte *r, u32 lpref, u32 lasn)
1796{
1797 return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
1798}
1799
1800static inline int
1801use_deterministic_med(rte *r)
1802{
094d2bdb 1803 struct proto *P = r->attrs->src->proto;
26822d8f 1804 return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
be4cd99a
OZ
1805}
1806
1807int
1808bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
1809{
1810 rte *r, *s;
1811 rte *key = new ? new : old;
1812 u32 lpref = key->pref;
1813 u32 lasn = bgp_get_neighbor(key);
1814 int old_is_group_best = 0;
1815
1816 /*
1817 * Proper RFC 4271 path selection is a bit complicated, it cannot be
1818 * implemented just by rte_better(), because it is not a linear
1819 * ordering. But it can be splitted to two levels, where the lower
1820 * level chooses the best routes in each group of routes from the
1821 * same neighboring AS and higher level chooses the best route (with
1822 * a slightly different ordering) between the best-in-group routes.
1823 *
1824 * When deterministic_med is disabled, we just ignore this issue and
1825 * choose the best route by bgp_rte_better() alone. If enabled, the
1826 * lower level of the route selection is done here (for the group
1827 * to which the changed route belongs), all routes in group are
1828 * marked as suppressed, just chosen best-in-group is not.
1829 *
1830 * Global best route selection then implements higher level by
1831 * choosing between non-suppressed routes (as they are always
1832 * preferred over suppressed routes). Routes from BGP protocols
1833 * that do not set deterministic_med are just never suppressed. As
1834 * they do not participate in the lower level selection, it is OK
1835 * that this fn is not called for them.
1836 *
1837 * The idea is simple, the implementation is more problematic,
d15b0b0a 1838 * mostly because of optimizations in rte_recalculate() that
be4cd99a
OZ
1839 * avoids full recalculation in most cases.
1840 *
1841 * We can assume that at least one of new, old is non-NULL and both
1842 * are from the same protocol with enabled deterministic_med. We
1843 * group routes by both neighbor AS (lasn) and preference (lpref),
1844 * because bgp_rte_better() does not handle preference itself.
1845 */
1846
1847 /* If new and old are from different groups, we just process that
1848 as two independent events */
1849 if (new && old && !same_group(old, lpref, lasn))
d15b0b0a
OZ
1850 {
1851 int i1, i2;
1852 i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
1853 i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
1854 return i1 || i2;
1855 }
be4cd99a 1856
d15b0b0a 1857 /*
be4cd99a
OZ
1858 * We could find the best-in-group and then make some shortcuts like
1859 * in rte_recalculate, but as we would have to walk through all
1860 * net->routes just to find it, it is probably not worth. So we
1861 * just have two simpler fast cases that use just the old route.
1862 * We also set suppressed flag to avoid using it in bgp_rte_better().
1863 */
1864
1865 if (new)
1866 new->u.bgp.suppressed = 1;
1867
1868 if (old)
d15b0b0a
OZ
1869 {
1870 old_is_group_best = !old->u.bgp.suppressed;
1871 old->u.bgp.suppressed = 1;
1872 int new_is_better = new && bgp_rte_better(new, old);
1873
1874 /* The first case - replace not best with worse (or remove not best) */
1875 if (!old_is_group_best && !new_is_better)
1876 return 0;
1877
1878 /* The second case - replace the best with better */
1879 if (old_is_group_best && new_is_better)
be4cd99a 1880 {
d15b0b0a
OZ
1881 /* new is best-in-group, the see discussion below - this is
1882 a special variant of NBG && OBG. From OBG we can deduce
1883 that same_group(old_best) iff (old == old_best) */
1884 new->u.bgp.suppressed = 0;
1885 return (old == old_best);
be4cd99a 1886 }
d15b0b0a 1887 }
be4cd99a
OZ
1888
1889 /* The default case - find a new best-in-group route */
1890 r = new; /* new may not be in the list */
cf98be7b 1891 for (s=net->routes; rte_is_valid(s); s=s->next)
be4cd99a 1892 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
d15b0b0a
OZ
1893 {
1894 s->u.bgp.suppressed = 1;
1895 if (!r || bgp_rte_better(s, r))
1896 r = s;
1897 }
be4cd99a
OZ
1898
1899 /* Simple case - the last route in group disappears */
1900 if (!r)
1901 return 0;
1902
1903 /* Found best-in-group */
1904 r->u.bgp.suppressed = 0;
1905
1906 /*
1907 * There are generally two reasons why we have to force
1908 * recalculation (return 1): First, the new route may be wrongfully
1909 * chosen to be the best in the first case check in
1910 * rte_recalculate(), this may happen only if old_best is from the
1911 * same group. Second, another (different than new route)
1912 * best-in-group is chosen and that may be the proper best (although
1913 * rte_recalculate() without ignore that possibility).
1914 *
1915 * There are three possible cases according to whether the old route
1916 * was the best in group (OBG, stored in old_is_group_best) and
1917 * whether the new route is the best in group (NBG, tested by r == new).
1918 * These cases work even if old or new is NULL.
1919 *
1920 * NBG -> new is a possible candidate for the best route, so we just
1921 * check for the first reason using same_group().
1922 *
1923 * !NBG && OBG -> Second reason applies, return 1
1924 *
1925 * !NBG && !OBG -> Best in group does not change, old != old_best,
1926 * rte_better(new, old_best) is false and therefore
1927 * the first reason does not apply, return 0
1928 */
1929
1930 if (r == new)
1931 return old_best && same_group(old_best, lpref, lasn);
1932 else
1933 return old_is_group_best;
1934}
1935
11cb6202 1936
d15b0b0a
OZ
1937/*
1938 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
11cb6202 1939 */
11cb6202 1940static void
d15b0b0a 1941bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
11cb6202 1942{
d15b0b0a
OZ
1943 eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
1944 eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
1945 eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
1946 eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
11cb6202 1947
d15b0b0a
OZ
1948 /* First, unset AS4_* attributes */
1949 if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
1950 if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
c00d31be 1951
d15b0b0a
OZ
1952 /* Handle AGGREGATOR attribute */
1953 if (a2 && a4)
1954 {
1955 u32 a2_asn = get_u32(a2->u.ptr->data);
ef2c708d 1956
d15b0b0a
OZ
1957 /* If routes were aggregated by an old router, then AS4_PATH and
1958 AS4_AGGREGATOR are invalid. In that case we give up. */
1959 if (a2_asn != AS_TRANS)
1960 return;
f307842a 1961
d15b0b0a
OZ
1962 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
1963 a2->u.ptr = a4->u.ptr;
1964 }
c00d31be 1965
d15b0b0a
OZ
1966 /* Handle AS_PATH attribute */
1967 if (p2 && p4)
1968 {
5509e17d 1969 /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
d15b0b0a
OZ
1970 int p2_len = as_path_getlen(p2->u.ptr);
1971 int p4_len = as_path_getlen(p4->u.ptr);
4847a894 1972
d15b0b0a
OZ
1973 /* AS_PATH is too short, give up */
1974 if (p2_len < p4_len)
1975 return;
c00d31be 1976
d15b0b0a
OZ
1977 /* Merge AS_PATH and AS4_PATH */
1978 as_path_cut(p2->u.ptr, p2_len - p4_len);
1979 p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr);
1980 }
c00d31be 1981}
10be74da
MM
1982
1983int
aebe06b4 1984bgp_get_attr(eattr *a, byte *buf, int buflen)
10be74da 1985{
ae80a2de 1986 uint i = EA_ID(a->id);
d15b0b0a 1987 const struct bgp_attr_desc *d;
6c4df703 1988 int len;
10be74da 1989
d15b0b0a
OZ
1990 if (bgp_attr_known(i))
1991 {
1992 d = &bgp_attr_table[i];
1993 len = bsprintf(buf, "%s", d->name);
1994 buf += len;
1995 if (d->format)
10be74da 1996 {
d15b0b0a
OZ
1997 *buf++ = ':';
1998 *buf++ = ' ';
1999 d->format(a, buf, buflen - len - 2);
2000 return GA_FULL;
10be74da 2001 }
d15b0b0a
OZ
2002 return GA_NAME;
2003 }
2004
d1a74339 2005 bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
10be74da
MM
2006 return GA_NAME;
2007}
ae8f5584 2008
5e88d730
MM
2009void
2010bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
2011{
2012 eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH));
2013 eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
11cb6202 2014 u32 origas;
5e88d730 2015
3ce17142
OZ
2016 buf += bsprintf(buf, " (%d", e->pref);
2017
be4cd99a 2018 if (e->u.bgp.suppressed)
3ce17142 2019 buf += bsprintf(buf, "-");
be4cd99a 2020
d1e146f2 2021 if (e->attrs->hostentry)
d15b0b0a
OZ
2022 {
2023 if (!rte_resolvable(e))
2024 buf += bsprintf(buf, "/-");
2025 else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
2026 buf += bsprintf(buf, "/?");
2027 else
2028 buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
2029 }
d1e146f2
OZ
2030 buf += bsprintf(buf, ") [");
2031
52b9b2a1 2032 if (p && as_path_get_last(p->u.ptr, &origas))
11cb6202 2033 buf += bsprintf(buf, "AS%u", origas);
5e88d730
MM
2034 if (o)
2035 buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2036 strcpy(buf, "]");
2037}