]> git.ipfire.org Git - thirdparty/bird.git/blame - proto/bgp/attrs.c
Backport some minor changes from int-new
[thirdparty/bird.git] / proto / bgp / attrs.c
CommitLineData
c01e3741
MM
1/*
2 * BIRD -- BGP Attributes
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
85368cd4 9#undef LOCAL_DEBUG
c00d31be 10
e3558ab1
MM
11#include <stdlib.h>
12
c01e3741
MM
13#include "nest/bird.h"
14#include "nest/iface.h"
15#include "nest/protocol.h"
16#include "nest/route.h"
c0668f36 17#include "nest/attrs.h"
c01e3741 18#include "conf/conf.h"
c00d31be
MM
19#include "lib/resource.h"
20#include "lib/string.h"
21#include "lib/unaligned.h"
c01e3741
MM
22
23#include "bgp.h"
c00d31be 24
06fb60c4
OZ
25/*
26 * UPDATE message error handling
27 *
28 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
29 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
30 * - Checks of some optional attribute values are missing.
31 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
32 * are probably inadequate.
33 *
34 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
35 * 4271 does not explicitly specify the behavior in that case.
36 *
37 * Loop detection related to route reflection (based on ORIGINATOR_ID
38 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
39 * specifies that such updates should be ignored, but that is generally
40 * a bad idea.
41 *
42 * Error checking of optional transitive attributes is done according to
43 * draft-ietf-idr-optional-transitive-03, but errors are handled always
44 * as withdraws.
45 *
46 * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed,
47 * but unknown segments cause a session drop with Malformed AS_PATH
48 * error (see validate_path()). The behavior in such case is not
49 * explicitly specified by RFC 4271. RFC 5065 specifies that
50 * inconsistent AS_CONFED_* segments should cause a session drop, but
51 * implementations that pass invalid AS_CONFED_* segments are
52 * widespread.
53 *
4e379bde
OZ
54 * Error handling of AS4_* attributes is done as specified by RFC 6793. There
55 * are several possible inconsistencies between AGGREGATOR and AS4_AGGREGATOR
56 * that are not handled by that RFC, these are logged and ignored (see
06fb60c4
OZ
57 * bgp_reconstruct_4b_attrs()).
58 */
59
e7d2ac44 60
1c1da87b
MM
61static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH
62#ifndef IPV6
63,BA_NEXT_HOP
64#endif
65};
ae8f5584 66
ae8f5584 67struct attr_desc {
99f70c78 68 char *name;
ae8f5584
MM
69 int expected_length;
70 int expected_flags;
71 int type;
56a2bed4 72 int allow_in_ebgp;
ae8f5584 73 int (*validate)(struct bgp_proto *p, byte *attr, int len);
aebe06b4 74 void (*format)(eattr *ea, byte *buf, int buflen);
ae8f5584
MM
75};
76
06fb60c4
OZ
/*
 * Special negative results returned by the attribute validate hooks in
 * this file (non-negative results are 0 for success or a positive code).
 * The values are parenthesized so they stay intact in any expression.
 * NOTE(review): how the caller reacts to each value is outside this chunk.
 */
#define IGNORE (-1)
#define WITHDRAW (-2)
79
f421cfdd 80static int
da95a7a7 81bgp_check_origin(struct bgp_proto *p UNUSED, byte *a, int len UNUSED)
f421cfdd 82{
da95a7a7 83 if (*a > 2)
f421cfdd
MM
84 return 6;
85 return 0;
86}
87
88static void
e81b440f 89bgp_format_origin(eattr *a, byte *buf, int buflen UNUSED)
f421cfdd
MM
90{
91 static char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
92
93 bsprintf(buf, bgp_origin_names[a->u.data]);
94}
95
96static int
29c430f8 97path_segment_contains(byte *p, int bs, u32 asn)
f421cfdd 98{
29c430f8
OZ
99 int i;
100 int len = p[1];
101 p += 2;
102
103 for(i=0; i<len; i++)
f421cfdd 104 {
29c430f8
OZ
105 u32 asn2 = (bs == 4) ? get_u32(p) : get_u16(p);
106 if (asn2 == asn)
107 return 1;
108 p += bs;
f421cfdd 109 }
29c430f8 110
f421cfdd
MM
111 return 0;
112}
113
29c430f8 114/* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */
11cb6202 115static int
ae80a2de 116validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ilength)
29c430f8
OZ
117{
118 int res = 0;
119 u8 *a, *dst;
775a5a81 120 int len, plen;
29c430f8
OZ
121
122 dst = a = idata;
123 len = *ilength;
124
125 while (len)
126 {
127 if (len < 2)
128 return -1;
129
130 plen = 2 + bs * a[1];
131 if (len < plen)
132 return -1;
133
775a5a81
OZ
134 if (a[1] == 0)
135 {
136 log(L_WARN "%s: %s_PATH attribute contains empty segment, skipping it",
137 p->p.name, as_path ? "AS" : "AS4");
138 goto skip;
139 }
140
29c430f8
OZ
141 switch (a[0])
142 {
143 case AS_PATH_SET:
29c430f8
OZ
144 res++;
145 break;
146
147 case AS_PATH_SEQUENCE:
29c430f8
OZ
148 res += a[1];
149 break;
150
151 case AS_PATH_CONFED_SEQUENCE:
152 case AS_PATH_CONFED_SET:
153 if (as_path && path_segment_contains(a, bs, p->remote_as))
154 {
155 log(L_WARN "%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p->p.name);
156 return -1;
157 }
158
159 log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment",
160 p->p.name, as_path ? "AS" : "AS4");
775a5a81 161 goto skip;
29c430f8
OZ
162
163 default:
164 return -1;
165 }
166
775a5a81
OZ
167 if (dst != a)
168 memmove(dst, a, plen);
169 dst += plen;
29c430f8 170
775a5a81 171 skip:
29c430f8
OZ
172 len -= plen;
173 a += plen;
174 }
175
176 *ilength = dst - idata;
177 return res;
178}
179
180static inline int
181validate_as_path(struct bgp_proto *p, byte *a, int *len)
11cb6202 182{
29c430f8
OZ
183 return validate_path(p, 1, p->as4_session ? 4 : 2, a, len);
184}
185
186static inline int
187validate_as4_path(struct bgp_proto *p, struct adata *path)
188{
189 return validate_path(p, 0, 4, path->data, &path->length);
11cb6202
OZ
190}
191
f421cfdd 192static int
3e236955 193bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a UNUSED6, int len UNUSED6)
f421cfdd 194{
1c1da87b 195#ifdef IPV6
06fb60c4 196 return IGNORE;
1c1da87b 197#else
f421cfdd
MM
198 ip_addr addr;
199
200 memcpy(&addr, a, len);
201 ipa_ntoh(addr);
202 if (ipa_classify(addr) & IADDR_HOST)
203 return 0;
204 else
205 return 8;
1c1da87b
MM
206#endif
207}
208
d0e2d6d1
OZ
209static void
210bgp_format_next_hop(eattr *a, byte *buf, int buflen UNUSED)
211{
212 ip_addr *ipp = (ip_addr *) a->u.ptr->data;
213#ifdef IPV6
214 /* in IPv6, we might have two addresses in NEXT HOP */
215 if ((a->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(ipp[1]))
216 {
217 bsprintf(buf, "%I %I", ipp[0], ipp[1]);
218 return;
219 }
220#endif
221
222 bsprintf(buf, "%I", ipp[0]);
223}
224
11cb6202 225static int
aebe06b4 226bgp_check_aggregator(struct bgp_proto *p, byte *a UNUSED, int len)
11cb6202 227{
ba5ed6f3 228 int exp_len = p->as4_session ? 8 : 6;
11cb6202 229
06fb60c4 230 return (len == exp_len) ? 0 : WITHDRAW;
11cb6202
OZ
231}
232
cd17c651
OZ
233static void
234bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED)
235{
236 struct adata *ad = a->u.ptr;
237 byte *data = ad->data;
238 u32 as;
239
43c1cecc
OZ
240 as = get_u32(data);
241 data += 4;
cd17c651 242
e16469bc 243 bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as);
cd17c651
OZ
244}
245
06fb60c4
OZ
246static int
247bgp_check_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
248{
249 return ((len % 4) == 0) ? 0 : WITHDRAW;
250}
251
4847a894 252static int
aebe06b4 253bgp_check_cluster_list(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
4847a894
OZ
254{
255 return ((len % 4) == 0) ? 0 : 5;
256}
257
aebe06b4 258static void
fdf16eb6 259bgp_format_cluster_list(eattr *a, byte *buf, int buflen)
aebe06b4 260{
fdf16eb6
OZ
261 /* Truncates cluster lists larger than buflen, probably not a problem */
262 int_set_format(a->u.ptr, 0, -1, buf, buflen);
aebe06b4
OZ
263}
264
1c1da87b 265static int
e21423ba 266bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
1c1da87b
MM
267{
268#ifdef IPV6
269 p->mp_reach_start = a;
270 p->mp_reach_len = len;
1c1da87b 271#endif
06fb60c4 272 return IGNORE;
1c1da87b
MM
273}
274
275static int
e21423ba 276bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
1c1da87b
MM
277{
278#ifdef IPV6
279 p->mp_unreach_start = a;
280 p->mp_unreach_len = len;
1c1da87b 281#endif
06fb60c4 282 return IGNORE;
f421cfdd
MM
283}
284
42a0c054
OZ
285static int
286bgp_check_ext_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
287{
288 return ((len % 8) == 0) ? 0 : WITHDRAW;
289}
290
66dbdbd9
OZ
291static int
292bgp_check_large_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
293{
294 return ((len % 12) == 0) ? 0 : WITHDRAW;
295}
296
42a0c054 297
f421cfdd 298static struct attr_desc bgp_attr_table[] = {
1c1da87b 299 { NULL, -1, 0, 0, 0, /* Undefined */
f421cfdd 300 NULL, NULL },
1c1da87b 301 { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */
f421cfdd 302 bgp_check_origin, bgp_format_origin },
1c1da87b 303 { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */
29c430f8 304 NULL, NULL }, /* is checked by validate_as_path() as a special case */
1c1da87b 305 { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */
d0e2d6d1 306 bgp_check_next_hop, bgp_format_next_hop },
b6bf284a 307 { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */
f421cfdd 308 NULL, NULL },
1950a479 309 { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_LOCAL_PREF */
f421cfdd 310 NULL, NULL },
1c1da87b 311 { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */
f421cfdd 312 NULL, NULL },
11cb6202 313 { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */
cd17c651 314 bgp_check_aggregator, bgp_format_aggregator },
1c1da87b 315 { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */
06fb60c4 316 bgp_check_community, NULL },
aebe06b4 317 { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_ROUTER_ID, 0, /* BA_ORIGINATOR_ID */
4847a894
OZ
318 NULL, NULL },
319 { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */
aebe06b4 320 bgp_check_cluster_list, bgp_format_cluster_list },
e81b440f 321 { .name = NULL }, /* BA_DPA */
06fb60c4
OZ
322 { .name = NULL }, /* BA_ADVERTISER */
323 { .name = NULL }, /* BA_RCID_PATH */
1c1da87b
MM
324 { "mp_reach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_REACH_NLRI */
325 bgp_check_reach_nlri, NULL },
326 { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */
327 bgp_check_unreach_nlri, NULL },
42a0c054
OZ
328 { "ext_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_EC_SET, 1, /* BA_EXT_COMMUNITY */
329 bgp_check_ext_community, NULL },
11cb6202 330 { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
48d79d52
OZ
331 NULL, NULL },
332 { "as4_aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
66dbdbd9
OZ
333 NULL, NULL },
334 [BA_LARGE_COMMUNITY] =
335 { "large_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_LC_SET, 1,
336 bgp_check_large_community, NULL }
f421cfdd
MM
337};
338
43c1cecc
OZ
/*
 * Note: BA_AS4_PATH is described as EAF_TYPE_OPAQUE rather than
 * EAF_TYPE_AS_PATH. It does not matter as this attribute does not appear
 * on routes in the routing table.
 */

/* True iff @code indexes an entry of bgp_attr_table[] that has a name. */
#define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name != NULL)
344
4847a894
OZ
345static inline struct adata *
346bgp_alloc_adata(struct linpool *pool, unsigned len)
347{
348 struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
349 ad->length = len;
350 return ad;
351}
352
353static void
354bgp_set_attr(eattr *e, unsigned attr, uintptr_t val)
cf3d6470
MM
355{
356 ASSERT(ATTR_KNOWN(attr));
357 e->id = EA_CODE(EAP_BGP, attr);
358 e->type = bgp_attr_table[attr].type;
359 e->flags = bgp_attr_table[attr].expected_flags;
360 if (e->type & EAF_EMBEDDED)
4847a894 361 e->u.data = val;
cf3d6470 362 else
4847a894 363 e->u.ptr = (struct adata *) val;
cf3d6470
MM
364}
365
4847a894
OZ
366static byte *
367bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len)
368{
369 struct adata *ad = bgp_alloc_adata(pool, len);
370 bgp_set_attr(e, attr, (uintptr_t) ad);
371 return ad->data;
372}
373
374void
375bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val)
cf3d6470
MM
376{
377 ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
378 a->next = *to;
379 *to = a;
380 a->flags = EALF_SORTED;
381 a->count = 1;
4847a894
OZ
382 bgp_set_attr(a->attrs, attr, val);
383}
384
385byte *
386bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len)
387{
388 struct adata *ad = bgp_alloc_adata(pool, len);
389 bgp_attach_attr(to, pool, attr, (uintptr_t) ad);
390 return ad->data;
cf3d6470
MM
391}
392
11cb6202 393static int
ae80a2de 394bgp_encode_attr_hdr(byte *dst, uint flags, unsigned code, int len)
11cb6202
OZ
395{
396 int wlen;
397
398 DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags);
399
400 if (len < 256)
401 {
402 *dst++ = flags;
403 *dst++ = code;
404 *dst++ = len;
405 wlen = 3;
406 }
407 else
408 {
409 *dst++ = flags | BAF_EXT_LEN;
410 *dst++ = code;
411 put_u16(dst, len);
412 wlen = 4;
413 }
414
415 return wlen;
416}
417
418static void
419aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used)
420{
421 byte *src = aggr->data;
422 *new_used = 0;
423
424 u32 as = get_u32(src);
425 if (as > 0xFFFF)
426 {
427 as = AS_TRANS;
428 *new_used = 1;
429 }
430 put_u16(dst, as);
431
432 /* Copy IPv4 address */
433 memcpy(dst + 2, src + 4, 4);
434}
435
436static void
437aggregator_convert_to_new(struct adata *aggr, byte *dst)
438{
439 byte *src = aggr->data;
440
441 u32 as = get_u16(src);
442 put_u32(dst, as);
443
444 /* Copy IPv4 address */
445 memcpy(dst + 4, src + 2, 4);
446}
447
448static int
449bgp_get_attr_len(eattr *a)
450{
451 int len;
452 if (ATTR_KNOWN(EA_ID(a->id)))
453 {
454 int code = EA_ID(a->id);
455 struct attr_desc *desc = &bgp_attr_table[code];
456 len = desc->expected_length;
457 if (len < 0)
458 {
459 ASSERT(!(a->type & EAF_EMBEDDED));
460 len = a->u.ptr->length;
461 }
462 }
463 else
464 {
465 ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
466 len = a->u.ptr->length;
467 }
468
469 return len;
470}
471
54e55169
MM
472/**
473 * bgp_encode_attrs - encode BGP attributes
11cb6202 474 * @p: BGP instance
54e55169
MM
475 * @w: buffer
476 * @attrs: a list of extended attributes
477 * @remains: remaining space in the buffer
478 *
479 * The bgp_encode_attrs() function takes a list of extended attributes
480 * and converts it to its BGP representation (a part of an Update message).
481 *
82a79586 482 * Result: Length of the attribute block generated or -1 if not enough space.
54e55169 483 */
ae80a2de 484uint
11cb6202 485bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
f421cfdd 486{
ae80a2de 487 uint i, code, type, flags;
f421cfdd 488 byte *start = w;
11cb6202 489 int len, rv;
f421cfdd 490
cf3d6470 491 for(i=0; i<attrs->count; i++)
f421cfdd 492 {
cf3d6470 493 eattr *a = &attrs->attrs[i];
f421cfdd
MM
494 ASSERT(EA_PROTO(a->id) == EAP_BGP);
495 code = EA_ID(a->id);
42a0c054 496
cf3d6470
MM
497#ifdef IPV6
498 /* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */
499 if (code == BA_NEXT_HOP)
500 continue;
501#endif
11cb6202
OZ
502
503 /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
504 * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH
505 * as optional AS4_PATH attribute.
506 */
43c1cecc 507 if ((code == BA_AS_PATH) && (! p->as4_session))
f421cfdd 508 {
f421cfdd 509 len = a->u.ptr->length;
11cb6202
OZ
510
511 if (remains < (len + 4))
512 goto err_no_buffer;
513
514 /* Using temporary buffer because don't know a length of created attr
515 * and therefore a length of a header. Perhaps i should better always
516 * use BAF_EXT_LEN. */
517
518 byte buf[len];
519 int new_used;
520 int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used);
521
11b32d91 522 DBG("BGP: Encoding old AS_PATH\n");
11cb6202
OZ
523 rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl);
524 ADVANCE(w, remains, rv);
525 memcpy(w, buf, nl);
526 ADVANCE(w, remains, nl);
527
528 if (! new_used)
529 continue;
530
531 if (remains < (len + 4))
532 goto err_no_buffer;
533
534 /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments
535 * here but we don't support confederations and such paths we already
536 * discarded in bgp_check_as_path().
537 */
538
11b32d91 539 DBG("BGP: Encoding AS4_PATH\n");
11cb6202
OZ
540 rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len);
541 ADVANCE(w, remains, rv);
542 memcpy(w, a->u.ptr->data, len);
543 ADVANCE(w, remains, len);
544
545 continue;
f421cfdd 546 }
11cb6202
OZ
547
548 /* The same issue with AGGREGATOR attribute */
43c1cecc 549 if ((code == BA_AGGREGATOR) && (! p->as4_session))
f421cfdd 550 {
11cb6202
OZ
551 int new_used;
552
553 len = 6;
554 if (remains < (len + 3))
555 goto err_no_buffer;
556
557 rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len);
558 ADVANCE(w, remains, rv);
559 aggregator_convert_to_old(a->u.ptr, w, &new_used);
560 ADVANCE(w, remains, len);
561
562 if (! new_used)
563 continue;
564
565 len = 8;
566 if (remains < (len + 3))
567 goto err_no_buffer;
568
569 rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len);
570 ADVANCE(w, remains, rv);
571 memcpy(w, a->u.ptr->data, len);
572 ADVANCE(w, remains, len);
573
574 continue;
f421cfdd 575 }
11cb6202
OZ
576
577 /* Standard path continues here ... */
578
42a0c054 579 type = a->type & EAF_TYPE_MASK;
11cb6202
OZ
580 flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
581 len = bgp_get_attr_len(a);
582
42a0c054 583 /* Skip empty sets */
66dbdbd9 584 if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET) || (type == EAF_TYPE_LC_SET)) && (len == 0))
6b5a8649
OZ
585 continue;
586
11cb6202
OZ
587 if (remains < len + 4)
588 goto err_no_buffer;
589
590 rv = bgp_encode_attr_hdr(w, flags, code, len);
591 ADVANCE(w, remains, rv);
592
42a0c054 593 switch (type)
f421cfdd
MM
594 {
595 case EAF_TYPE_INT:
596 case EAF_TYPE_ROUTER_ID:
597 if (len == 4)
598 put_u32(w, a->u.data);
599 else
600 *w = a->u.data;
601 break;
602 case EAF_TYPE_IP_ADDRESS:
603 {
604 ip_addr ip = *(ip_addr *)a->u.ptr->data;
605 ipa_hton(ip);
606 memcpy(w, &ip, len);
607 break;
608 }
1ed2fe96 609 case EAF_TYPE_INT_SET:
66dbdbd9 610 case EAF_TYPE_LC_SET:
42a0c054 611 case EAF_TYPE_EC_SET:
1ed2fe96 612 {
42a0c054 613 u32 *z = int_set_get_data(a->u.ptr);
1ed2fe96
MM
614 int i;
615 for(i=0; i<len; i+=4)
616 put_u32(w+i, *z++);
617 break;
618 }
f421cfdd
MM
619 case EAF_TYPE_OPAQUE:
620 case EAF_TYPE_AS_PATH:
f421cfdd
MM
621 memcpy(w, a->u.ptr->data, len);
622 break;
623 default:
624 bug("bgp_encode_attrs: unknown attribute type %02x", a->type);
625 }
11cb6202 626 ADVANCE(w, remains, len);
f421cfdd 627 }
cf3d6470 628 return w - start;
11cb6202
OZ
629
630 err_no_buffer:
82a79586 631 return -1;
f421cfdd 632}
ae8f5584 633
094d2bdb 634/*
c2b28c99
MM
635static void
636bgp_init_prefix(struct fib_node *N)
637{
638 struct bgp_prefix *p = (struct bgp_prefix *) N;
f421cfdd 639 p->bucket_node.next = NULL;
c2b28c99 640}
094d2bdb 641*/
c2b28c99 642
e3558ab1
MM
643static int
644bgp_compare_u32(const u32 *x, const u32 *y)
645{
646 return (*x < *y) ? -1 : (*x > *y) ? 1 : 0;
647}
648
42a0c054
OZ
649static inline void
650bgp_normalize_int_set(u32 *dest, u32 *src, unsigned cnt)
ae8f5584
MM
651{
652 memcpy(dest, src, sizeof(u32) * cnt);
e3558ab1 653 qsort(dest, cnt, sizeof(u32), (int(*)(const void *, const void *)) bgp_compare_u32);
ae8f5584
MM
654}
655
42a0c054
OZ
656static int
657bgp_compare_ec(const u32 *xp, const u32 *yp)
658{
659 u64 x = ec_get(xp, 0);
660 u64 y = ec_get(yp, 0);
661 return (x < y) ? -1 : (x > y) ? 1 : 0;
662}
663
664static inline void
665bgp_normalize_ec_set(struct adata *ad, u32 *src, int internal)
666{
667 u32 *dst = int_set_get_data(ad);
668
669 /* Remove non-transitive communities (EC_TBIT active) on external sessions */
670 if (! internal)
671 {
672 int len = int_set_get_size(ad);
673 u32 *t = dst;
674 int i;
675
676 for (i=0; i < len; i += 2)
677 {
678 if (src[i] & EC_TBIT)
679 continue;
680
681 *t++ = src[i];
682 *t++ = src[i+1];
683 }
684
685 ad->length = (t - dst) * 4;
686 }
687 else
688 memcpy(dst, src, ad->length);
689
690 qsort(dst, ad->length / 8, 8, (int(*)(const void *, const void *)) bgp_compare_ec);
691}
692
66dbdbd9
OZ
693static int
694bgp_compare_lc(const u32 *x, const u32 *y)
695{
696 if (x[0] != y[0])
697 return (x[0] > y[0]) ? 1 : -1;
698 if (x[1] != y[1])
699 return (x[1] > y[1]) ? 1 : -1;
700 if (x[2] != y[2])
701 return (x[2] > y[2]) ? 1 : -1;
702 return 0;
703}
704
705static inline void
706bgp_normalize_lc_set(u32 *dest, u32 *src, unsigned cnt)
707{
708 memcpy(dest, src, LCOMM_LENGTH * cnt);
709 qsort(dest, cnt, LCOMM_LENGTH, (int(*)(const void *, const void *)) bgp_compare_lc);
710}
711
ae8f5584
MM
712static void
713bgp_rehash_buckets(struct bgp_proto *p)
714{
c2b28c99 715 struct bgp_bucket **old = p->bucket_hash;
ae8f5584
MM
716 struct bgp_bucket **new;
717 unsigned oldn = p->hash_size;
718 unsigned i, e, mask;
719 struct bgp_bucket *b;
720
721 p->hash_size = p->hash_limit;
722 DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size);
723 p->hash_limit *= 4;
724 if (p->hash_limit >= 65536)
725 p->hash_limit = ~0;
c2b28c99 726 new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
ae8f5584
MM
727 mask = p->hash_size - 1;
728 for (i=0; i<oldn; i++)
729 while (b = old[i])
730 {
c2b28c99 731 old[i] = b->hash_next;
ae8f5584 732 e = b->hash & mask;
c2b28c99
MM
733 b->hash_next = new[e];
734 if (b->hash_next)
735 b->hash_next->hash_prev = b;
736 b->hash_prev = NULL;
ae8f5584
MM
737 new[e] = b;
738 }
739 mb_free(old);
740}
741
742static struct bgp_bucket *
743bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash)
744{
745 struct bgp_bucket *b;
746 unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
7fdd338c 747 unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
dfc7a6c6 748 unsigned size = sizeof(struct bgp_bucket) + ea_size_aligned;
ae8f5584
MM
749 unsigned i;
750 byte *dest;
751 unsigned index = hash & (p->hash_size - 1);
752
753 /* Gather total size of non-inline attributes */
754 for (i=0; i<new->count; i++)
755 {
756 eattr *a = &new->attrs[i];
757 if (!(a->type & EAF_EMBEDDED))
7fdd338c 758 size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
ae8f5584
MM
759 }
760
761 /* Create the bucket and hash it */
762 b = mb_alloc(p->p.pool, size);
c2b28c99
MM
763 b->hash_next = p->bucket_hash[index];
764 if (b->hash_next)
765 b->hash_next->hash_prev = b;
766 p->bucket_hash[index] = b;
767 b->hash_prev = NULL;
ae8f5584 768 b->hash = hash;
f421cfdd
MM
769 add_tail(&p->bucket_queue, &b->send_node);
770 init_list(&b->prefixes);
ae8f5584
MM
771 memcpy(b->eattrs, new, ea_size);
772 dest = ((byte *)b->eattrs) + ea_size_aligned;
773
774 /* Copy values of non-inline attributes */
775 for (i=0; i<new->count; i++)
776 {
85368cd4 777 eattr *a = &b->eattrs->attrs[i];
ae8f5584
MM
778 if (!(a->type & EAF_EMBEDDED))
779 {
780 struct adata *oa = a->u.ptr;
781 struct adata *na = (struct adata *) dest;
782 memcpy(na, oa, sizeof(struct adata) + oa->length);
783 a->u.ptr = na;
7fdd338c 784 dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
ae8f5584
MM
785 }
786 }
787
788 /* If needed, rehash */
789 p->hash_count++;
790 if (p->hash_count > p->hash_limit)
791 bgp_rehash_buckets(p);
792
793 return b;
794}
795
796static struct bgp_bucket *
6cb8f742 797bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate)
ae8f5584 798{
e21423ba 799 ea_list *new;
56a2bed4 800 unsigned i, cnt, hash, code;
ae8f5584
MM
801 eattr *a, *d;
802 u32 seen = 0;
ae8f5584
MM
803 struct bgp_bucket *b;
804
02bd064a
MM
805 /* Merge the attribute list */
806 new = alloca(ea_scan(attrs));
807 ea_merge(attrs, new);
8b258e4e 808 ea_sort(new);
ae8f5584
MM
809
810 /* Normalize attributes */
811 d = new->attrs;
812 cnt = new->count;
813 new->count = 0;
814 for(i=0; i<cnt; i++)
815 {
816 a = &new->attrs[i];
ae8f5584
MM
817 if (EA_PROTO(a->id) != EAP_BGP)
818 continue;
56a2bed4 819 code = EA_ID(a->id);
d1a74339 820 if (ATTR_KNOWN(code))
56a2bed4 821 {
1950a479
OZ
822 if (!p->is_internal)
823 {
824 if (!bgp_attr_table[code].allow_in_ebgp)
825 continue;
826 if ((code == BA_LOCAL_PREF) && !p->cf->allow_local_pref)
827 continue;
828 }
684c25d9
MM
829 /* The flags might have been zero if the attr was added by filters */
830 a->flags = (a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags;
d1a74339
MM
831 if (code < 32)
832 seen |= 1 << code;
833 }
834 else
835 {
836 /* Don't re-export unknown non-transitive attributes */
837 if (!(a->flags & BAF_TRANSITIVE))
838 continue;
56a2bed4 839 }
ae8f5584 840 *d = *a;
e3558ab1
MM
841 if ((d->type & EAF_ORIGINATED) && !originate && (d->flags & BAF_TRANSITIVE) && (d->flags & BAF_OPTIONAL))
842 d->flags |= BAF_PARTIAL;
ae8f5584
MM
843 switch (d->type & EAF_TYPE_MASK)
844 {
e3558ab1 845 case EAF_TYPE_INT_SET:
ae8f5584
MM
846 {
847 struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
848 z->length = d->u.ptr->length;
42a0c054
OZ
849 bgp_normalize_int_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / 4);
850 d->u.ptr = z;
851 break;
852 }
853 case EAF_TYPE_EC_SET:
854 {
855 struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
856 z->length = d->u.ptr->length;
857 bgp_normalize_ec_set(z, (u32 *) d->u.ptr->data, p->is_internal);
ae8f5584
MM
858 d->u.ptr = z;
859 break;
860 }
66dbdbd9
OZ
861 case EAF_TYPE_LC_SET:
862 {
863 struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
864 z->length = d->u.ptr->length;
865 bgp_normalize_lc_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / LCOMM_LENGTH);
866 d->u.ptr = z;
867 break;
868 }
de10a974 869 default: ;
ae8f5584
MM
870 }
871 d++;
872 new->count++;
873 }
874
875 /* Hash */
876 hash = ea_hash(new);
c2b28c99 877 for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next)
ae8f5584
MM
878 if (b->hash == hash && ea_same(b->eattrs, new))
879 {
880 DBG("Found bucket.\n");
881 return b;
882 }
883
884 /* Ensure that there are all mandatory attributes */
77506349 885 for(i=0; i<ARRAY_SIZE(bgp_mandatory_attrs); i++)
ae8f5584
MM
886 if (!(seen & (1 << bgp_mandatory_attrs[i])))
887 {
6cb8f742 888 log(L_ERR "%s: Mandatory attribute %s missing in route %I/%d", p->p.name, bgp_attr_table[bgp_mandatory_attrs[i]].name, n->n.prefix, n->n.pxlen);
ae8f5584
MM
889 return NULL;
890 }
891
6cb8f742
OZ
892 /* Check if next hop is valid */
893 a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP));
9be9a264 894 if (!a || ipa_equal(p->cf->remote_ip, *(ip_addr *)a->u.ptr->data))
6cb8f742
OZ
895 {
896 log(L_ERR "%s: Invalid NEXT_HOP attribute in route %I/%d", p->p.name, n->n.prefix, n->n.pxlen);
897 return NULL;
898 }
f421cfdd 899
ae8f5584
MM
900 /* Create new bucket */
901 DBG("Creating bucket.\n");
902 return bgp_new_bucket(p, new, hash);
903}
904
f421cfdd
MM
905void
906bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck)
907{
908 if (buck->hash_next)
909 buck->hash_next->hash_prev = buck->hash_prev;
910 if (buck->hash_prev)
911 buck->hash_prev->hash_next = buck->hash_next;
912 else
913 p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next;
914 mb_free(buck);
915}
916
094d2bdb
OZ
917
918/* Prefix hash table */
919
e7d2ac44
OZ
920#define PXH_KEY(n1) n1->n.prefix, n1->n.pxlen, n1->path_id
921#define PXH_NEXT(n) n->next
922#define PXH_EQ(p1,l1,i1,p2,l2,i2) ipa_equal(p1, p2) && l1 == l2 && i1 == i2
923#define PXH_FN(p,l,i) ipa_hash32(p) ^ u32_hash((l << 16) ^ i)
924
925#define PXH_REHASH bgp_pxh_rehash
926#define PXH_PARAMS /8, *2, 2, 2, 8, 20
927
094d2bdb 928
e7d2ac44 929HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
094d2bdb
OZ
930
931void
932bgp_init_prefix_table(struct bgp_proto *p, u32 order)
933{
e7d2ac44 934 HASH_INIT(p->prefix_hash, p->p.pool, order);
094d2bdb 935
e7d2ac44 936 p->prefix_slab = sl_new(p->p.pool, sizeof(struct bgp_prefix));
094d2bdb
OZ
937}
938
ed1a908e
OZ
939void
940bgp_free_prefix_table(struct bgp_proto *p)
941{
942 HASH_FREE(p->prefix_hash);
943
944 rfree(p->prefix_slab);
945 p->prefix_slab = NULL;
946}
947
094d2bdb
OZ
948static struct bgp_prefix *
949bgp_get_prefix(struct bgp_proto *p, ip_addr prefix, int pxlen, u32 path_id)
950{
e7d2ac44 951 struct bgp_prefix *bp = HASH_FIND(p->prefix_hash, PXH, prefix, pxlen, path_id);
094d2bdb 952
e7d2ac44
OZ
953 if (bp)
954 return bp;
094d2bdb
OZ
955
956 bp = sl_alloc(p->prefix_slab);
957 bp->n.prefix = prefix;
958 bp->n.pxlen = pxlen;
959 bp->path_id = path_id;
094d2bdb
OZ
960 bp->bucket_node.next = NULL;
961
e7d2ac44 962 HASH_INSERT2(p->prefix_hash, PXH, p->p.pool, bp);
094d2bdb
OZ
963
964 return bp;
965}
966
967void
968bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp)
969{
e7d2ac44 970 HASH_REMOVE2(p->prefix_hash, PXH, p->p.pool, bp);
094d2bdb 971 sl_free(p->prefix_slab, bp);
094d2bdb
OZ
972}
973
974
ef2c708d 975void
dca75fd7 976bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs)
ef2c708d 977{
ae8f5584 978 struct bgp_proto *p = (struct bgp_proto *) P;
f421cfdd
MM
979 struct bgp_bucket *buck;
980 struct bgp_prefix *px;
094d2bdb
OZ
981 rte *key;
982 u32 path_id;
ae8f5584 983
f421cfdd 984 DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down");
ae8f5584
MM
985
986 if (new)
987 {
094d2bdb 988 key = new;
6cb8f742 989 buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP);
ae8f5584
MM
990 if (!buck) /* Inconsistent attribute list */
991 return;
992 }
f421cfdd
MM
993 else
994 {
094d2bdb 995 key = old;
f421cfdd
MM
996 if (!(buck = p->withdraw_bucket))
997 {
998 buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket));
999 init_list(&buck->prefixes);
1000 }
1001 }
094d2bdb
OZ
1002 path_id = p->add_path_tx ? key->attrs->src->global_id : 0;
1003 px = bgp_get_prefix(p, n->n.prefix, n->n.pxlen, path_id);
f421cfdd
MM
1004 if (px->bucket_node.next)
1005 {
1006 DBG("\tRemoving old entry.\n");
1007 rem_node(&px->bucket_node);
1008 }
1009 add_tail(&buck->prefixes, &px->bucket_node);
1010 bgp_schedule_packet(p->conn, PKT_UPDATE);
ef2c708d
MM
1011}
1012
48e842cc
MM
1013static int
1014bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
ef2c708d 1015{
8b258e4e 1016 ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr));
ef2c708d 1017 rta *rta = e->attrs;
cf3d6470 1018 byte *z;
ef2c708d 1019
48e842cc
MM
1020 ea->next = *attrs;
1021 *attrs = ea;
ef2c708d 1022 ea->flags = EALF_SORTED;
8b258e4e 1023 ea->count = 4;
ef2c708d 1024
4847a894 1025 bgp_set_attr(ea->attrs, BA_ORIGIN,
98ac6176 1026 ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
ef2c708d 1027
ef2c708d 1028 if (p->is_internal)
4847a894 1029 bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0);
ef2c708d
MM
1030 else
1031 {
43c1cecc 1032 z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 6);
8b258e4e 1033 z[0] = AS_PATH_SEQUENCE;
ef2c708d 1034 z[1] = 1; /* 1 AS */
43c1cecc 1035 put_u32(z+2, p->local_as);
ef2c708d
MM
1036 }
1037
9be9a264
OZ
1038 /* iBGP -> use gw, eBGP multi-hop -> use source_addr,
1039 eBGP single-hop -> use gw if on the same iface */
4827b69f 1040 z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
48e842cc 1041 if (p->cf->next_hop_self ||
b7a735ea 1042 rta->dest != RTD_ROUTER ||
9be9a264 1043 ipa_equal(rta->gw, IPA_NONE) ||
88a183c6 1044 ipa_is_link_local(rta->gw) ||
48bc232f
OZ
1045 (!p->is_internal && !p->cf->next_hop_keep &&
1046 (!p->neigh || (rta->iface != p->neigh->iface))))
4827b69f 1047 set_next_hop(z, p->source_addr);
ef2c708d 1048 else
f2d7da74 1049 set_next_hop(z, rta->gw);
8b258e4e 1050
fbcb7d5f 1051 bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, p->cf->default_local_pref);
ef2c708d 1052
48e842cc 1053 return 0; /* Leave decision to the filters */
ef2c708d
MM
1054}
1055
4847a894
OZ
1056
1057static inline int
1058bgp_as_path_loopy(struct bgp_proto *p, rta *a)
1059{
a15dab76 1060 int num = p->cf->allow_local_as + 1;
4847a894 1061 eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
a15dab76 1062 return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num));
4847a894
OZ
1063}
1064
1065static inline int
1066bgp_originator_id_loopy(struct bgp_proto *p, rta *a)
0a40e973 1067{
4847a894
OZ
1068 eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1069 return (e && (e->u.data == p->local_id));
1070}
1071
1072static inline int
1073bgp_cluster_list_loopy(struct bgp_proto *p, rta *a)
1074{
1075 eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
1076 return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id));
1077}
1078
1079
1080static inline void
1081bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as)
1082{
1083 eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1084 bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as));
1085}
1086
1087static inline void
1088bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
1089{
1090 eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
261816b0 1091 bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_prepend(pool, a ? a->u.ptr : NULL, cid));
ef2c708d
MM
1092}
1093
48e842cc 1094static int
4847a894 1095bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr)
ef2c708d 1096{
48e842cc
MM
1097 eattr *a;
1098
a92fe607 1099 if (!p->is_internal && !p->rs_client)
b6bf284a
OZ
1100 {
1101 bgp_path_prepend(e, attrs, pool, p->local_as);
1102
1103 /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
1104 * propagated to other neighboring ASes.
1105 * Perhaps it would be better to undefine it.
1106 */
1107 a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1108 if (a)
1109 bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
1110 }
ef2c708d 1111
9be9a264 1112 /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr,
53ffbff3
OZ
1113 * eBGP single-hop -> keep next_hop if on the same iface.
1114 * If the next_hop is zero (i.e. link-local), keep only if on the same iface.
48bc232f
OZ
1115 *
1116 * Note that same-iface-check uses iface from route, which is based on gw.
53ffbff3 1117 */
48e842cc 1118 a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
9be9a264 1119 if (a && !p->cf->next_hop_self &&
48bc232f
OZ
1120 (p->cf->next_hop_keep ||
1121 (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) ||
53ffbff3 1122 (p->neigh && (e->attrs->iface == p->neigh->iface))))
48e842cc
MM
1123 {
1124 /* Leave the original next hop attribute, will check later where does it point */
1125 }
1126 else
1127 {
1128 /* Need to create new one */
4827b69f
OZ
1129 byte *b = bgp_attach_attr_wa(attrs, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
1130 set_next_hop(b, p->source_addr);
4847a894
OZ
1131 }
1132
1133 if (rr)
1134 {
1135 /* Handling route reflection, RFC 4456 */
094d2bdb 1136 struct bgp_proto *src = (struct bgp_proto *) e->attrs->src->proto;
4847a894
OZ
1137
1138 a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1139 if (!a)
1140 bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id);
1141
1142 /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
1143 bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id);
1144
1145 /* Two RR clients with different cluster ID, hmmm */
1146 if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id))
1147 bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id);
48e842cc 1148 }
ef2c708d 1149
48e842cc 1150 return 0; /* Leave decision to the filters */
ef2c708d
MM
1151}
1152
6cb8f742
OZ
1153static int
1154bgp_community_filter(struct bgp_proto *p, rte *e)
1155{
1156 eattr *a;
1157 struct adata *d;
1158
1159 /* Check if we aren't forbidden to export the route by communities */
1160 a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
1161 if (a)
1162 {
1163 d = a->u.ptr;
1164 if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1165 {
1166 DBG("\tNO_ADVERTISE\n");
1167 return 1;
1168 }
1169 if (!p->is_internal &&
1170 (int_set_contains(d, BGP_COMM_NO_EXPORT) ||
1171 int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED)))
1172 {
1173 DBG("\tNO_EXPORT\n");
1174 return 1;
1175 }
1176 }
1177
1178 return 0;
1179}
1180
ef2c708d
MM
1181int
1182bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool)
1183{
1184 rte *e = *new;
1185 struct bgp_proto *p = (struct bgp_proto *) P;
094d2bdb
OZ
1186 struct bgp_proto *new_bgp = (e->attrs->src->proto->proto == &proto_bgp) ?
1187 (struct bgp_proto *) e->attrs->src->proto : NULL;
ef2c708d 1188
48e842cc 1189 if (p == new_bgp) /* Poison reverse updates */
ef2c708d
MM
1190 return -1;
1191 if (new_bgp)
1192 {
4847a894
OZ
1193 /* We should check here for cluster list loop, because the receiving BGP instance
1194 might have different cluster ID */
1195 if (bgp_cluster_list_loopy(p, e->attrs))
1196 return -1;
1197
41677025 1198 if (p->cf->interpret_communities && bgp_community_filter(p, e))
6cb8f742
OZ
1199 return -1;
1200
ef2c708d 1201 if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal)
4847a894
OZ
1202 {
1203 /* Redistribution of internal routes with IBGP */
1204 if (p->rr_client || new_bgp->rr_client)
1205 /* Route reflection, RFC 4456 */
1206 return bgp_update_attrs(p, e, attrs, pool, 1);
1207 else
1208 return -1;
1209 }
1210 else
1211 return bgp_update_attrs(p, e, attrs, pool, 0);
ef2c708d
MM
1212 }
1213 else
48e842cc 1214 return bgp_create_attrs(p, e, attrs, pool);
ef2c708d
MM
1215}
1216
b6bf284a
OZ
1217static inline u32
1218bgp_get_neighbor(rte *r)
1219{
1220 eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1221 u32 as;
1222
52b9b2a1 1223 if (e && as_path_get_first(e->u.ptr, &as))
b6bf284a
OZ
1224 return as;
1225 else
094d2bdb 1226 return ((struct bgp_proto *) r->attrs->src->proto)->remote_as;
b6bf284a
OZ
1227}
1228
7e95c05d
OZ
1229static inline int
1230rte_resolvable(rte *rt)
1231{
1232 int rd = rt->attrs->dest;
1233 return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH);
1234}
1235
ef2c708d
MM
1236int
1237bgp_rte_better(rte *new, rte *old)
1238{
094d2bdb
OZ
1239 struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
1240 struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
56a2bed4
MM
1241 eattr *x, *y;
1242 u32 n, o;
ef2c708d 1243
be4cd99a
OZ
1244 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1245 n = new->u.bgp.suppressed;
1246 o = old->u.bgp.suppressed;
1247 if (n > o)
1248 return 0;
1249 if (n < o)
1250 return 1;
1251
ac3ac49a 1252 /* RFC 4271 9.1.2.1. Route resolvability test */
7e95c05d
OZ
1253 n = rte_resolvable(new);
1254 o = rte_resolvable(old);
ac3ac49a
OZ
1255 if (n > o)
1256 return 1;
1257 if (n < o)
1258 return 0;
1259
ef2c708d 1260 /* Start with local preferences */
56a2bed4
MM
1261 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1262 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1263 n = x ? x->u.data : new_bgp->cf->default_local_pref;
1264 o = y ? y->u.data : old_bgp->cf->default_local_pref;
1265 if (n > o)
1266 return 1;
1267 if (n < o)
1268 return 0;
1269
4847a894 1270 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
56a2bed4 1271 if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
ef2c708d 1272 {
56a2bed4
MM
1273 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1274 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
11cb6202
OZ
1275 n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1276 o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
56a2bed4 1277 if (n < o)
ef2c708d 1278 return 1;
56a2bed4 1279 if (n > o)
ef2c708d
MM
1280 return 0;
1281 }
1282
4847a894 1283 /* RFC 4271 9.1.2.2. b) Use origins */
56a2bed4
MM
1284 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1285 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
cea63664
MM
1286 n = x ? x->u.data : ORIGIN_INCOMPLETE;
1287 o = y ? y->u.data : ORIGIN_INCOMPLETE;
56a2bed4
MM
1288 if (n < o)
1289 return 1;
1290 if (n > o)
1291 return 0;
1292
4847a894 1293 /* RFC 4271 9.1.2.2. c) Compare MED's */
be4cd99a
OZ
1294 /* Proper RFC 4271 path selection cannot be interpreted as finding
1295 * the best path in some ordering. It is implemented partially in
1296 * bgp_rte_recalculate() when deterministic_med option is
1297 * active. Without that option, the behavior is just an
1298 * approximation, which in specific situations may lead to
1299 * persistent routing loops, because it is nondeterministic - it
1300 * depends on the order in which routes appeared. But it is also the
1301 * same behavior as used by default in Cisco routers, so it is
1302 * probably not a big issue.
73272f04
OZ
1303 */
1304 if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1305 (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
b6bf284a
OZ
1306 {
1307 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1308 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1309 n = x ? x->u.data : new_bgp->cf->default_med;
1310 o = y ? y->u.data : old_bgp->cf->default_med;
1311 if (n < o)
1312 return 1;
1313 if (n > o)
1314 return 0;
1315 }
56a2bed4 1316
4847a894 1317 /* RFC 4271 9.1.2.2. d) Prefer external peers */
ef2c708d
MM
1318 if (new_bgp->is_internal > old_bgp->is_internal)
1319 return 0;
1320 if (new_bgp->is_internal < old_bgp->is_internal)
1321 return 1;
ef2c708d 1322
d1e146f2
OZ
1323 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1324 n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1325 o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1326 if (n < o)
1327 return 1;
1328 if (n > o)
1329 return 0;
4847a894 1330
4847a894
OZ
1331 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1332 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */
1333 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1334 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
1335 n = x ? x->u.data : new_bgp->remote_id;
1336 o = y ? y->u.data : old_bgp->remote_id;
3228c72c
OZ
1337
1338 /* RFC 5004 - prefer older routes */
1339 /* (if both are external and from different peer) */
1340 if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1341 !new_bgp->is_internal && n != o)
1342 return 0;
1343
1344 /* rest of RFC 4271 9.1.2.2. f) */
4847a894
OZ
1345 if (n < o)
1346 return 1;
1347 if (n > o)
1348 return 0;
11cb6202 1349
3075824d
OZ
1350 /* RFC 4456 9. b) Compare cluster list lengths */
1351 x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
1352 y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
1353 n = x ? int_set_get_size(x->u.ptr) : 0;
1354 o = y ? int_set_get_size(y->u.ptr) : 0;
1355 if (n < o)
1356 return 1;
1357 if (n > o)
1358 return 0;
1359
4847a894
OZ
1360 /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
1361 return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
1362}
1363
be4cd99a 1364
8d9eef17
OZ
1365int
1366bgp_rte_mergable(rte *pri, rte *sec)
1367{
1368 struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
1369 struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
1370 eattr *x, *y;
1371 u32 p, s;
1372
1373 /* Skip suppressed routes (see bgp_rte_recalculate()) */
1374 if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
1375 return 0;
1376
1377 /* RFC 4271 9.1.2.1. Route resolvability test */
1378 if (!rte_resolvable(sec))
1379 return 0;
1380
1381 /* Start with local preferences */
1382 x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1383 y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
1384 p = x ? x->u.data : pri_bgp->cf->default_local_pref;
1385 s = y ? y->u.data : sec_bgp->cf->default_local_pref;
1386 if (p != s)
1387 return 0;
1388
1389 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1390 if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
1391 {
1392 x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1393 y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1394 p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1395 s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1396
1397 if (p != s)
1398 return 0;
1399
1400// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
1401// return 0;
1402 }
1403
1404 /* RFC 4271 9.1.2.2. b) Use origins */
1405 x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1406 y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
1407 p = x ? x->u.data : ORIGIN_INCOMPLETE;
1408 s = y ? y->u.data : ORIGIN_INCOMPLETE;
1409 if (p != s)
1410 return 0;
1411
1412 /* RFC 4271 9.1.2.2. c) Compare MED's */
1413 if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
1414 (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
1415 {
1416 x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1417 y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
1418 p = x ? x->u.data : pri_bgp->cf->default_med;
1419 s = y ? y->u.data : sec_bgp->cf->default_med;
1420 if (p != s)
1421 return 0;
1422 }
1423
1424 /* RFC 4271 9.1.2.2. d) Prefer external peers */
1425 if (pri_bgp->is_internal != sec_bgp->is_internal)
1426 return 0;
1427
1428 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1429 p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
1430 s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
1431 if (p != s)
1432 return 0;
1433
1434 /* Remaining criteria are ignored */
1435
1436 return 1;
1437}
1438
1439
1440
be4cd99a
OZ
1441static inline int
1442same_group(rte *r, u32 lpref, u32 lasn)
1443{
1444 return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
1445}
1446
1447static inline int
1448use_deterministic_med(rte *r)
1449{
094d2bdb 1450 struct proto *P = r->attrs->src->proto;
26822d8f 1451 return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
be4cd99a
OZ
1452}
1453
1454int
1455bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
1456{
1457 rte *r, *s;
1458 rte *key = new ? new : old;
1459 u32 lpref = key->pref;
1460 u32 lasn = bgp_get_neighbor(key);
1461 int old_is_group_best = 0;
1462
1463 /*
1464 * Proper RFC 4271 path selection is a bit complicated, it cannot be
1465 * implemented just by rte_better(), because it is not a linear
1466 * ordering. But it can be splitted to two levels, where the lower
1467 * level chooses the best routes in each group of routes from the
1468 * same neighboring AS and higher level chooses the best route (with
1469 * a slightly different ordering) between the best-in-group routes.
1470 *
1471 * When deterministic_med is disabled, we just ignore this issue and
1472 * choose the best route by bgp_rte_better() alone. If enabled, the
1473 * lower level of the route selection is done here (for the group
1474 * to which the changed route belongs), all routes in group are
1475 * marked as suppressed, just chosen best-in-group is not.
1476 *
1477 * Global best route selection then implements higher level by
1478 * choosing between non-suppressed routes (as they are always
1479 * preferred over suppressed routes). Routes from BGP protocols
1480 * that do not set deterministic_med are just never suppressed. As
1481 * they do not participate in the lower level selection, it is OK
1482 * that this fn is not called for them.
1483 *
1484 * The idea is simple, the implementation is more problematic,
1485 * mostly because of optimizations in rte_recalculate() that
1486 * avoids full recalculation in most cases.
1487 *
1488 * We can assume that at least one of new, old is non-NULL and both
1489 * are from the same protocol with enabled deterministic_med. We
1490 * group routes by both neighbor AS (lasn) and preference (lpref),
1491 * because bgp_rte_better() does not handle preference itself.
1492 */
1493
1494 /* If new and old are from different groups, we just process that
1495 as two independent events */
1496 if (new && old && !same_group(old, lpref, lasn))
1497 {
1498 int i1, i2;
1499 i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
1500 i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
1501 return i1 || i2;
1502 }
1503
1504 /*
1505 * We could find the best-in-group and then make some shortcuts like
1506 * in rte_recalculate, but as we would have to walk through all
1507 * net->routes just to find it, it is probably not worth. So we
1508 * just have two simpler fast cases that use just the old route.
1509 * We also set suppressed flag to avoid using it in bgp_rte_better().
1510 */
1511
1512 if (new)
1513 new->u.bgp.suppressed = 1;
1514
1515 if (old)
1516 {
1517 old_is_group_best = !old->u.bgp.suppressed;
1518 old->u.bgp.suppressed = 1;
1519 int new_is_better = new && bgp_rte_better(new, old);
1520
1521 /* The first case - replace not best with worse (or remove not best) */
1522 if (!old_is_group_best && !new_is_better)
1523 return 0;
1524
1525 /* The second case - replace the best with better */
1526 if (old_is_group_best && new_is_better)
1527 {
1528 /* new is best-in-group, the see discussion below - this is
1529 a special variant of NBG && OBG. From OBG we can deduce
1530 that same_group(old_best) iff (old == old_best) */
1531 new->u.bgp.suppressed = 0;
1532 return (old == old_best);
1533 }
1534 }
1535
1536 /* The default case - find a new best-in-group route */
1537 r = new; /* new may not be in the list */
cf98be7b 1538 for (s=net->routes; rte_is_valid(s); s=s->next)
be4cd99a
OZ
1539 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
1540 {
1541 s->u.bgp.suppressed = 1;
1542 if (!r || bgp_rte_better(s, r))
1543 r = s;
1544 }
1545
1546 /* Simple case - the last route in group disappears */
1547 if (!r)
1548 return 0;
1549
1550 /* Found best-in-group */
1551 r->u.bgp.suppressed = 0;
1552
1553 /*
1554 * There are generally two reasons why we have to force
1555 * recalculation (return 1): First, the new route may be wrongfully
1556 * chosen to be the best in the first case check in
1557 * rte_recalculate(), this may happen only if old_best is from the
1558 * same group. Second, another (different than new route)
1559 * best-in-group is chosen and that may be the proper best (although
1560 * rte_recalculate() without ignore that possibility).
1561 *
1562 * There are three possible cases according to whether the old route
1563 * was the best in group (OBG, stored in old_is_group_best) and
1564 * whether the new route is the best in group (NBG, tested by r == new).
1565 * These cases work even if old or new is NULL.
1566 *
1567 * NBG -> new is a possible candidate for the best route, so we just
1568 * check for the first reason using same_group().
1569 *
1570 * !NBG && OBG -> Second reason applies, return 1
1571 *
1572 * !NBG && !OBG -> Best in group does not change, old != old_best,
1573 * rte_better(new, old_best) is false and therefore
1574 * the first reason does not apply, return 0
1575 */
1576
1577 if (r == new)
1578 return old_best && same_group(old_best, lpref, lasn);
1579 else
1580 return old_is_group_best;
1581}
1582
11cb6202
OZ
1583static struct adata *
1584bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
1585{
1586 struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8);
1587 newa->length = 8;
1588 aggregator_convert_to_new(old, newa->data);
1589 return newa;
1590}
1591
1592
1593/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
1594 * and append path old4 (in 4B format).
1595 */
1596static struct adata *
1597bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool)
1598{
1599 byte buf[old2->length * 2];
1600
1601 int ol = as_path_convert_to_new(old2, buf, req_as);
1602 int nl = ol + (old4 ? old4->length : 0);
ef2c708d 1603
11cb6202
OZ
1604 struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl);
1605 newa->length = nl;
1606 memcpy(newa->data, buf, ol);
1607 if (old4) memcpy(newa->data + ol, old4->data, old4->length);
1608
1609 return newa;
1610}
1611
48d79d52
OZ
1612static int
1613as4_aggregator_valid(struct adata *aggr)
1614{
06fb60c4 1615 return aggr->length == 8;
48d79d52
OZ
1616}
1617
11cb6202 1618
4847a894 1619/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
11cb6202
OZ
1620static void
1621bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool)
1622{
1623 eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1624 eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH));
1625 eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR));
1626 eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR));
48d79d52
OZ
1627 int a4_removed = 0;
1628
1629 if (a4 && !as4_aggregator_valid(a4->u.ptr))
1630 {
29c430f8 1631 log(L_WARN "%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p->p.name);
48d79d52
OZ
1632 a4 = NULL;
1633 a4_removed = 1;
1634 }
11cb6202
OZ
1635
1636 if (a2)
ef2c708d 1637 {
11cb6202
OZ
1638 u32 a2_as = get_u16(a2->u.ptr->data);
1639
1640 if (a4)
ef2c708d 1641 {
11cb6202
OZ
1642 if (a2_as != AS_TRANS)
1643 {
1644 /* Routes were aggregated by old router and therefore AS4_PATH
1645 * and AS4_AGGREGATOR is invalid
1646 *
1647 * Convert AS_PATH and AGGREGATOR to 4B format and finish.
1648 */
1649
1650 a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
1651 p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
1652
1653 return;
1654 }
1655 else
1656 {
1657 /* Common case, use AS4_AGGREGATOR attribute */
1658 a2->u.ptr = a4->u.ptr;
1659 }
1660 }
1661 else
1662 {
1663 /* Common case, use old AGGREGATOR attribute */
1664 a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
1665
48d79d52 1666 if ((a2_as == AS_TRANS) && !a4_removed)
29c430f8 1667 log(L_WARN "%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p->p.name);
ef2c708d
MM
1668 }
1669 }
11cb6202
OZ
1670 else
1671 if (a4)
29c430f8 1672 log(L_WARN "%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p->p.name);
11cb6202 1673
949bd34e 1674 int p2_len = as_path_getlen_int(p2->u.ptr, 2);
29c430f8
OZ
1675 int p4_len = p4 ? validate_as4_path(p, p4->u.ptr) : -1;
1676
1677 if (p4 && (p4_len < 0))
1678 log(L_WARN "%s: AS4_PATH attribute is malformed, skipping attribute", p->p.name);
11cb6202 1679
48d79d52 1680 if ((p4_len <= 0) || (p2_len < p4_len))
11cb6202
OZ
1681 p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
1682 else
1683 p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool);
11cb6202
OZ
1684}
1685
1686static void
1687bgp_remove_as4_attrs(struct bgp_proto *p, rta *a)
1688{
1689 unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH);
1690 unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR);
1691 ea_list **el = &(a->eattrs);
1692
29c430f8 1693 /* We know that ea_lists constructed in bgp_decode attrs have one attribute per ea_list struct */
11cb6202
OZ
1694 while (*el != NULL)
1695 {
1696 unsigned fid = (*el)->attrs[0].id;
1697
1698 if ((fid == id1) || (fid == id2))
1699 {
1700 *el = (*el)->next;
ba5ed6f3 1701 if (p->as4_session)
06fb60c4 1702 log(L_WARN "%s: Unexpected AS4_* attributes received", p->p.name);
11cb6202
OZ
1703 }
1704 else
1705 el = &((*el)->next);
1706 }
ef2c708d
MM
1707}
1708
54e55169
MM
1709/**
1710 * bgp_decode_attrs - check and decode BGP attributes
1711 * @conn: connection
1712 * @attr: start of attribute block
1713 * @len: length of attribute block
1714 * @pool: linear pool to make all the allocations in
1715 * @mandatory: 1 iff presence of mandatory attributes has to be checked
1716 *
1717 * This function takes a BGP attribute block (a part of an Update message), checks
1718 * its consistency and converts it to a list of BIRD route attributes represented
1719 * by a &rta.
1720 */
c00d31be 1721struct rta *
ae80a2de 1722bgp_decode_attrs(struct bgp_conn *conn, byte *attr, uint len, struct linpool *pool, int mandatory)
c00d31be
MM
1723{
1724 struct bgp_proto *bgp = conn->bgp;
1725 rta *a = lp_alloc(pool, sizeof(struct rta));
ae80a2de 1726 uint flags, code, l, i, type;
ef2c708d 1727 int errcode;
c00d31be
MM
1728 byte *z, *attr_start;
1729 byte seen[256/8];
c00d31be
MM
1730 ea_list *ea;
1731 struct adata *ad;
06fb60c4 1732 int withdraw = 0;
c00d31be 1733
cfe34a31 1734 bzero(a, sizeof(rta));
c00d31be
MM
1735 a->source = RTS_BGP;
1736 a->scope = SCOPE_UNIVERSE;
1737 a->cast = RTC_UNICAST;
cfe34a31 1738 /* a->dest = RTD_ROUTER; -- set in bgp_set_next_hop() */
c00d31be 1739 a->from = bgp->cf->remote_ip;
c00d31be
MM
1740
1741 /* Parse the attributes */
1742 bzero(seen, sizeof(seen));
1743 DBG("BGP: Parsing attributes\n");
1744 while (len)
1745 {
1746 if (len < 2)
1747 goto malformed;
1748 attr_start = attr;
1749 flags = *attr++;
1750 code = *attr++;
1751 len -= 2;
1752 if (flags & BAF_EXT_LEN)
1753 {
1754 if (len < 2)
1755 goto malformed;
1756 l = get_u16(attr);
1757 attr += 2;
1758 len -= 2;
1759 }
1760 else
1761 {
1762 if (len < 1)
1763 goto malformed;
1764 l = *attr++;
1765 len--;
1766 }
1767 if (l > len)
1768 goto malformed;
1769 len -= l;
1770 z = attr;
1771 attr += l;
1772 DBG("Attr %02x %02x %d\n", code, flags, l);
1773 if (seen[code/8] & (1 << (code%8)))
1774 goto malformed;
d1a74339 1775 if (ATTR_KNOWN(code))
c00d31be
MM
1776 {
1777 struct attr_desc *desc = &bgp_attr_table[code];
1778 if (desc->expected_length >= 0 && desc->expected_length != (int) l)
1779 { errcode = 5; goto err; }
1780 if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
1781 { errcode = 4; goto err; }
1950a479
OZ
1782 if (!bgp->is_internal)
1783 {
1784 if (!desc->allow_in_ebgp)
1785 continue;
1786 if ((code == BA_LOCAL_PREF) && !bgp->cf->allow_local_pref)
1787 continue;
1788 }
ef2c708d
MM
1789 if (desc->validate)
1790 {
1791 errcode = desc->validate(bgp, z, l);
1792 if (errcode > 0)
1793 goto err;
06fb60c4 1794 if (errcode == IGNORE)
ef2c708d 1795 continue;
06fb60c4
OZ
1796 if (errcode <= WITHDRAW)
1797 {
1798 log(L_WARN "%s: Attribute %s is malformed, withdrawing update",
1799 bgp->p.name, desc->name);
1800 withdraw = 1;
1801 }
ef2c708d 1802 }
29c430f8
OZ
1803 else if (code == BA_AS_PATH)
1804 {
1805 /* Special case as it might also trim the attribute */
1806 if (validate_as_path(bgp, z, &l) < 0)
1807 { errcode = 11; goto err; }
1808 }
c00d31be
MM
1809 type = desc->type;
1810 }
1811 else /* Unknown attribute */
e3558ab1 1812 {
c00d31be
MM
1813 if (!(flags & BAF_OPTIONAL))
1814 { errcode = 2; goto err; }
1815 type = EAF_TYPE_OPAQUE;
1816 }
29c430f8
OZ
1817
1818 // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag
1819 // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL))
1820 // { errcode = 4; goto err; }
1821
ef2c708d
MM
1822 seen[code/8] |= (1 << (code%8));
1823 ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
c00d31be
MM
1824 ea->next = a->eattrs;
1825 a->eattrs = ea;
1826 ea->flags = 0;
1827 ea->count = 1;
1828 ea->attrs[0].id = EA_CODE(EAP_BGP, code);
1829 ea->attrs[0].flags = flags;
1830 ea->attrs[0].type = type;
1831 if (type & EAF_EMBEDDED)
1832 ad = NULL;
1833 else
1834 {
1835 ad = lp_alloc(pool, sizeof(struct adata) + l);
1836 ea->attrs[0].u.ptr = ad;
1837 ad->length = l;
1838 memcpy(ad->data, z, l);
1839 }
1840 switch (type)
1841 {
1842 case EAF_TYPE_ROUTER_ID:
1843 case EAF_TYPE_INT:
10be74da
MM
1844 if (l == 1)
1845 ea->attrs[0].u.data = *z;
1846 else
1847 ea->attrs[0].u.data = get_u32(z);
c00d31be
MM
1848 break;
1849 case EAF_TYPE_IP_ADDRESS:
f421cfdd 1850 ipa_ntoh(*(ip_addr *)ad->data);
c00d31be 1851 break;
1ed2fe96 1852 case EAF_TYPE_INT_SET:
66dbdbd9 1853 case EAF_TYPE_LC_SET:
42a0c054 1854 case EAF_TYPE_EC_SET:
1ed2fe96
MM
1855 {
1856 u32 *z = (u32 *) ad->data;
1857 for(i=0; i<ad->length/4; i++)
1858 z[i] = ntohl(z[i]);
1859 break;
1860 }
c00d31be
MM
1861 }
1862 }
1863
06fb60c4
OZ
1864 if (withdraw)
1865 goto withdraw;
1866
1c1da87b 1867#ifdef IPV6
f307842a
OZ
1868 /* If we received MP_REACH_NLRI we should check mandatory attributes */
1869 if (bgp->mp_reach_len != 0)
1c1da87b
MM
1870 mandatory = 1;
1871#endif
1872
b9539e78
OZ
1873 /* If there is no (reachability) NLRI, we should exit now */
1874 if (! mandatory)
1875 return a;
1876
c00d31be 1877 /* Check if all mandatory attributes are present */
b9539e78 1878 for(i=0; i < ARRAY_SIZE(bgp_mandatory_attrs); i++)
c00d31be 1879 {
b9539e78
OZ
1880 code = bgp_mandatory_attrs[i];
1881 if (!(seen[code/8] & (1 << (code%8))))
c00d31be 1882 {
b9539e78
OZ
1883 bgp_error(conn, 3, 3, &bgp_mandatory_attrs[i], 1);
1884 return NULL;
c00d31be
MM
1885 }
1886 }
f307842a 1887
11cb6202
OZ
1888 /* When receiving attributes from non-AS4-aware BGP speaker,
1889 * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
1890 */
43c1cecc 1891 if (! bgp->as4_session)
11cb6202
OZ
1892 bgp_reconstruct_4b_atts(bgp, a, pool);
1893
43c1cecc 1894 bgp_remove_as4_attrs(bgp, a);
c00d31be 1895
ef2c708d 1896 /* If the AS path attribute contains our AS, reject the routes */
4847a894 1897 if (bgp_as_path_loopy(bgp, a))
06fb60c4 1898 goto withdraw;
4847a894
OZ
1899
1900 /* Two checks for IBGP loops caused by route reflection, RFC 4456 */
1901 if (bgp_originator_id_loopy(bgp, a) ||
1902 bgp_cluster_list_loopy(bgp, a))
06fb60c4 1903 goto withdraw;
ef2c708d 1904
8b258e4e 1905 /* If there's no local preference, define one */
4819c3e1 1906 if (!(seen[0] & (1 << BA_LOCAL_PREF)))
fbcb7d5f 1907 bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, bgp->cf->default_local_pref);
f307842a 1908
2a9e064d 1909 return a;
c00d31be 1910
06fb60c4 1911withdraw:
4847a894
OZ
1912 return NULL;
1913
c00d31be 1914malformed:
efcece2d 1915 bgp_error(conn, 3, 1, NULL, 0);
c00d31be
MM
1916 return NULL;
1917
1918err:
2138d3b4 1919 bgp_error(conn, 3, errcode, attr_start, z+l-attr_start);
c00d31be
MM
1920 return NULL;
1921}
10be74da
MM
1922
1923int
aebe06b4 1924bgp_get_attr(eattr *a, byte *buf, int buflen)
10be74da 1925{
ae80a2de 1926 uint i = EA_ID(a->id);
10be74da 1927 struct attr_desc *d;
6c4df703 1928 int len;
10be74da 1929
d1a74339 1930 if (ATTR_KNOWN(i))
10be74da
MM
1931 {
1932 d = &bgp_attr_table[i];
6c4df703
OZ
1933 len = bsprintf(buf, "%s", d->name);
1934 buf += len;
10be74da
MM
1935 if (d->format)
1936 {
1937 *buf++ = ':';
1938 *buf++ = ' ';
6c4df703 1939 d->format(a, buf, buflen - len - 2);
10be74da
MM
1940 return GA_FULL;
1941 }
1942 return GA_NAME;
1943 }
d1a74339 1944 bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
10be74da
MM
1945 return GA_NAME;
1946}
ae8f5584
MM
1947
1948void
094d2bdb 1949bgp_init_bucket_table(struct bgp_proto *p)
ae8f5584
MM
1950{
1951 p->hash_size = 256;
1952 p->hash_limit = p->hash_size * 4;
c2b28c99
MM
1953 p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
1954 init_list(&p->bucket_queue);
1955 p->withdraw_bucket = NULL;
094d2bdb 1956 // fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
ae8f5584 1957}
5e88d730 1958
ed1a908e
OZ
1959void
1960bgp_free_bucket_table(struct bgp_proto *p)
1961{
1962 mb_free(p->bucket_hash);
1963 p->bucket_hash = NULL;
1964
1965 struct bgp_bucket *b;
1966 WALK_LIST_FIRST(b, p->bucket_queue)
1967 {
1968 rem_node(&b->send_node);
1969 mb_free(b);
1970 }
1971
1972 mb_free(p->withdraw_bucket);
1973 p->withdraw_bucket = NULL;
1974}
1975
5e88d730
MM
1976void
1977bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
1978{
1979 eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH));
1980 eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
11cb6202 1981 u32 origas;
5e88d730 1982
3ce17142
OZ
1983 buf += bsprintf(buf, " (%d", e->pref);
1984
be4cd99a 1985 if (e->u.bgp.suppressed)
3ce17142 1986 buf += bsprintf(buf, "-");
be4cd99a 1987
d1e146f2
OZ
1988 if (e->attrs->hostentry)
1989 {
7e95c05d 1990 if (!rte_resolvable(e))
d1e146f2
OZ
1991 buf += bsprintf(buf, "/-");
1992 else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
1993 buf += bsprintf(buf, "/?");
1994 else
1995 buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
1996 }
1997 buf += bsprintf(buf, ") [");
1998
52b9b2a1 1999 if (p && as_path_get_last(p->u.ptr, &origas))
11cb6202 2000 buf += bsprintf(buf, "AS%u", origas);
5e88d730
MM
2001 if (o)
2002 buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2003 strcpy(buf, "]");
2004}