]> git.ipfire.org Git - thirdparty/bird.git/blame - proto/bgp/attrs.c
ASPA: basic data structures and Static protocol support
[thirdparty/bird.git] / proto / bgp / attrs.c
CommitLineData
c01e3741
MM
1/*
2 * BIRD -- BGP Attributes
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
d15b0b0a
OZ
5 * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6 * (c) 2008--2016 CZ.NIC z.s.p.o.
c01e3741
MM
7 *
8 * Can be freely distributed and used under the terms of the GNU GPL.
9 */
10
85368cd4 11#undef LOCAL_DEBUG
c00d31be 12
e3558ab1
MM
13#include <stdlib.h>
14
c01e3741
MM
15#include "nest/bird.h"
16#include "nest/iface.h"
17#include "nest/protocol.h"
18#include "nest/route.h"
c0668f36 19#include "nest/attrs.h"
9d456d53 20#include "nest/mpls.h"
c01e3741 21#include "conf/conf.h"
c00d31be
MM
22#include "lib/resource.h"
23#include "lib/string.h"
24#include "lib/unaligned.h"
c01e3741
MM
25
26#include "bgp.h"
c00d31be 27
06fb60c4
OZ
28/*
29 * UPDATE message error handling
30 *
31 * All checks from RFC 4271 6.3 are done as specified with these exceptions:
32 * - The semantic check of an IP address from NEXT_HOP attribute is missing.
33 * - Checks of some optional attribute values are missing.
34 * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
35 * are probably inadequate.
36 *
37 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
09ee846d 38 * 4271 does not explicitly specify the behavior in that case.
06fb60c4
OZ
39 *
40 * Loop detection related to route reflection (based on ORIGINATOR_ID
41 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
42 * specifies that such updates should be ignored, but that is generally
43 * a bad idea.
44 *
d15b0b0a
OZ
45 * BGP attribute table has several hooks:
46 *
47 * export - Hook that validates and normalizes attribute during export phase.
48 * Receives eattr, may modify it (e.g., sort community lists for canonical
963b2c7c
OZ
49 * representation), UNSET() it (e.g., skip empty lists), or REJECT() the route
50 * if necessary. May assume that eattr has value valid w.r.t. its type, but may
51 * be invalid w.r.t. BGP constraints. Optional.
d15b0b0a
OZ
52 *
53 * encode - Hook that converts internal representation to external one during
54 * packet writing. Receives eattr and puts it in the buffer (including attribute
55 * header). Returns number of bytes, or -1 if not enough space. May assume that
56 * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
57 * for all known attributes that exist internally after export phase (i.e., all
58 * except pseudoattributes MP_(UN)REACH_NLRI).
59 *
60 * decode - Hook that converts external representation to internal one during
61 * packet parsing. Receives attribute data in buffer, validates it and adds
62 * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
63 * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
64 *
65 * format - Optional hook that converts eattr to textual representation.
06fb60c4
OZ
66 */
67
ae8f5584 68
d15b0b0a
OZ
69struct bgp_attr_desc {
70 const char *name;
71 uint type;
72 uint flags;
73 void (*export)(struct bgp_export_state *s, eattr *a);
74 int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
75 void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
258be565 76 void (*format)(const eattr *ea, byte *buf, uint size);
ae8f5584
MM
77};
78
d15b0b0a
OZ
79static const struct bgp_attr_desc bgp_attr_table[];
80
81static inline int bgp_attr_known(uint code);
82
83eattr *
84bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
85{
86 ASSERT(bgp_attr_known(code));
87
daf113ac
MJM
88 return ea_set_attr(
89 attrs,
90 pool,
91 EA_CODE(PROTOCOL_BGP, code),
2d0652dd 92 bgp_attr_table[code].flags | (flags & BAF_PARTIAL),
daf113ac
MJM
93 bgp_attr_table[code].type,
94 val
95 );
d15b0b0a
OZ
96}
97
98
99
/*
 * Helper macros for attribute hooks. All of them expect the hook state to be
 * available as local variable 's'; those containing 'return' may only be used
 * in functions returning void.
 */

/* Log a message prefixed with the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log a message and leave the hook without further action */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log a message, set s->err_withdraw and leave the hook */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Mark attribute @a as undefined and leave the hook */
#define UNSET(a) \
  ({ a->undef = 1; return; })

/* Log an error, set s->err_reject and leave the hook */
#define REJECT(msg, args...) \
  ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; })

/* Message templates shared by the hooks */
#define NEW_BGP		"Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP	"Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH	"Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE	"Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY	"Missing mandatory %s attribute"
120
121
122static inline int
123bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
124{
7a74ad5a 125 *buf++ = flags & ~BAF_EXT_LEN;
d15b0b0a
OZ
126 *buf++ = code;
127 *buf++ = len;
128 return 3;
129}
130
131static inline int
132bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
133{
134 *buf++ = flags | BAF_EXT_LEN;
135 *buf++ = code;
136 put_u16(buf, len);
137 return 4;
138}
139
140static inline int
141bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
142{
143 if (len < 256)
144 return bgp_put_attr_hdr3(buf, code, flags, len);
145 else
146 return bgp_put_attr_hdr4(buf, code, flags, len);
147}
06fb60c4 148
f421cfdd 149static int
d15b0b0a 150bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
f421cfdd 151{
d15b0b0a
OZ
152 if (size < (3+1))
153 return -1;
154
155 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
156 buf[3] = a->u.data;
157
158 return 3+1;
f421cfdd
MM
159}
160
d15b0b0a
OZ
161static int
162bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
f421cfdd 163{
d15b0b0a
OZ
164 if (size < (3+4))
165 return -1;
166
167 bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
168 put_u32(buf+3, a->u.data);
f421cfdd 169
d15b0b0a 170 return 3+4;
f421cfdd
MM
171}
172
173static int
d15b0b0a 174bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
f421cfdd 175{
d15b0b0a 176 uint len = a->u.ptr->length;
29c430f8 177
d15b0b0a
OZ
178 if (size < (4+len))
179 return -1;
29c430f8 180
d15b0b0a
OZ
181 uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
182 put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
183
184 return hdr + len;
f421cfdd
MM
185}
186
11cb6202 187static int
4c553c5a 188bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint len)
29c430f8 189{
d15b0b0a
OZ
190 if (size < (4+len))
191 return -1;
29c430f8 192
d15b0b0a
OZ
193 uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
194 memcpy(buf + hdr, data, len);
29c430f8 195
d15b0b0a
OZ
196 return hdr + len;
197}
198
199static int
200bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
201{
202 return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
203}
204
205
09ee846d
OZ
206/*
207 * AIGP handling
208 */
209
210static int
211bgp_aigp_valid(byte *data, uint len, char *err, uint elen)
212{
213 byte *pos = data;
214 char *err_dsc = NULL;
215 uint err_val = 0;
216
217#define BAD(DSC,VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; })
218 while (len)
219 {
220 if (len < 3)
221 BAD("TLV framing error", len);
222
223 /* Process one TLV */
224 uint ptype = pos[0];
225 uint plen = get_u16(pos + 1);
226
227 if (len < plen)
228 BAD("TLV framing error", plen);
229
230 if (plen < 3)
231 BAD("Bad TLV length", plen);
232
233 if ((ptype == BGP_AIGP_METRIC) && (plen != 11))
234 BAD("Bad AIGP TLV length", plen);
235
236 ADVANCE(pos, len, plen);
237 }
238#undef BAD
239
240 return 1;
241
242bad:
243 if (err)
244 if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0)
245 err[0] = 0;
246
247 return 0;
248}
249
250static const byte *
251bgp_aigp_get_tlv(const struct adata *ad, uint type)
252{
253 if (!ad)
254 return NULL;
255
256 uint len = ad->length;
257 const byte *pos = ad->data;
258
259 while (len)
260 {
261 uint ptype = pos[0];
262 uint plen = get_u16(pos + 1);
263
264 if (ptype == type)
265 return pos;
266
267 ADVANCE(pos, len, plen);
268 }
269
270 return NULL;
271}
272
273static const struct adata *
274bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen)
275{
276 uint len = ad ? ad->length : 0;
277 const byte *pos = ad ? ad->data : NULL;
278 struct adata *res = lp_alloc_adata(pool, len + 3 + dlen);
279 byte *dst = res->data;
280 byte *tlv = NULL;
281 int del = 0;
282
283 while (len)
284 {
285 uint ptype = pos[0];
286 uint plen = get_u16(pos + 1);
287
288 /* Find position for new TLV */
289 if ((ptype >= type) && !tlv)
290 {
291 tlv = dst;
292 dst += 3 + dlen;
293 }
294
295 /* Skip first matching TLV, copy others */
296 if ((ptype == type) && !del)
297 del = 1;
298 else
299 {
300 memcpy(dst, pos, plen);
301 dst += plen;
302 }
303
304 ADVANCE(pos, len, plen);
305 }
306
307 if (!tlv)
308 {
309 tlv = dst;
310 dst += 3 + dlen;
311 }
312
313 /* Store the TLD */
314 put_u8(tlv + 0, type);
315 put_u16(tlv + 1, 3 + dlen);
316 memcpy(tlv + 3, data, dlen);
317
318 /* Update length */
319 res->length = dst - res->data;
320
321 return res;
322}
323
324static u64 UNUSED
325bgp_aigp_get_metric(const struct adata *ad, u64 def)
326{
327 const byte *b = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC);
328 return b ? get_u64(b + 3) : def;
329}
330
331static const struct adata *
332bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
333{
334 byte data[8];
335 put_u64(data, metric);
336 return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, data, 8);
337}
338
339int
340bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
341{
342 eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
343 if (!a)
344 return 0;
345
346 const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
347 if (!b)
348 return 0;
349
350 u64 aigp = get_u64(b + 3);
351 u64 step = e->attrs->igp_metric;
352
353 if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
354 step = BGP_AIGP_MAX;
355
356 if (!step)
357 step = 1;
358
359 *ad = a->u.ptr;
360 *metric = aigp + step;
361 if (*metric < aigp)
362 *metric = BGP_AIGP_MAX;
363
364 return 1;
365}
366
367static inline int
368bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
369{
370 if (e->attrs->source == RTS_BGP)
371 return 0;
372
373 *metric = rt_get_igp_metric(e);
374 *ad = NULL;
375 return *metric < IGP_METRIC_UNKNOWN;
376}
377
d471d5fc
MM
378u32
379bgp_rte_igp_metric(struct rte *rt)
380{
381 u64 metric = bgp_total_aigp_metric(rt);
382 return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
383}
384
09ee846d 385
d15b0b0a
OZ
386/*
387 * Attribute hooks
388 */
29c430f8 389
d15b0b0a
OZ
390static void
391bgp_export_origin(struct bgp_export_state *s, eattr *a)
392{
393 if (a->u.data > 2)
963b2c7c 394 REJECT(BAD_VALUE, "ORIGIN", a->u.data);
29c430f8
OZ
395}
396
d15b0b0a
OZ
397static void
398bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
11cb6202 399{
d15b0b0a
OZ
400 if (len != 1)
401 WITHDRAW(BAD_LENGTH, "ORIGIN", len);
402
403 if (data[0] > 2)
404 WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
405
406 bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
29c430f8
OZ
407}
408
d15b0b0a 409static void
258be565 410bgp_format_origin(const eattr *a, byte *buf, uint size UNUSED)
29c430f8 411{
d15b0b0a
OZ
412 static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
413
414 bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
11cb6202
OZ
415}
416
d15b0b0a 417
0b228fca
OZ
418static inline int
419bgp_as_path_first_as_equal(const byte *data, uint len, u32 asn)
420{
421 return (len >= 6) &&
422 ((data[0] == AS_PATH_SEQUENCE) || (data[0] == AS_PATH_CONFED_SEQUENCE)) &&
423 (data[1] > 0) &&
424 (get_u32(data+2) == asn);
425}
426
f421cfdd 427static int
d15b0b0a 428bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
f421cfdd 429{
4c553c5a 430 const byte *data = a->u.ptr->data;
d15b0b0a 431 uint len = a->u.ptr->length;
f421cfdd 432
d15b0b0a
OZ
433 if (!s->as4_session)
434 {
435 /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
4c553c5a
MM
436 byte *dst = alloca(len);
437 len = as_path_32to16(dst, data, len);
438 data = dst;
d15b0b0a
OZ
439 }
440
441 return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
442}
443
444static void
445bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
446{
5509e17d
OZ
447 struct bgp_proto *p = s->proto;
448 int as_length = s->as4_session ? 4 : 2;
a52476c9 449 int as_sets = p->cf->allow_as_sets;
5509e17d 450 int as_confed = p->cf->confederation && p->is_interior;
d15b0b0a
OZ
451 char err[128];
452
a52476c9 453 if (!as_path_valid(data, len, as_length, as_sets, as_confed, err, sizeof(err)))
d15b0b0a
OZ
454 WITHDRAW("Malformed AS_PATH attribute - %s", err);
455
456 if (!s->as4_session)
457 {
458 /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
459 byte *src = data;
460 data = alloca(2*len);
461 len = as_path_16to32(data, src, len);
462 }
463
0b228fca
OZ
464 /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
465 if (p->is_interior && !p->is_internal &&
466 ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
467 WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
468
469 /* Reject routes with first AS in AS_PATH not matching neighbor AS; RFC 4271 6.3 */
470 if (!p->is_internal && p->cf->enforce_first_as &&
471 !bgp_as_path_first_as_equal(data, len, p->remote_as))
472 WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS");
473
d15b0b0a
OZ
474 bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
475}
476
477
478static int
479bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
480{
481 /*
482 * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
483 * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
484 * store it and encode it later by AFI-specific hooks.
485 */
486
863ecfc7 487 if (!s->mp_reach)
d15b0b0a 488 {
863ecfc7
OZ
489 // ASSERT(a->u.ptr->length == sizeof(ip_addr));
490
491 /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
492 ip_addr *addr = (void *) a->u.ptr->data;
493 if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
494 return 0;
d15b0b0a
OZ
495
496 if (size < (3+4))
497 return -1;
498
499 bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
863ecfc7 500 put_ip4(buf+3, ipa_to_ip4(*addr));
d15b0b0a
OZ
501
502 return 3+4;
503 }
504 else
505 {
506 s->mp_next_hop = a;
f421cfdd 507 return 0;
d15b0b0a
OZ
508 }
509}
510
511static void
512bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
513{
514 if (len != 4)
515 WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
516
517 /* Semantic checks are done later */
518 s->ip_next_hop_len = len;
519 s->ip_next_hop_data = data;
520}
521
522/* TODO: This function should use AF-specific hook */
523static void
258be565 524bgp_format_next_hop(const eattr *a, byte *buf, uint size UNUSED)
d15b0b0a
OZ
525{
526 ip_addr *nh = (void *) a->u.ptr->data;
527 uint len = a->u.ptr->length;
528
529 ASSERT((len == 16) || (len == 32));
530
531 /* in IPv6, we may have two addresses in NEXT HOP */
532 if ((len == 16) || ipa_zero(nh[1]))
533 bsprintf(buf, "%I", nh[0]);
f421cfdd 534 else
d15b0b0a 535 bsprintf(buf, "%I %I", nh[0], nh[1]);
1c1da87b
MM
536}
537
d15b0b0a 538
d0e2d6d1 539static void
d15b0b0a 540bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
d0e2d6d1 541{
d15b0b0a
OZ
542 if (len != 4)
543 WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
d0e2d6d1 544
d15b0b0a
OZ
545 u32 val = get_u32(data);
546 bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
d0e2d6d1
OZ
547}
548
d15b0b0a
OZ
549
550static void
551bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
11cb6202 552{
e919601a 553 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
d15b0b0a 554 UNSET(a);
11cb6202
OZ
555}
556
cd17c651 557static void
d15b0b0a 558bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
cd17c651 559{
e919601a 560 if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
d15b0b0a 561 DISCARD(BAD_EBGP, "LOCAL_PREF");
cd17c651 562
d15b0b0a
OZ
563 if (len != 4)
564 WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
cd17c651 565
d15b0b0a
OZ
566 u32 val = get_u32(data);
567 bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
cd17c651
OZ
568}
569
d15b0b0a
OZ
570
571static void
572bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
06fb60c4 573{
d15b0b0a
OZ
574 if (len != 0)
575 DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
576
577 bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
06fb60c4
OZ
578}
579
4847a894 580static int
d15b0b0a
OZ
581bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
582{
4c553c5a 583 const byte *data = a->u.ptr->data;
d15b0b0a
OZ
584 uint len = a->u.ptr->length;
585
586 if (!s->as4_session)
587 {
588 /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
4c553c5a
MM
589 byte *dst = alloca(6);
590 len = aggregator_32to16(dst, data);
a6548d5b 591 data = dst;
d15b0b0a
OZ
592 }
593
594 return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
595}
596
597static void
598bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
599{
600 if (len != (s->as4_session ? 8 : 6))
601 DISCARD(BAD_LENGTH, "AGGREGATOR", len);
602
603 if (!s->as4_session)
604 {
605 /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
606 byte *src = data;
607 data = alloca(8);
608 len = aggregator_16to32(data, src);
609 }
610
611 bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
612}
613
614static void
258be565 615bgp_format_aggregator(const eattr *a, byte *buf, uint size UNUSED)
d15b0b0a 616{
4c553c5a 617 const byte *data = a->u.ptr->data;
d15b0b0a
OZ
618
619 bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
620}
621
622
623static void
624bgp_export_community(struct bgp_export_state *s, eattr *a)
625{
626 if (a->u.ptr->length == 0)
627 UNSET(a);
628
629 a->u.ptr = int_set_sort(s->pool, a->u.ptr);
630}
631
632static void
633bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
634{
635 if (!len || (len % 4))
636 WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
637
638 struct adata *ad = lp_alloc_adata(s->pool, len);
639 get_u32s(data, (u32 *) ad->data, len / 4);
640 bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
641}
642
643
644static void
645bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
4847a894 646{
d15b0b0a
OZ
647 if (!s->proto->is_internal)
648 UNSET(a);
4847a894
OZ
649}
650
aebe06b4 651static void
d15b0b0a
OZ
652bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
653{
654 if (!s->proto->is_internal)
655 DISCARD(BAD_EBGP, "ORIGINATOR_ID");
656
657 if (len != 4)
658 WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
659
660 u32 val = get_u32(data);
661 bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
662}
663
664
665static void
666bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
667{
668 if (!s->proto->is_internal)
669 UNSET(a);
670
671 if (a->u.ptr->length == 0)
672 UNSET(a);
673}
674
675static void
676bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
677{
678 if (!s->proto->is_internal)
679 DISCARD(BAD_EBGP, "CLUSTER_LIST");
680
681 if (!len || (len % 4))
682 WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
683
684 struct adata *ad = lp_alloc_adata(s->pool, len);
685 get_u32s(data, (u32 *) ad->data, len / 4);
686 bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
687}
688
689static void
258be565 690bgp_format_cluster_list(const eattr *a, byte *buf, uint size)
aebe06b4 691{
fdf16eb6 692 /* Truncates cluster lists larger than buflen, probably not a problem */
08571b20 693 int_set_format(a->u.ptr, ISF_ROUTER_ID, -1, buf, size);
aebe06b4
OZ
694}
695
d15b0b0a 696
d774f6d7
OZ
697int
698bgp_encode_mp_reach_mrt(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
699{
700 /*
701 * Limited version of MP_REACH_NLRI used for MRT table dumps (IPv6 only):
702 *
703 * 3 B MP_REACH_NLRI header
704 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
705 * var MP_REACH_NLRI data - Network Address of Next Hop
706 */
707
708 ip_addr *nh = (void *) a->u.ptr->data;
709 uint len = a->u.ptr->length;
710
711 ASSERT((len == 16) || (len == 32));
712
713 if (size < (3+1+len))
714 return -1;
715
716 bgp_put_attr_hdr3(buf, BA_MP_REACH_NLRI, BAF_OPTIONAL, 1+len);
717 buf[3] = len;
718 buf += 4;
719
720 put_ip6(buf, ipa_to_ip6(nh[0]));
721
722 if (len == 32)
723 put_ip6(buf+16, ipa_to_ip6(nh[1]));
724
725 return 3+1+len;
726}
727
d15b0b0a
OZ
728static inline u32
729get_af3(byte *buf)
1c1da87b 730{
d15b0b0a 731 return (get_u16(buf) << 16) | buf[2];
1c1da87b
MM
732}
733
d15b0b0a
OZ
734static void
735bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
1c1da87b 736{
d15b0b0a
OZ
737 /*
738 * 2 B MP_REACH_NLRI data - Address Family Identifier
739 * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
740 * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
741 * var MP_REACH_NLRI data - Network Address of Next Hop
742 * 1 B MP_REACH_NLRI data - Reserved (zero)
743 * var MP_REACH_NLRI data - Network Layer Reachability Information
744 */
745
746 if ((len < 5) || (len < (5 + (uint) data[3])))
747 bgp_parse_error(s, 9);
748
749 s->mp_reach_af = get_af3(data);
750 s->mp_next_hop_len = data[3];
751 s->mp_next_hop_data = data + 4;
752 s->mp_reach_len = len - 5 - s->mp_next_hop_len;
753 s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
f421cfdd
MM
754}
755
d15b0b0a
OZ
756
757static void
758bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
42a0c054 759{
d15b0b0a
OZ
760 /*
761 * 2 B MP_UNREACH_NLRI data - Address Family Identifier
762 * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
763 * var MP_UNREACH_NLRI data - Network Layer Reachability Information
764 */
765
766 if (len < 3)
767 bgp_parse_error(s, 9);
768
769 s->mp_unreach_af = get_af3(data);
770 s->mp_unreach_len = len - 3;
771 s->mp_unreach_nlri = data + 3;
42a0c054
OZ
772}
773
d15b0b0a
OZ
774
775static void
776bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
777{
ec331acf
OZ
778 if (!s->proto->is_interior)
779 {
780 struct adata *ad = ec_set_del_nontrans(s->pool, a->u.ptr);
d807ea08 781
ec331acf
OZ
782 if (ad->length == 0)
783 UNSET(a);
d15b0b0a 784
ec331acf
OZ
785 ec_set_sort_x(ad);
786 a->u.ptr = ad;
787 }
788 else
789 {
790 if (a->u.ptr->length == 0)
791 UNSET(a);
792
793 a->u.ptr = ec_set_sort(s->pool, a->u.ptr);
794 }
d15b0b0a
OZ
795}
796
797static void
798bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
799{
800 if (!len || (len % 8))
801 WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
802
803 struct adata *ad = lp_alloc_adata(s->pool, len);
804 get_u32s(data, (u32 *) ad->data, len / 4);
805 bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
806}
807
808
809static void
810bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
811{
812 if (s->as4_session)
813 DISCARD(NEW_BGP, "AS4_AGGREGATOR");
814
815 if (len != 8)
816 DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
817
818 bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
819}
820
821static void
822bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
823{
a52476c9
OZ
824 struct bgp_proto *p = s->proto;
825 int sets = p->cf->allow_as_sets;
826
d15b0b0a
OZ
827 char err[128];
828
829 if (s->as4_session)
830 DISCARD(NEW_BGP, "AS4_PATH");
831
832 if (len < 6)
833 DISCARD(BAD_LENGTH, "AS4_PATH", len);
834
a52476c9 835 if (!as_path_valid(data, len, 4, sets, 1, err, sizeof(err)))
d15b0b0a
OZ
836 DISCARD("Malformed AS4_PATH attribute - %s", err);
837
5509e17d
OZ
838 struct adata *a = lp_alloc_adata(s->pool, len);
839 memcpy(a->data, data, len);
840
841 /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
842 if (as_path_contains_confed(a))
843 {
844 REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
845 a = as_path_strip_confed(s->pool, a);
846 }
847
848 bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
d15b0b0a
OZ
849}
850
09ee846d
OZ
851
852static void
853bgp_export_aigp(struct bgp_export_state *s, eattr *a)
854{
855 if (!s->channel->cf->aigp)
856 UNSET(a);
857}
858
859static void
860bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
861{
862 char err[128];
863
864 /* Acceptability test postponed to bgp_finish_attrs() */
865
866 if ((flags ^ bgp_attr_table[BA_AIGP].flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
867 DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags);
868
869 if (!bgp_aigp_valid(data, len, err, sizeof(err)))
870 DISCARD("Malformed AIGP attribute - %s", err);
871
872 bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
873}
874
875static void
258be565 876bgp_format_aigp(const eattr *a, byte *buf, uint size UNUSED)
09ee846d
OZ
877{
878 const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
879
880 if (!b)
881 bsprintf(buf, "?");
882 else
883 bsprintf(buf, "%lu", get_u64(b + 3));
884}
885
886
d15b0b0a
OZ
887static void
888bgp_export_large_community(struct bgp_export_state *s, eattr *a)
889{
890 if (a->u.ptr->length == 0)
891 UNSET(a);
892
893 a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
894}
895
896static void
897bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
898{
899 if (!len || (len % 12))
900 WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
901
902 struct adata *ad = lp_alloc_adata(s->pool, len);
903 get_u32s(data, (u32 *) ad->data, len / 4);
904 bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
905}
906
c73b5d2d
EB
907
908static void
909bgp_decode_otc(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
910{
911 if (len != 4)
912 WITHDRAW(BAD_LENGTH, "OTC", len);
913
914 u32 val = get_u32(data);
915 bgp_set_attr_u32(to, s->pool, BA_ONLY_TO_CUSTOMER, flags, val);
916}
917
918
1e37e35c
OZ
919static void
920bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
921{
922 net_addr *n = s->route->net->n.addr;
923 u32 *labels = (u32 *) a->u.ptr->data;
924 uint lnum = a->u.ptr->length / 4;
925
926 /* Perhaps we should just ignore it? */
927 if (!s->mpls)
963b2c7c 928 REJECT("Unexpected MPLS stack");
1e37e35c
OZ
929
930 /* Empty MPLS stack is not allowed */
931 if (!lnum)
963b2c7c 932 REJECT("Malformed MPLS stack - empty");
1e37e35c
OZ
933
934 /* This is ugly, but we must ensure that labels fit into NLRI field */
935 if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
963b2c7c 936 REJECT("Malformed MPLS stack - too many labels (%u)", lnum);
1e37e35c
OZ
937
938 for (uint i = 0; i < lnum; i++)
939 {
940 if (labels[i] > 0xfffff)
963b2c7c 941 REJECT("Malformed MPLS stack - invalid label (%u)", labels[i]);
1e37e35c
OZ
942
943 /* TODO: Check for special-purpose label values? */
944 }
945}
946
947static int
948bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
949{
950 /*
951 * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
952 * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
953 */
954
955 s->mpls_labels = a->u.ptr;
956 return 0;
957}
958
959static void
960bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
961{
962 DISCARD("Discarding received attribute #0");
963}
964
965static void
258be565 966bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size)
1e37e35c
OZ
967{
968 u32 *labels = (u32 *) a->u.ptr->data;
969 uint lnum = a->u.ptr->length / 4;
970 char *pos = buf;
971
972 for (uint i = 0; i < lnum; i++)
973 {
974 if (size < 20)
975 {
976 bsprintf(pos, "...");
977 return;
978 }
979
980 uint l = bsprintf(pos, "%d/", labels[i]);
981 ADVANCE(pos, size, l);
982 }
983
984 /* Clear last slash or terminate empty string */
985 pos[lnum ? -1 : 0] = 0;
986}
987
d15b0b0a
OZ
988static inline void
989bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
990{
d493d0f1 991 /* Cannot use bgp_set_attr_data() as it works on known attributes only */
ee7e2ffd 992 ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
d15b0b0a
OZ
993}
994
995
996/*
997 * Attribute table
998 */
999
1000static const struct bgp_attr_desc bgp_attr_table[] = {
1001 [BA_ORIGIN] = {
1002 .name = "origin",
1003 .type = EAF_TYPE_INT,
1004 .flags = BAF_TRANSITIVE,
1005 .export = bgp_export_origin,
1006 .encode = bgp_encode_u8,
1007 .decode = bgp_decode_origin,
1008 .format = bgp_format_origin,
1009 },
1010 [BA_AS_PATH] = {
1011 .name = "as_path",
1012 .type = EAF_TYPE_AS_PATH,
1013 .flags = BAF_TRANSITIVE,
1014 .encode = bgp_encode_as_path,
1015 .decode = bgp_decode_as_path,
1016 },
1017 [BA_NEXT_HOP] = {
1018 .name = "next_hop",
1019 .type = EAF_TYPE_IP_ADDRESS,
1020 .flags = BAF_TRANSITIVE,
1021 .encode = bgp_encode_next_hop,
1022 .decode = bgp_decode_next_hop,
1023 .format = bgp_format_next_hop,
1024 },
1025 [BA_MULTI_EXIT_DISC] = {
1026 .name = "med",
1027 .type = EAF_TYPE_INT,
1028 .flags = BAF_OPTIONAL,
1029 .encode = bgp_encode_u32,
1030 .decode = bgp_decode_med,
1031 },
1032 [BA_LOCAL_PREF] = {
1033 .name = "local_pref",
1034 .type = EAF_TYPE_INT,
1035 .flags = BAF_TRANSITIVE,
1036 .export = bgp_export_local_pref,
1037 .encode = bgp_encode_u32,
1038 .decode = bgp_decode_local_pref,
1039 },
1040 [BA_ATOMIC_AGGR] = {
1041 .name = "atomic_aggr",
1042 .type = EAF_TYPE_OPAQUE,
1043 .flags = BAF_TRANSITIVE,
1044 .encode = bgp_encode_raw,
1045 .decode = bgp_decode_atomic_aggr,
1046 },
1047 [BA_AGGREGATOR] = {
1048 .name = "aggregator",
1049 .type = EAF_TYPE_OPAQUE,
1050 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1051 .encode = bgp_encode_aggregator,
1052 .decode = bgp_decode_aggregator,
1053 .format = bgp_format_aggregator,
1054 },
1055 [BA_COMMUNITY] = {
1056 .name = "community",
1057 .type = EAF_TYPE_INT_SET,
1058 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1059 .export = bgp_export_community,
1060 .encode = bgp_encode_u32s,
1061 .decode = bgp_decode_community,
1062 },
1063 [BA_ORIGINATOR_ID] = {
1064 .name = "originator_id",
1065 .type = EAF_TYPE_ROUTER_ID,
1066 .flags = BAF_OPTIONAL,
1067 .export = bgp_export_originator_id,
1068 .encode = bgp_encode_u32,
1069 .decode = bgp_decode_originator_id,
1070 },
1071 [BA_CLUSTER_LIST] = {
1072 .name = "cluster_list",
1073 .type = EAF_TYPE_INT_SET,
1074 .flags = BAF_OPTIONAL,
1075 .export = bgp_export_cluster_list,
1076 .encode = bgp_encode_u32s,
1077 .decode = bgp_decode_cluster_list,
1078 .format = bgp_format_cluster_list,
1079 },
1080 [BA_MP_REACH_NLRI] = {
1081 .name = "mp_reach_nlri",
1082 .type = EAF_TYPE_OPAQUE,
1083 .flags = BAF_OPTIONAL,
1084 .decode = bgp_decode_mp_reach_nlri,
1085 },
1086 [BA_MP_UNREACH_NLRI] = {
1087 .name = "mp_unreach_nlri",
1088 .type = EAF_TYPE_OPAQUE,
1089 .flags = BAF_OPTIONAL,
1090 .decode = bgp_decode_mp_unreach_nlri,
1091 },
1092 [BA_EXT_COMMUNITY] = {
1093 .name = "ext_community",
1094 .type = EAF_TYPE_EC_SET,
1095 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1096 .export = bgp_export_ext_community,
1097 .encode = bgp_encode_u32s,
1098 .decode = bgp_decode_ext_community,
1099 },
1100 [BA_AS4_PATH] = {
1101 .name = "as4_path",
1102 .type = EAF_TYPE_AS_PATH,
1103 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1104 .encode = bgp_encode_raw,
1105 .decode = bgp_decode_as4_path,
1106 },
1107 [BA_AS4_AGGREGATOR] = {
1108 .name = "as4_aggregator",
1109 .type = EAF_TYPE_OPAQUE,
1110 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1111 .encode = bgp_encode_raw,
1112 .decode = bgp_decode_as4_aggregator,
1113 .format = bgp_format_aggregator,
1114 },
09ee846d
OZ
1115 [BA_AIGP] = {
1116 .name = "aigp",
1117 .type = EAF_TYPE_OPAQUE,
1118 .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
1119 .export = bgp_export_aigp,
1120 .encode = bgp_encode_raw,
1121 .decode = bgp_decode_aigp,
1122 .format = bgp_format_aigp,
1123 },
d15b0b0a
OZ
1124 [BA_LARGE_COMMUNITY] = {
1125 .name = "large_community",
1126 .type = EAF_TYPE_LC_SET,
1127 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1128 .export = bgp_export_large_community,
1129 .encode = bgp_encode_u32s,
1130 .decode = bgp_decode_large_community,
1131 },
c73b5d2d
EB
1132 [BA_ONLY_TO_CUSTOMER] = {
1133 .name = "otc",
1134 .type = EAF_TYPE_INT,
1135 .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1136 .encode = bgp_encode_u32,
1137 .decode = bgp_decode_otc,
1138 },
1e37e35c
OZ
1139 [BA_MPLS_LABEL_STACK] = {
1140 .name = "mpls_label_stack",
1141 .type = EAF_TYPE_INT_SET,
1142 .export = bgp_export_mpls_label_stack,
1143 .encode = bgp_encode_mpls_label_stack,
1144 .decode = bgp_decode_mpls_label_stack,
1145 .format = bgp_format_mpls_label_stack,
1146 },
f421cfdd
MM
1147};
1148
d15b0b0a
OZ
1149static inline int
1150bgp_attr_known(uint code)
1151{
1152 return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
1153}
1154
57aa0772
OZ
1155const char *
1156bgp_attr_name(uint code)
1157{
1158 return (code < ARRAY_SIZE(bgp_attr_table)) ? bgp_attr_table[code].name : NULL;
1159}
1160
d15b0b0a
OZ
1161
1162/*
1163 * Attribute export
11cb6202
OZ
1164 */
1165
d15b0b0a
OZ
1166static inline void
1167bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
1168{
ee7e2ffd 1169 if (EA_PROTO(a->id) != PROTOCOL_BGP)
d15b0b0a
OZ
1170 return;
1171
1172 uint code = EA_ID(a->id);
1173
1174 if (bgp_attr_known(code))
1175 {
1176 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1177
2d0652dd
OZ
1178 /* The flags should be correct, we reset them just to be sure */
1179 ASSERT(!((a->flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)));
d15b0b0a
OZ
1180 a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
1181
1182 /* Set partial bit if new opt-trans attribute is attached to non-local route */
63cf5d5d 1183 if ((s->src != NULL) && (a->originated) &&
d15b0b0a
OZ
1184 (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
1185 a->flags |= BAF_PARTIAL;
d1a74339 1186
d15b0b0a
OZ
1187 /* Call specific hook */
1188 CALL(desc->export, s, a);
1189
1190 /* Attribute might become undefined in hook */
0f685152 1191 if (a->undef)
d15b0b0a
OZ
1192 return;
1193 }
1194 else
1195 {
1196 /* Don't re-export unknown non-transitive attributes */
1197 if (!(a->flags & BAF_TRANSITIVE))
1198 return;
1199
1200 a->flags |= BAF_PARTIAL;
1201 }
1202
1203 /* Append updated attribute */
1204 to->attrs[to->count++] = *a;
1205}
1206
1207/**
1208 * bgp_export_attrs - export BGP attributes
1209 * @s: BGP export state
1210 * @attrs: a list of extended attributes
1211 *
1212 * The bgp_export_attrs() function takes a list of attributes and merges it to
1213 * one newly allocated and sorted segment. Attributes are validated and
1214 * normalized by type-specific export hooks and attribute flags are updated.
1215 * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
1216 * empty community sets).
1217 *
1218 * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
1219 */
1220static inline ea_list *
1221bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
4847a894 1222{
d15b0b0a
OZ
1223 /* Merge the attribute list */
1224 ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
1225 ea_merge(attrs, new);
1226 ea_sort(new);
1227
1228 uint i, count;
1229 count = new->count;
1230 new->count = 0;
1231
1232 /* Export each attribute */
1233 for (i = 0; i < count; i++)
1234 bgp_export_attr(s, &new->attrs[i], new);
1235
963b2c7c 1236 if (s->err_reject)
d15b0b0a
OZ
1237 return NULL;
1238
1239 return new;
4847a894
OZ
1240}
1241
d15b0b0a
OZ
1242
1243/*
1244 * Attribute encoding
1245 */
1246
1247static inline int
1248bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
cf3d6470 1249{
ee7e2ffd 1250 ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
d15b0b0a
OZ
1251
1252 uint code = EA_ID(a->id);
1253
1254 if (bgp_attr_known(code))
1255 return bgp_attr_table[code].encode(s, a, buf, size);
cf3d6470 1256 else
d15b0b0a 1257 return bgp_encode_raw(s, a, buf, size);
cf3d6470
MM
1258}
1259
d15b0b0a
OZ
1260/**
1261 * bgp_encode_attrs - encode BGP attributes
1262 * @s: BGP write state
1263 * @attrs: a list of extended attributes
1264 * @buf: buffer
1265 * @end: buffer end
1266 *
1267 * The bgp_encode_attrs() function takes a list of extended attributes
1268 * and converts it to its BGP representation (a part of an Update message).
863ecfc7 1269 * BGP write state may be fake when called from MRT protocol.
d15b0b0a
OZ
1270 *
1271 * Result: Length of the attribute block generated or -1 if not enough space.
1272 */
1273int
1274bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
4847a894 1275{
d15b0b0a
OZ
1276 byte *pos = buf;
1277 int i, len;
1278
1279 for (i = 0; i < attrs->count; i++)
1280 {
1281 len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
1282
1283 if (len < 0)
1284 return -1;
1285
1286 pos += len;
1287 }
1288
1289 return pos - buf;
4847a894
OZ
1290}
1291
d15b0b0a
OZ
1292
1293/*
1294 * Attribute decoding
1295 */
1296
1297static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
1298
1299static inline int
1300bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
cf3d6470 1301{
d15b0b0a
OZ
1302 eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
1303 int num = p->cf->allow_local_as + 1;
1304 return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
4847a894
OZ
1305}
1306
d15b0b0a
OZ
1307static inline int
1308bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
4847a894 1309{
d15b0b0a
OZ
1310 eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
1311 return (e && (e->u.data == p->local_id));
cf3d6470
MM
1312}
1313
d15b0b0a
OZ
1314static inline int
1315bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
11cb6202 1316{
d15b0b0a
OZ
1317 eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
1318 return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
1319}
11cb6202 1320
d15b0b0a
OZ
1321static inline void
1322bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
1323{
1324 /* Handle duplicate attributes; RFC 7606 3 (g) */
1325 if (BIT32_TEST(s->attrs_seen, code))
1326 {
1327 if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
1328 bgp_parse_error(s, 1);
1329 else
1330 DISCARD("Discarding duplicate attribute (code %u)", code);
1331 }
1332 BIT32_SET(s->attrs_seen, code);
11cb6202 1333
d15b0b0a
OZ
1334 if (bgp_attr_known(code))
1335 {
1336 const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1337
1338 /* Handle conflicting flags; RFC 7606 3 (c) */
09ee846d
OZ
1339 if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
1340 !(desc->flags & BAF_DECODE_FLAGS))
d15b0b0a 1341 WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
11cb6202 1342
d15b0b0a
OZ
1343 desc->decode(s, code, flags, data, len, to);
1344 }
1345 else /* Unknown attribute */
1346 {
1347 if (!(flags & BAF_OPTIONAL))
1348 WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
1349
1350 bgp_decode_unknown(s, code, flags, data, len, to);
1351 }
11cb6202
OZ
1352}
1353
d15b0b0a
OZ
1354/**
1355 * bgp_decode_attrs - check and decode BGP attributes
1356 * @s: BGP parse state
1357 * @data: start of attribute block
1358 * @len: length of attribute block
1359 *
1360 * This function takes a BGP attribute block (a part of an Update message), checks
1361 * its consistency and converts it to a list of BIRD route attributes represented
1362 * by an (uncached) &rta.
1363 */
1364ea_list *
1365bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
11cb6202 1366{
d15b0b0a
OZ
1367 struct bgp_proto *p = s->proto;
1368 ea_list *attrs = NULL;
1369 uint code, flags, alen;
1370 byte *pos = data;
11cb6202 1371
d15b0b0a
OZ
1372 /* Parse the attributes */
1373 while (len)
1374 {
1375 alen = 0;
1376
1377 /* Read attribute type */
1378 if (len < 2)
1379 goto framing_error;
1380 flags = pos[0];
1381 code = pos[1];
1382 ADVANCE(pos, len, 2);
1383
1384 /* Read attribute length */
1385 if (flags & BAF_EXT_LEN)
11cb6202 1386 {
d15b0b0a
OZ
1387 if (len < 2)
1388 goto framing_error;
1389 alen = get_u16(pos);
1390 ADVANCE(pos, len, 2);
11cb6202 1391 }
d15b0b0a
OZ
1392 else
1393 {
1394 if (len < 1)
1395 goto framing_error;
1396 alen = *pos;
1397 ADVANCE(pos, len, 1);
1398 }
1399
1400 if (alen > len)
1401 goto framing_error;
1402
1403 DBG("Attr %02x %02x %u\n", code, flags, alen);
1404
1405 bgp_decode_attr(s, code, flags, pos, alen, &attrs);
1406 ADVANCE(pos, len, alen);
1407 }
1408
1409 if (s->err_withdraw)
1410 goto withdraw;
1411
1412 /* If there is no reachability NLRI, we are finished */
1413 if (!s->ip_reach_len && !s->mp_reach_len)
1414 return NULL;
1415
1416
1417 /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1418 if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
1419 { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
1420
1421 if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
1422 { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
11cb6202 1423
9c9050ff
OZ
1424 if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
1425 { REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
1426
d15b0b0a
OZ
1427 /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1428 reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1429 if (!p->as4_session)
1430 bgp_process_as4_attrs(&attrs, s->pool);
11cb6202 1431
d15b0b0a
OZ
1432 /* Reject routes with our ASN in AS_PATH attribute */
1433 if (bgp_as_path_loopy(p, attrs, p->local_as))
4c6ee53f 1434 goto loop;
11cb6202 1435
5509e17d 1436 /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
d15b0b0a 1437 if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
4c6ee53f 1438 goto loop;
11cb6202 1439
d15b0b0a
OZ
1440 /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1441 if (p->is_internal && bgp_originator_id_loopy(p, attrs))
4c6ee53f 1442 goto loop;
11cb6202 1443
d15b0b0a
OZ
1444 /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1445 if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
4c6ee53f 1446 goto loop;
11cb6202 1447
d15b0b0a
OZ
1448 /* If there is no local preference, define one */
1449 if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
1450 bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
11cb6202 1451
d15b0b0a 1452 return attrs;
f421cfdd 1453
11cb6202 1454
d15b0b0a
OZ
1455framing_error:
1456 /* RFC 7606 4 - handle attribute framing errors */
1457 REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1458 alen, len, (int) (pos - s->attrs));
ae8f5584 1459
d15b0b0a
OZ
1460withdraw:
1461 /* RFC 7606 5.2 - handle missing NLRI during errors */
1462 if (!s->ip_reach_len && !s->mp_reach_len)
1463 bgp_parse_error(s, 1);
c2b28c99 1464
d15b0b0a
OZ
1465 s->err_withdraw = 1;
1466 return NULL;
4c6ee53f
OZ
1467
1468loop:
1469 /* Loops are handled as withdraws, but ignored silently. Do not set err_withdraw. */
1470 return NULL;
e3558ab1
MM
1471}
1472
09ee846d
OZ
1473void
1474bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
1475{
1476 /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
1477 if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
1478 {
1479 REPORT("Discarding AIGP attribute received on non-AIGP session");
1480 bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
1481 }
c73b5d2d
EB
1482
1483 /* Handle OTC ingress procedure, RFC 9234 */
1484 if (bgp_channel_is_role_applicable(s->channel))
1485 {
1486 struct bgp_proto *p = s->proto;
1487 eattr *e = bgp_find_attr(a->eattrs, BA_ONLY_TO_CUSTOMER);
1488
1489 /* Reject routes from downstream if they are leaked */
1490 if (e && (p->cf->local_role == BGP_ROLE_PROVIDER ||
1491 p->cf->local_role == BGP_ROLE_RS_SERVER))
1492 WITHDRAW("Route leak detected - OTC attribute from downstream");
1493
1494 /* Reject routes from peers if they are leaked */
1495 if (e && (p->cf->local_role == BGP_ROLE_PEER) && (e->u.data != p->cf->remote_as))
1496 WITHDRAW("Route leak detected - OTC attribute with mismatched ASN (%u)",
1497 (uint) e->u.data);
1498
1499 /* Mark routes from upstream if it did not happened before */
1500 if (!e && (p->cf->local_role == BGP_ROLE_CUSTOMER ||
1501 p->cf->local_role == BGP_ROLE_PEER ||
1502 p->cf->local_role == BGP_ROLE_RS_CLIENT))
1503 bgp_set_attr_u32(&a->eattrs, s->pool, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as);
1504 }
9d456d53
OZ
1505
1506 /* Apply MPLS policy for labeled SAFIs */
1507 if (s->mpls && s->proto->p.mpls_channel)
1508 {
1509 struct mpls_channel *mc = (void *) s->proto->p.mpls_channel;
1510 ea_set_attr_u32(&a->eattrs, s->pool, EA_MPLS_POLICY, 0, EAF_TYPE_INT, mc->label_policy);
1511 }
09ee846d
OZ
1512}
1513
ae8f5584 1514
d15b0b0a
OZ
1515/*
1516 * Route bucket hash table
1517 */
42a0c054 1518
d15b0b0a
OZ
1519#define RBH_KEY(b) b->eattrs, b->hash
1520#define RBH_NEXT(b) b->next
1521#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
1522#define RBH_FN(a,h) h
42a0c054 1523
d15b0b0a 1524#define RBH_REHASH bgp_rbh_rehash
ba2a0760 1525#define RBH_PARAMS /8, *2, 2, 2, 12, 20
42a0c054 1526
42a0c054 1527
d15b0b0a 1528HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
66dbdbd9 1529
d15b0b0a
OZ
1530void
1531bgp_init_bucket_table(struct bgp_channel *c)
66dbdbd9 1532{
d15b0b0a 1533 HASH_INIT(c->bucket_hash, c->pool, 8);
66dbdbd9 1534
d15b0b0a
OZ
1535 init_list(&c->bucket_queue);
1536 c->withdraw_bucket = NULL;
ae8f5584
MM
1537}
1538
7fc55925
OZ
1539void
1540bgp_free_bucket_table(struct bgp_channel *c)
1541{
1542 HASH_FREE(c->bucket_hash);
1543
1544 struct bgp_bucket *b;
1545 WALK_LIST_FIRST(b, c->bucket_queue)
1546 {
1547 rem_node(&b->send_node);
1548 mb_free(b);
1549 }
1550
1551 mb_free(c->withdraw_bucket);
1552 c->withdraw_bucket = NULL;
1553}
1554
ae8f5584 1555static struct bgp_bucket *
d15b0b0a 1556bgp_get_bucket(struct bgp_channel *c, ea_list *new)
ae8f5584 1557{
d15b0b0a
OZ
1558 /* Hash and lookup */
1559 u32 hash = ea_hash(new);
1560 struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
1561
1562 if (b)
1563 return b;
1564
1565 uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
1566 uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
1567 uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
1568 uint i;
ae8f5584 1569 byte *dest;
ae8f5584
MM
1570
1571 /* Gather total size of non-inline attributes */
d15b0b0a
OZ
1572 for (i = 0; i < new->count; i++)
1573 {
1574 eattr *a = &new->attrs[i];
ae8f5584 1575
d15b0b0a
OZ
1576 if (!(a->type & EAF_EMBEDDED))
1577 size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
1578 }
1579
1580 /* Create the bucket */
1581 b = mb_alloc(c->pool, size);
24658677 1582 *b = (struct bgp_bucket) { };
f421cfdd 1583 init_list(&b->prefixes);
d15b0b0a
OZ
1584 b->hash = hash;
1585
1586 /* Copy list of extended attributes */
ae8f5584 1587 memcpy(b->eattrs, new, ea_size);
d15b0b0a 1588 dest = ((byte *) b->eattrs) + ea_size_aligned;
ae8f5584
MM
1589
1590 /* Copy values of non-inline attributes */
d15b0b0a
OZ
1591 for (i = 0; i < new->count; i++)
1592 {
1593 eattr *a = &b->eattrs->attrs[i];
1594
1595 if (!(a->type & EAF_EMBEDDED))
ae8f5584 1596 {
4c553c5a 1597 const struct adata *oa = a->u.ptr;
d15b0b0a
OZ
1598 struct adata *na = (struct adata *) dest;
1599 memcpy(na, oa, sizeof(struct adata) + oa->length);
1600 a->u.ptr = na;
1601 dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
ae8f5584 1602 }
d15b0b0a 1603 }
ae8f5584 1604
d15b0b0a
OZ
1605 /* Insert the bucket to send queue and bucket hash */
1606 add_tail(&c->bucket_queue, &b->send_node);
1607 HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
ae8f5584
MM
1608
1609 return b;
1610}
1611
1612static struct bgp_bucket *
d15b0b0a 1613bgp_get_withdraw_bucket(struct bgp_channel *c)
ae8f5584 1614{
d15b0b0a
OZ
1615 if (!c->withdraw_bucket)
1616 {
1617 c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
1618 init_list(&c->withdraw_bucket->prefixes);
1619 }
ae8f5584 1620
d15b0b0a
OZ
1621 return c->withdraw_bucket;
1622}
ae8f5584 1623
d15b0b0a
OZ
1624void
1625bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1626{
1627 rem_node(&b->send_node);
1628 HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
1629 mb_free(b);
1630}
f421cfdd 1631
d15b0b0a
OZ
1632void
1633bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1634{
1635 rem_node(&b->send_node);
1636 add_tail(&c->bucket_queue, &b->send_node);
ae8f5584
MM
1637}
1638
f421cfdd 1639void
d15b0b0a 1640bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
f421cfdd 1641{
d15b0b0a
OZ
1642 struct bgp_proto *p = (void *) c->c.proto;
1643 struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
1644
1645 log(L_ERR "%s: Attribute list too long", p->p.name);
1646 while (!EMPTY_LIST(b->prefixes))
1647 {
1648 struct bgp_prefix *px = HEAD(b->prefixes);
1649
1650 log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
1651 rem_node(&px->buck_node);
1652 add_tail(&wb->prefixes, &px->buck_node);
1653 }
f421cfdd
MM
1654}
1655
094d2bdb 1656
d15b0b0a
OZ
1657/*
1658 * Prefix hash table
1659 */
094d2bdb 1660
d15b0b0a
OZ
1661#define PXH_KEY(px) px->net, px->path_id, px->hash
1662#define PXH_NEXT(px) px->next
1663#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
1664#define PXH_FN(n,i,h) h
e7d2ac44
OZ
1665
1666#define PXH_REHASH bgp_pxh_rehash
ba2a0760 1667#define PXH_PARAMS /8, *2, 2, 2, 12, 24
e7d2ac44 1668
094d2bdb 1669
e7d2ac44 1670HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
094d2bdb
OZ
1671
1672void
d15b0b0a 1673bgp_init_prefix_table(struct bgp_channel *c)
094d2bdb 1674{
d15b0b0a 1675 HASH_INIT(c->prefix_hash, c->pool, 8);
094d2bdb 1676
ac3ad139
OZ
1677 uint alen = net_addr_length[c->c.net_type];
1678 c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
094d2bdb
OZ
1679}
1680
ed1a908e 1681void
c259669f 1682bgp_free_prefix_table(struct bgp_channel *c)
ed1a908e 1683{
c259669f 1684 HASH_FREE(c->prefix_hash);
ed1a908e 1685
c259669f
OZ
1686 rfree(c->prefix_slab);
1687 c->prefix_slab = NULL;
094d2bdb
OZ
1688}
1689
1690static struct bgp_prefix *
d15b0b0a 1691bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
094d2bdb 1692{
ba2a0760
OZ
1693 /* We must use a different hash function than the rtable */
1694 u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id));
d15b0b0a 1695 struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
094d2bdb 1696
d15b0b0a
OZ
1697 if (px)
1698 {
1699 rem_node(&px->buck_node);
1700 return px;
1701 }
094d2bdb 1702
ac3ad139
OZ
1703 if (c->prefix_slab)
1704 px = sl_alloc(c->prefix_slab);
1705 else
1706 px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
1707
24658677 1708 *px = (struct bgp_prefix) { };
d15b0b0a
OZ
1709 px->hash = hash;
1710 px->path_id = path_id;
1711 net_copy(px->net, net);
094d2bdb 1712
d15b0b0a 1713 HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
094d2bdb 1714
d15b0b0a 1715 return px;
094d2bdb
OZ
1716}
1717
1718void
d15b0b0a 1719bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
094d2bdb 1720{
d15b0b0a
OZ
1721 rem_node(&px->buck_node);
1722 HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
ac3ad139
OZ
1723
1724 if (c->prefix_slab)
ebd807c0 1725 sl_free(px);
ac3ad139
OZ
1726 else
1727 mb_free(px);
094d2bdb
OZ
1728}
1729
1730
d15b0b0a
OZ
1731/*
1732 * BGP protocol glue
1733 */
ef2c708d 1734
d15b0b0a 1735int
d429bc5c 1736bgp_preexport(struct channel *C, rte *e)
ef2c708d 1737{
5cff1d5f 1738 struct proto *SRC = e->src->proto;
beb5f78a 1739 struct bgp_proto *p = (struct bgp_proto *) C->proto;
d15b0b0a 1740 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
2e484f8d 1741 struct bgp_channel *c = (struct bgp_channel *) C;
ef2c708d 1742
54430df9
OZ
1743 /* Ignore non-BGP channels */
1744 if (C->channel != &channel_bgp)
1745 return -1;
1746
d15b0b0a
OZ
1747 /* Reject our routes */
1748 if (src == p)
1749 return -1;
4847a894 1750
d15b0b0a
OZ
1751 /* Accept non-BGP routes */
1752 if (src == NULL)
1753 return 0;
4847a894 1754
1f2eb2ac
OZ
1755 /* Reject flowspec that failed validation */
1756 if ((e->attrs->dest == RTD_UNREACHABLE) && net_is_flow(e->net->n.addr))
1757 return -1;
1758
d15b0b0a
OZ
1759 /* IBGP route reflection, RFC 4456 */
1760 if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
1761 {
1762 /* Rejected unless configured as route reflector */
1763 if (!p->rr_client && !src->rr_client)
1764 return -1;
1765
1766 /* Generally, this should be handled when path is received, but we check it
1767 also here as rr_cluster_id may be undefined or different in src. */
1768 if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
1769 return -1;
1770 }
1771
1772 /* Handle well-known communities, RFC 1997 */
c73b5d2d 1773 struct eattr *a;
d15b0b0a 1774 if (p->cf->interpret_communities &&
c73b5d2d 1775 (a = bgp_find_attr(e->attrs->eattrs, BA_COMMUNITY)))
d15b0b0a 1776 {
c73b5d2d 1777 const struct adata *d = a->u.ptr;
d15b0b0a
OZ
1778
1779 /* Do not export anywhere */
1780 if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1781 return -1;
1782
1783 /* Do not export outside of AS (or member-AS) */
1784 if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
1785 return -1;
1786
1787 /* Do not export outside of AS (or confederation) */
1788 if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
1789 return -1;
5bd73431
OZ
1790
1791 /* Do not export LLGR_STALE routes to LLGR-ignorant peers */
1792 if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
1793 return -1;
d15b0b0a 1794 }
4847a894 1795
c73b5d2d
EB
1796 /* Do not export routes marked with OTC to upstream, RFC 9234 */
1797 if (bgp_channel_is_role_applicable(c))
1798 {
1799 a = bgp_find_attr(e->attrs->eattrs, BA_ONLY_TO_CUSTOMER);
1800 if (a && (p->cf->local_role==BGP_ROLE_CUSTOMER ||
1801 p->cf->local_role==BGP_ROLE_PEER ||
1802 p->cf->local_role==BGP_ROLE_RS_CLIENT))
1803 return -1;
1804 }
1805
d15b0b0a 1806 return 0;
4847a894
OZ
1807}
1808
d15b0b0a 1809static ea_list *
82f42ea0 1810bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
ef2c708d 1811{
5cff1d5f 1812 struct proto *SRC = e->src->proto;
d15b0b0a 1813 struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
1e37e35c 1814 struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
82f42ea0 1815 ea_list *attrs = attrs0;
48e842cc 1816 eattr *a;
4c553c5a 1817 const adata *ad;
48e842cc 1818
d15b0b0a 1819 /* ORIGIN attribute - mandatory, attach if missing */
82f42ea0 1820 if (! bgp_find_attr(attrs0, BA_ORIGIN))
d15b0b0a
OZ
1821 bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
1822
82f42ea0
OZ
1823 /* AS_PATH attribute - mandatory */
1824 a = bgp_find_attr(attrs0, BA_AS_PATH);
1825 ad = a ? a->u.ptr : &null_adata;
5509e17d
OZ
1826
1827 /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1828 if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
1829 ad = as_path_strip_confed(pool, ad);
1830
d15b0b0a 1831 /* AS_PATH attribute - keep or prepend ASN */
532116e7 1832 if (p->is_internal || p->rs_client)
d15b0b0a
OZ
1833 {
1834 /* IBGP or route server -> just ensure there is one */
5509e17d
OZ
1835 if (!a)
1836 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
d15b0b0a
OZ
1837 }
1838 else if (p->is_interior)
1839 {
5509e17d
OZ
1840 /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1841 ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
1842 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
d15b0b0a
OZ
1843 }
1844 else /* Regular EBGP (no RS, no confederation) */
1845 {
5509e17d
OZ
1846 /* Regular EBGP -> prepend ASN as regular sequence */
1847 ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
1848 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
d15b0b0a
OZ
1849
1850 /* MULTI_EXIT_DESC attribute - accept only if set in export filter */
82f42ea0 1851 a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
9e44ace3 1852 if (a && !a->fresh && !p->cf->allow_med)
d15b0b0a
OZ
1853 bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
1854 }
1855
1856 /* NEXT_HOP attribute - delegated to AF-specific hook */
82f42ea0 1857 a = bgp_find_attr(attrs0, BA_NEXT_HOP);
d15b0b0a
OZ
1858 bgp_update_next_hop(&s, a, &attrs);
1859
1860 /* LOCAL_PREF attribute - required for IBGP, attach if missing */
82f42ea0 1861 if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
d15b0b0a
OZ
1862 bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1863
09ee846d
OZ
1864 /* AIGP attribute - accumulate local metric or originate new one */
1865 u64 metric;
1866 if (s.local_next_hop &&
1867 (bgp_total_aigp_metric_(e, &metric, &ad) ||
1868 (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
1869 {
1870 ad = bgp_aigp_set_metric(pool, ad, metric);
1871 bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
1872 }
1873
d15b0b0a
OZ
1874 /* IBGP route reflection, RFC 4456 */
1875 if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
1876 {
1877 /* ORIGINATOR_ID attribute - attach if not already set */
82f42ea0 1878 if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
d15b0b0a
OZ
1879 bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
1880
1881 /* CLUSTER_LIST attribute - prepend cluster ID */
82f42ea0
OZ
1882 a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
1883 ad = a ? a->u.ptr : NULL;
1884
1885 /* Prepend src cluster ID */
d15b0b0a 1886 if (src->rr_cluster_id)
c259669f 1887 ad = int_set_prepend(pool, ad, src->rr_cluster_id);
d15b0b0a 1888
82f42ea0 1889 /* Prepend dst cluster ID if src and dst clusters are different */
d15b0b0a 1890 if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
c259669f 1891 ad = int_set_prepend(pool, ad, p->rr_cluster_id);
82f42ea0
OZ
1892
1893 /* Should be at least one prepended cluster ID */
1894 bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
d15b0b0a
OZ
1895 }
1896
1897 /* AS4_* transition attributes, RFC 6793 4.2.2 */
1898 if (! p->as4_session)
1899 {
1900 a = bgp_find_attr(attrs, BA_AS_PATH);
1901 if (a && as_path_contains_as4(a->u.ptr))
48e842cc 1902 {
d15b0b0a
OZ
1903 bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
1904 bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
4847a894
OZ
1905 }
1906
d15b0b0a
OZ
1907 a = bgp_find_attr(attrs, BA_AGGREGATOR);
1908 if (a && aggregator_contains_as4(a->u.ptr))
4847a894 1909 {
d15b0b0a
OZ
1910 bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
1911 bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
48e842cc 1912 }
d15b0b0a 1913 }
ef2c708d 1914
c73b5d2d
EB
1915 /* Mark routes for downstream with OTC, RFC 9234 */
1916 if (bgp_channel_is_role_applicable(c))
1917 {
1918 a = bgp_find_attr(attrs, BA_ONLY_TO_CUSTOMER);
1919 if (!a && (p->cf->local_role == BGP_ROLE_PROVIDER ||
1920 p->cf->local_role == BGP_ROLE_PEER ||
1921 p->cf->local_role == BGP_ROLE_RS_SERVER))
1922 bgp_set_attr_u32(&attrs, pool, BA_ONLY_TO_CUSTOMER, 0, p->public_as);
1923 }
1924
82f42ea0
OZ
1925 /*
1926 * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1927 * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1928 * should be checked in AF-specific hooks.
1929 */
1930
d15b0b0a
OZ
1931 /* Apply per-attribute export hooks for validatation and normalization */
1932 return bgp_export_attrs(&s, attrs);
ef2c708d
MM
1933}
1934
d15b0b0a 1935void
13c0be19 1936bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
6cb8f742 1937{
d15b0b0a
OZ
1938 struct bgp_proto *p = (void *) P;
1939 struct bgp_channel *c = (void *) C;
1940 struct bgp_bucket *buck;
1941 struct bgp_prefix *px;
1942 u32 path;
6cb8f742 1943
54430df9
OZ
1944 /* Ignore non-BGP channels */
1945 if (C->channel != &channel_bgp)
1946 return;
1947
d15b0b0a
OZ
1948 if (new)
1949 {
06ece326 1950 struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, tmp_linpool);
6cb8f742 1951
9dbb7eb6
OZ
1952 /* Error during attribute processing */
1953 if (!attrs)
1954 log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n->n.addr);
1955
d15b0b0a
OZ
1956 /* If attributes are invalid, we fail back to withdraw */
1957 buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
5cff1d5f 1958 path = new->src->global_id;
d15b0b0a 1959 }
ef2c708d 1960 else
d15b0b0a
OZ
1961 {
1962 buck = bgp_get_withdraw_bucket(c);
5cff1d5f 1963 path = old->src->global_id;
d15b0b0a
OZ
1964 }
1965
1966 px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
1967 add_tail(&buck->prefixes, &px->buck_node);
1968
1969 bgp_schedule_packet(p->conn, c, PKT_UPDATE);
ef2c708d
MM
1970}
1971
d15b0b0a 1972
b6bf284a
OZ
1973static inline u32
1974bgp_get_neighbor(rte *r)
1975{
ee7e2ffd 1976 eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
b6bf284a
OZ
1977 u32 as;
1978
5509e17d 1979 if (e && as_path_get_first_regular(e->u.ptr, &as))
b6bf284a 1980 return as;
5509e17d
OZ
1981
1982 /* If AS_PATH is not defined, we treat rte as locally originated */
5cff1d5f 1983 struct bgp_proto *p = (void *) r->src->proto;
5509e17d 1984 return p->cf->confederation ?: p->local_as;
b6bf284a
OZ
1985}
1986
5bd73431
OZ
1987static inline int
1988rte_stale(rte *r)
1989{
ddd89ba1
MM
1990 if (r->pflags & BGP_REF_STALE)
1991 return 1;
1992
1993 if (r->pflags & BGP_REF_NOT_STALE)
1994 return 0;
1995
1996 /* If staleness is unknown, compute and cache it */
1997 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
1998 if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE))
5bd73431 1999 {
ddd89ba1
MM
2000 r->pflags |= BGP_REF_STALE;
2001 return 1;
2002 }
2003 else
2004 {
2005 r->pflags |= BGP_REF_NOT_STALE;
2006 return 0;
5bd73431 2007 }
5bd73431
OZ
2008}
2009
ef2c708d
MM
2010int
2011bgp_rte_better(rte *new, rte *old)
2012{
5cff1d5f
MM
2013 struct bgp_proto *new_bgp = (struct bgp_proto *) new->src->proto;
2014 struct bgp_proto *old_bgp = (struct bgp_proto *) old->src->proto;
56a2bed4
MM
2015 eattr *x, *y;
2016 u32 n, o;
ef2c708d 2017
be4cd99a 2018 /* Skip suppressed routes (see bgp_rte_recalculate()) */
ddd89ba1
MM
2019 n = new->pflags & BGP_REF_SUPPRESSED;
2020 o = old->pflags & BGP_REF_SUPPRESSED;
be4cd99a
OZ
2021 if (n > o)
2022 return 0;
2023 if (n < o)
2024 return 1;
2025
ac3ac49a 2026 /* RFC 4271 9.1.2.1. Route resolvability test */
7e95c05d
OZ
2027 n = rte_resolvable(new);
2028 o = rte_resolvable(old);
ac3ac49a
OZ
2029 if (n > o)
2030 return 1;
2031 if (n < o)
2032 return 0;
2033
5bd73431
OZ
2034 /* LLGR draft - depreference stale routes */
2035 n = rte_stale(new);
2036 o = rte_stale(old);
2037 if (n > o)
2038 return 0;
2039 if (n < o)
2040 return 1;
2041
2042 /* Start with local preferences */
ee7e2ffd
JMM
2043 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2044 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
56a2bed4
MM
2045 n = x ? x->u.data : new_bgp->cf->default_local_pref;
2046 o = y ? y->u.data : old_bgp->cf->default_local_pref;
2047 if (n > o)
2048 return 1;
2049 if (n < o)
2050 return 0;
2051
09ee846d
OZ
2052 /* RFC 7311 4.1 - Apply AIGP metric */
2053 u64 n2 = bgp_total_aigp_metric(new);
2054 u64 o2 = bgp_total_aigp_metric(old);
2055 if (n2 < o2)
2056 return 1;
2057 if (n2 > o2)
2058 return 0;
2059
4847a894 2060 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
56a2bed4 2061 if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
d15b0b0a 2062 {
ee7e2ffd
JMM
2063 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2064 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
d15b0b0a
OZ
2065 n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
2066 o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
2067 if (n < o)
2068 return 1;
2069 if (n > o)
2070 return 0;
2071 }
ef2c708d 2072
4847a894 2073 /* RFC 4271 9.1.2.2. b) Use origins */
ee7e2ffd
JMM
2074 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2075 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
cea63664
MM
2076 n = x ? x->u.data : ORIGIN_INCOMPLETE;
2077 o = y ? y->u.data : ORIGIN_INCOMPLETE;
56a2bed4
MM
2078 if (n < o)
2079 return 1;
2080 if (n > o)
2081 return 0;
2082
4847a894 2083 /* RFC 4271 9.1.2.2. c) Compare MED's */
be4cd99a
OZ
2084 /* Proper RFC 4271 path selection cannot be interpreted as finding
2085 * the best path in some ordering. It is implemented partially in
2086 * bgp_rte_recalculate() when deterministic_med option is
2087 * active. Without that option, the behavior is just an
2088 * approximation, which in specific situations may lead to
2089 * persistent routing loops, because it is nondeterministic - it
2090 * depends on the order in which routes appeared. But it is also the
2091 * same behavior as used by default in Cisco routers, so it is
2092 * probably not a big issue.
73272f04
OZ
2093 */
2094 if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
2095 (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
d15b0b0a 2096 {
ee7e2ffd
JMM
2097 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2098 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
d15b0b0a
OZ
2099 n = x ? x->u.data : new_bgp->cf->default_med;
2100 o = y ? y->u.data : old_bgp->cf->default_med;
2101 if (n < o)
2102 return 1;
2103 if (n > o)
2104 return 0;
2105 }
56a2bed4 2106
4847a894 2107 /* RFC 4271 9.1.2.2. d) Prefer external peers */
d15b0b0a 2108 if (new_bgp->is_interior > old_bgp->is_interior)
ef2c708d 2109 return 0;
d15b0b0a 2110 if (new_bgp->is_interior < old_bgp->is_interior)
ef2c708d 2111 return 1;
ef2c708d 2112
d1e146f2
OZ
2113 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
2114 n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
2115 o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
2116 if (n < o)
2117 return 1;
2118 if (n > o)
2119 return 0;
4847a894 2120
4847a894 2121 /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
d15b0b0a 2122 /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
ee7e2ffd
JMM
2123 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
2124 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
4847a894
OZ
2125 n = x ? x->u.data : new_bgp->remote_id;
2126 o = y ? y->u.data : old_bgp->remote_id;
3228c72c
OZ
2127
2128 /* RFC 5004 - prefer older routes */
2129 /* (if both are external and from different peer) */
2130 if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
2131 !new_bgp->is_internal && n != o)
2132 return 0;
2133
2134 /* rest of RFC 4271 9.1.2.2. f) */
4847a894
OZ
2135 if (n < o)
2136 return 1;
2137 if (n > o)
2138 return 0;
11cb6202 2139
3075824d 2140 /* RFC 4456 9. b) Compare cluster list lengths */
ee7e2ffd
JMM
2141 x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
2142 y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
3075824d
OZ
2143 n = x ? int_set_get_size(x->u.ptr) : 0;
2144 o = y ? int_set_get_size(y->u.ptr) : 0;
2145 if (n < o)
2146 return 1;
2147 if (n > o)
2148 return 0;
2149
4847a894 2150 /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
a22c3e59 2151 return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
4847a894
OZ
2152}
2153
be4cd99a 2154
8d9eef17
OZ
2155int
2156bgp_rte_mergable(rte *pri, rte *sec)
2157{
5cff1d5f
MM
2158 struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->src->proto;
2159 struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->src->proto;
8d9eef17
OZ
2160 eattr *x, *y;
2161 u32 p, s;
2162
2163 /* Skip suppressed routes (see bgp_rte_recalculate()) */
dabd7bcc 2164 if ((pri->pflags ^ sec->pflags) & BGP_REF_SUPPRESSED)
8d9eef17
OZ
2165 return 0;
2166
2167 /* RFC 4271 9.1.2.1. Route resolvability test */
f6a6a776 2168 if (rte_resolvable(pri) != rte_resolvable(sec))
8d9eef17
OZ
2169 return 0;
2170
dabd7bcc
MM
2171 /* LLGR draft - depreference stale routes */
2172 if (rte_stale(pri) != rte_stale(sec))
2173 return 0;
2174
8d9eef17 2175 /* Start with local preferences */
ee7e2ffd
JMM
2176 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2177 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
8d9eef17
OZ
2178 p = x ? x->u.data : pri_bgp->cf->default_local_pref;
2179 s = y ? y->u.data : sec_bgp->cf->default_local_pref;
2180 if (p != s)
2181 return 0;
2182
2183 /* RFC 4271 9.1.2.2. a) Use AS path lengths */
2184 if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
d15b0b0a 2185 {
ee7e2ffd
JMM
2186 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2187 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
d15b0b0a
OZ
2188 p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
2189 s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
8d9eef17 2190
d15b0b0a
OZ
2191 if (p != s)
2192 return 0;
8d9eef17 2193
d15b0b0a
OZ
2194// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
2195// return 0;
2196 }
8d9eef17
OZ
2197
2198 /* RFC 4271 9.1.2.2. b) Use origins */
ee7e2ffd
JMM
2199 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2200 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
8d9eef17
OZ
2201 p = x ? x->u.data : ORIGIN_INCOMPLETE;
2202 s = y ? y->u.data : ORIGIN_INCOMPLETE;
2203 if (p != s)
2204 return 0;
2205
2206 /* RFC 4271 9.1.2.2. c) Compare MED's */
2207 if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
2208 (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
d15b0b0a 2209 {
ee7e2ffd
JMM
2210 x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2211 y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
d15b0b0a
OZ
2212 p = x ? x->u.data : pri_bgp->cf->default_med;
2213 s = y ? y->u.data : sec_bgp->cf->default_med;
2214 if (p != s)
2215 return 0;
2216 }
8d9eef17
OZ
2217
2218 /* RFC 4271 9.1.2.2. d) Prefer external peers */
5509e17d 2219 if (pri_bgp->is_interior != sec_bgp->is_interior)
8d9eef17
OZ
2220 return 0;
2221
2222 /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
2223 p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
2224 s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
2225 if (p != s)
2226 return 0;
2227
2228 /* Remaining criteria are ignored */
2229
2230 return 1;
2231}
2232
2233
be4cd99a
OZ
2234static inline int
2235same_group(rte *r, u32 lpref, u32 lasn)
2236{
eb937358 2237 return (r->attrs->pref == lpref) && (bgp_get_neighbor(r) == lasn);
be4cd99a
OZ
2238}
2239
2240static inline int
2241use_deterministic_med(rte *r)
2242{
5cff1d5f 2243 struct proto *P = r->src->proto;
26822d8f 2244 return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
be4cd99a
OZ
2245}
2246
2247int
2248bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
2249{
2250 rte *r, *s;
2251 rte *key = new ? new : old;
eb937358 2252 u32 lpref = key->attrs->pref;
be4cd99a 2253 u32 lasn = bgp_get_neighbor(key);
ddd89ba1 2254 int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0;
be4cd99a
OZ
2255
2256 /*
2257 * Proper RFC 4271 path selection is a bit complicated, it cannot be
2258 * implemented just by rte_better(), because it is not a linear
2259 * ordering. But it can be splitted to two levels, where the lower
2260 * level chooses the best routes in each group of routes from the
2261 * same neighboring AS and higher level chooses the best route (with
2262 * a slightly different ordering) between the best-in-group routes.
2263 *
2264 * When deterministic_med is disabled, we just ignore this issue and
2265 * choose the best route by bgp_rte_better() alone. If enabled, the
2266 * lower level of the route selection is done here (for the group
2267 * to which the changed route belongs), all routes in group are
2268 * marked as suppressed, just chosen best-in-group is not.
2269 *
2270 * Global best route selection then implements higher level by
2271 * choosing between non-suppressed routes (as they are always
2272 * preferred over suppressed routes). Routes from BGP protocols
2273 * that do not set deterministic_med are just never suppressed. As
2274 * they do not participate in the lower level selection, it is OK
2275 * that this fn is not called for them.
2276 *
2277 * The idea is simple, the implementation is more problematic,
d15b0b0a 2278 * mostly because of optimizations in rte_recalculate() that
be4cd99a
OZ
2279 * avoids full recalculation in most cases.
2280 *
2281 * We can assume that at least one of new, old is non-NULL and both
2282 * are from the same protocol with enabled deterministic_med. We
2283 * group routes by both neighbor AS (lasn) and preference (lpref),
2284 * because bgp_rte_better() does not handle preference itself.
2285 */
2286
2287 /* If new and old are from different groups, we just process that
2288 as two independent events */
2289 if (new && old && !same_group(old, lpref, lasn))
d15b0b0a
OZ
2290 {
2291 int i1, i2;
2292 i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
2293 i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
2294 return i1 || i2;
2295 }
be4cd99a 2296
d15b0b0a 2297 /*
be4cd99a
OZ
2298 * We could find the best-in-group and then make some shortcuts like
2299 * in rte_recalculate, but as we would have to walk through all
2300 * net->routes just to find it, it is probably not worth. So we
f6a6a776 2301 * just have one simple fast case that use just the old route.
be4cd99a
OZ
2302 * We also set suppressed flag to avoid using it in bgp_rte_better().
2303 */
2304
2305 if (new)
ddd89ba1 2306 new->pflags |= BGP_REF_SUPPRESSED;
be4cd99a
OZ
2307
2308 if (old)
d15b0b0a 2309 {
ddd89ba1 2310 old->pflags |= BGP_REF_SUPPRESSED;
d15b0b0a 2311
f6a6a776
OZ
2312 /* The fast case - replace not best with worse (or remove not best) */
2313 if (old_suppressed && !(new && bgp_rte_better(new, old)))
d15b0b0a 2314 return 0;
d15b0b0a 2315 }
be4cd99a
OZ
2316
2317 /* The default case - find a new best-in-group route */
2318 r = new; /* new may not be in the list */
cf98be7b 2319 for (s=net->routes; rte_is_valid(s); s=s->next)
be4cd99a 2320 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
d15b0b0a 2321 {
ddd89ba1 2322 s->pflags |= BGP_REF_SUPPRESSED;
d15b0b0a
OZ
2323 if (!r || bgp_rte_better(s, r))
2324 r = s;
2325 }
be4cd99a
OZ
2326
2327 /* Simple case - the last route in group disappears */
2328 if (!r)
2329 return 0;
2330
f6a6a776
OZ
2331 /* Found if new is mergable with best-in-group */
2332 if (new && (new != r) && bgp_rte_mergable(r, new))
ddd89ba1 2333 new->pflags &= ~BGP_REF_SUPPRESSED;
f6a6a776
OZ
2334
2335 /* Found all existing routes mergable with best-in-group */
2336 for (s=net->routes; rte_is_valid(s); s=s->next)
2337 if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2338 if ((s != r) && bgp_rte_mergable(r, s))
ddd89ba1 2339 s->pflags &= ~BGP_REF_SUPPRESSED;
f6a6a776 2340
be4cd99a 2341 /* Found best-in-group */
ddd89ba1 2342 r->pflags &= ~BGP_REF_SUPPRESSED;
be4cd99a
OZ
2343
2344 /*
2345 * There are generally two reasons why we have to force
2346 * recalculation (return 1): First, the new route may be wrongfully
2347 * chosen to be the best in the first case check in
2348 * rte_recalculate(), this may happen only if old_best is from the
2349 * same group. Second, another (different than new route)
2350 * best-in-group is chosen and that may be the proper best (although
2351 * rte_recalculate() without ignore that possibility).
2352 *
2353 * There are three possible cases according to whether the old route
f6a6a776
OZ
2354 * was the best in group (OBG, i.e. !old_suppressed) and whether the
2355 * new route is the best in group (NBG, tested by r == new). These
2356 * cases work even if old or new is NULL.
be4cd99a
OZ
2357 *
2358 * NBG -> new is a possible candidate for the best route, so we just
2359 * check for the first reason using same_group().
2360 *
2361 * !NBG && OBG -> Second reason applies, return 1
2362 *
2363 * !NBG && !OBG -> Best in group does not change, old != old_best,
2364 * rte_better(new, old_best) is false and therefore
2365 * the first reason does not apply, return 0
2366 */
2367
2368 if (r == new)
2369 return old_best && same_group(old_best, lpref, lasn);
2370 else
f6a6a776 2371 return !old_suppressed;
be4cd99a
OZ
2372}
2373
5bd73431
OZ
2374struct rte *
2375bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
2376{
2377 eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
4c553c5a 2378 const struct adata *ad = a ? a->u.ptr : NULL;
5bd73431
OZ
2379 uint flags = a ? a->flags : BAF_PARTIAL;
2380
2381 if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
2382 return NULL;
2383
2384 if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
2385 return r;
2386
2387 r = rte_cow_rta(r, pool);
2388 bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
2389 int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
ddd89ba1 2390 r->pflags |= BGP_REF_STALE;
5bd73431
OZ
2391
2392 return r;
2393}
2394
11cb6202 2395
d15b0b0a
OZ
2396/*
2397 * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
11cb6202 2398 */
11cb6202 2399static void
d15b0b0a 2400bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
11cb6202 2401{
d15b0b0a
OZ
2402 eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
2403 eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
2404 eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
2405 eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
11cb6202 2406
d15b0b0a
OZ
2407 /* First, unset AS4_* attributes */
2408 if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
2409 if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
c00d31be 2410
d15b0b0a
OZ
2411 /* Handle AGGREGATOR attribute */
2412 if (a2 && a4)
2413 {
2414 u32 a2_asn = get_u32(a2->u.ptr->data);
ef2c708d 2415
d15b0b0a
OZ
2416 /* If routes were aggregated by an old router, then AS4_PATH and
2417 AS4_AGGREGATOR are invalid. In that case we give up. */
2418 if (a2_asn != AS_TRANS)
2419 return;
f307842a 2420
d15b0b0a
OZ
2421 /* Use AS4_AGGREGATOR instead of AGGREGATOR */
2422 a2->u.ptr = a4->u.ptr;
2423 }
c00d31be 2424
d15b0b0a
OZ
2425 /* Handle AS_PATH attribute */
2426 if (p2 && p4)
2427 {
5509e17d 2428 /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
d15b0b0a
OZ
2429 int p2_len = as_path_getlen(p2->u.ptr);
2430 int p4_len = as_path_getlen(p4->u.ptr);
4847a894 2431
d15b0b0a
OZ
2432 /* AS_PATH is too short, give up */
2433 if (p2_len < p4_len)
2434 return;
c00d31be 2435
d15b0b0a 2436 /* Merge AS_PATH and AS4_PATH */
4c553c5a
MM
2437 struct adata *apc = as_path_cut(pool, p2->u.ptr, p2_len - p4_len);
2438 p2->u.ptr = as_path_merge(pool, apc, p4->u.ptr);
d15b0b0a 2439 }
c00d31be 2440}
10be74da
MM
2441
2442int
258be565 2443bgp_get_attr(const eattr *a, byte *buf, int buflen)
10be74da 2444{
ae80a2de 2445 uint i = EA_ID(a->id);
d15b0b0a 2446 const struct bgp_attr_desc *d;
6c4df703 2447 int len;
10be74da 2448
d15b0b0a
OZ
2449 if (bgp_attr_known(i))
2450 {
2451 d = &bgp_attr_table[i];
2452 len = bsprintf(buf, "%s", d->name);
2453 buf += len;
2454 if (d->format)
10be74da 2455 {
d15b0b0a
OZ
2456 *buf++ = ':';
2457 *buf++ = ' ';
2458 d->format(a, buf, buflen - len - 2);
2459 return GA_FULL;
10be74da 2460 }
d15b0b0a
OZ
2461 return GA_NAME;
2462 }
2463
d1a74339 2464 bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
10be74da
MM
2465 return GA_NAME;
2466}
ae8f5584 2467
5e88d730 2468void
13c0be19 2469bgp_get_route_info(rte *e, byte *buf)
5e88d730 2470{
13c0be19
JMM
2471 eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2472 eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
11cb6202 2473 u32 origas;
5e88d730 2474
eb937358 2475 buf += bsprintf(buf, " (%d", e->attrs->pref);
3ce17142 2476
ddd89ba1 2477 if (e->pflags & BGP_REF_SUPPRESSED)
3ce17142 2478 buf += bsprintf(buf, "-");
be4cd99a 2479
5bd73431
OZ
2480 if (rte_stale(e))
2481 buf += bsprintf(buf, "s");
2482
09ee846d
OZ
2483 u64 metric = bgp_total_aigp_metric(e);
2484 if (metric < BGP_AIGP_MAX)
2485 {
2486 buf += bsprintf(buf, "/%lu", metric);
2487 }
2488 else if (e->attrs->igp_metric)
d15b0b0a
OZ
2489 {
2490 if (!rte_resolvable(e))
2491 buf += bsprintf(buf, "/-");
2492 else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
2493 buf += bsprintf(buf, "/?");
2494 else
2495 buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
2496 }
d1e146f2
OZ
2497 buf += bsprintf(buf, ") [");
2498
52b9b2a1 2499 if (p && as_path_get_last(p->u.ptr, &origas))
11cb6202 2500 buf += bsprintf(buf, "AS%u", origas);
5e88d730
MM
2501 if (o)
2502 buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2503 strcpy(buf, "]");
2504}