nest/rt-table.c — BIRD routing tables (git blame view, git.ipfire.org, thirdparty/bird.git)
Filter: Add more tests for append operator
[thirdparty/bird.git] / nest / rt-table.c
CommitLineData
62aa008a 1/*
58740ed4 2 * BIRD -- Routing Tables
62aa008a 3 *
50fe90ed 4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
62aa008a
MM
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
58740ed4
MM
9/**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
725270cb 16 * There are multiple routing tables (a primary one together with any
58740ed4
MM
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
58f7d004 19 * destination networks. For each network (represented by structure &net),
725270cb
MM
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
58740ed4
MM
22 * for routing), the order of the other ones is undetermined.
23 *
6d1ae197
OZ
24 * The &rte contains information about the route. There are net and src, which
25 * together forms a key identifying the route in a routing table. There is a
26 * pointer to a &rta structure (see the route attribute module for a precise
27 * explanation) holding the route attributes, which are primary data about the
28 * route. There are several technical fields used by routing table code (route
 29 * id, REF_* flags). There is also the pflags field, holding protocol-specific
30 * flags. They are not used by routing table code, but by protocol-specific
31 * hooks. In contrast to route attributes, they are not primary data and their
32 * validity is also limited to the routing table.
1f2eb2ac
OZ
33 *
34 * There are several mechanisms that allow automatic update of routes in one
35 * routing table (dst) as a result of changes in another routing table (src).
36 * They handle issues of recursive next hop resolving, flowspec validation and
37 * RPKI validation.
38 *
39 * The first such mechanism is handling of recursive next hops. A route in the
40 * dst table has an indirect next hop address, which is resolved through a route
41 * in the src table (which may also be the same table) to get an immediate next
42 * hop. This is implemented using structure &hostcache attached to the src
43 * table, which contains &hostentry structures for each tracked next hop
44 * address. These structures are linked from recursive routes in dst tables,
45 * possibly multiple routes sharing one hostentry (as many routes may have the
46 * same indirect next hop). There is also a trie in the hostcache, which matches
47 * all prefixes that may influence resolving of tracked next hops.
48 *
49 * When a best route changes in the src table, the hostcache is notified using
50 * rt_notify_hostcache(), which immediately checks using the trie whether the
51 * change is relevant and if it is, then it schedules asynchronous hostcache
52 * recomputation. The recomputation is done by rt_update_hostcache() (called
53 * from rt_event() of src table), it walks through all hostentries and resolves
54 * them (by rt_update_hostentry()). It also updates the trie. If a change in
55 * hostentry resolution was found, then it schedules asynchronous nexthop
56 * recomputation of associated dst table. That is done by rt_next_hop_update()
57 * (called from rt_event() of dst table), it iterates over all routes in the dst
58 * table and re-examines their hostentries for changes. Note that in contrast to
59 * hostcache update, next hop update can be interrupted by main loop. These two
60 * full-table walks (over hostcache and dst table) are necessary due to absence
61 * of direct lookups (route -> affected nexthop, nexthop -> its route).
62 *
63 * The second mechanism is for flowspec validation, where validity of flowspec
64 * routes depends of resolving their network prefixes in IP routing tables. This
65 * is similar to the recursive next hop mechanism, but simpler as there are no
66 * intermediate hostcache and hostentries (because flows are less likely to
67 * share common net prefix than routes sharing a common next hop). In src table,
68 * there is a list of dst tables (list flowspec_links), this list is updated by
 69 * flowspec channels (by rt_flowspec_link() and rt_flowspec_unlink() during
70 * channel start/stop). Each dst table has its own trie of prefixes that may
71 * influence validation of flowspec routes in it (flowspec_trie).
72 *
73 * When a best route changes in the src table, rt_flowspec_notify() immediately
74 * checks all dst tables from the list using their tries to see whether the
75 * change is relevant for them. If it is, then an asynchronous re-validation of
76 * flowspec routes in the dst table is scheduled. That is also done by function
77 * rt_next_hop_update(), like nexthop recomputation above. It iterates over all
78 * flowspec routes and re-validates them. It also recalculates the trie.
79 *
80 * Note that in contrast to the hostcache update, here the trie is recalculated
81 * during the rt_next_hop_update(), which may be interleaved with IP route
82 * updates. The trie is flushed at the beginning of recalculation, which means
83 * that such updates may use partial trie to see if they are relevant. But it
84 * works anyway! Either affected flowspec was already re-validated and added to
85 * the trie, then IP route change would match the trie and trigger a next round
86 * of re-validation, or it was not yet re-validated and added to the trie, but
87 * will be re-validated later in this round anyway.
88 *
89 * The third mechanism is used for RPKI re-validation of IP routes and it is the
90 * simplest. It is just a list of subscribers in src table, who are notified
91 * when any change happened, but only after a settle time. Also, in RPKI case
92 * the dst is not a table, but a channel, who refeeds routes through a filter.
58740ed4
MM
93 */
94
6b9fa320 95#undef LOCAL_DEBUG
1a54b1c6 96
62aa008a
MM
97#include "nest/bird.h"
98#include "nest/route.h"
2326b001 99#include "nest/protocol.h"
730f2e2c 100#include "nest/iface.h"
333ddd4f 101#include "nest/mpls.h"
2326b001 102#include "lib/resource.h"
5996da6a 103#include "lib/event.h"
00b85905 104#include "lib/timer.h"
730f2e2c 105#include "lib/string.h"
0e02abfd 106#include "conf/conf.h"
529c4149 107#include "filter/filter.h"
4f082dfa 108#include "filter/data.h"
586c1800 109#include "lib/hash.h"
221135d6 110#include "lib/string.h"
10af3676 111#include "lib/alloca.h"
1f2eb2ac 112#include "lib/flowspec.h"
7d875e09 113
09ee846d
OZ
114#ifdef CONFIG_BGP
115#include "proto/bgp/bgp.h"
116#endif
117
acb60628
OZ
118pool *rt_table_pool;
119
2326b001 120static slab *rte_slab;
977b82fb 121linpool *rte_update_pool;
2326b001 122
863ecfc7 123list routing_tables;
5996da6a 124
cfe34a31
OZ
125static void rt_free_hostcache(rtable *tab);
126static void rt_notify_hostcache(rtable *tab, net *net);
127static void rt_update_hostcache(rtable *tab);
128static void rt_next_hop_update(rtable *tab);
f4a60a9b 129static inline void rt_prune_table(rtable *tab);
00b85905 130static inline void rt_schedule_notify(rtable *tab);
1f2eb2ac 131static void rt_flowspec_notify(rtable *tab, net *net);
a8a3d95b 132static void rt_kick_prune_timer(rtable *tab);
0c791f87 133
cfd46ee4 134
836a87b8
OZ
135static void
136net_init_with_trie(struct fib *f, void *N)
137{
138 rtable *tab = SKIP_BACK(rtable, fib, f);
139 net *n = N;
140
141 if (tab->trie)
142 trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
de6318f7
OZ
143
144 if (tab->trie_new)
145 trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
836a87b8
OZ
146}
147
04632fd7 148static inline void *
836a87b8
OZ
149net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
150{
151 TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
152 {
153 net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen);
154 net *best = NULL;
155 int best_pxlen = 0;
156
157 /* We need to do dst first matching. Since sadr addresses are hashed on dst
158 prefix only, find the hash table chain and go through it to find the
159 match with the longest matching src prefix. */
160 for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next)
161 {
162 net_addr_ip6_sadr *a = (void *) fn->addr;
163
164 if (net_equal_dst_ip6_sadr(&n, a) &&
165 net_in_net_src_ip6_sadr(&n, a) &&
166 (a->src_pxlen >= best_pxlen))
167 {
168 best = fib_node_to_user(&t->fib, fn);
169 best_pxlen = a->src_pxlen;
170 }
171 }
172
173 if (best)
174 return best;
175 }
176 TRIE_WALK_TO_ROOT_END;
177
178 return NULL;
179}
180
04632fd7 181
be17805c 182static inline void *
836a87b8 183net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0)
be17805c 184{
836a87b8
OZ
185 net_addr_ip6_sadr n;
186 net_copy_ip6_sadr(&n, n0);
be17805c
OZ
187
188 while (1)
189 {
190 net *best = NULL;
191 int best_pxlen = 0;
192
193 /* We need to do dst first matching. Since sadr addresses are hashed on dst
194 prefix only, find the hash table chain and go through it to find the
836a87b8
OZ
195 match with the longest matching src prefix. */
196 for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next)
be17805c
OZ
197 {
198 net_addr_ip6_sadr *a = (void *) fn->addr;
199
836a87b8
OZ
200 if (net_equal_dst_ip6_sadr(&n, a) &&
201 net_in_net_src_ip6_sadr(&n, a) &&
be17805c
OZ
202 (a->src_pxlen >= best_pxlen))
203 {
204 best = fib_node_to_user(&t->fib, fn);
205 best_pxlen = a->src_pxlen;
206 }
207 }
208
209 if (best)
210 return best;
211
836a87b8 212 if (!n.dst_pxlen)
be17805c
OZ
213 break;
214
836a87b8
OZ
215 n.dst_pxlen--;
216 ip6_clrbit(&n.dst_prefix, n.dst_pxlen);
be17805c
OZ
217 }
218
219 return NULL;
220}
221
836a87b8 222net *
286e2011 223net_route(rtable *tab, const net_addr *n)
0264ccf6 224{
286e2011 225 ASSERT(tab->addr_type == n->type);
9a91ea52 226 net_addr_union *nu = SKIP_BACK(net_addr_union, n, n);
0264ccf6 227
9a91ea52
MM
228#define TW(ipv, what) \
229 TRIE_WALK_TO_ROOT_IP##ipv(tab->trie, &(nu->ip##ipv), var) \
230 { what(ipv, var); } \
231 TRIE_WALK_TO_ROOT_END; return NULL;
836a87b8 232
9a91ea52
MM
233#define FW(ipv, what) do { \
234 net_addr_union nuc; net_copy(&nuc.n, n); \
235 while (1) { \
236 what(ipv, nuc.ip##ipv); if (!nuc.n.pxlen) return NULL; \
237 nuc.n.pxlen--; ip##ipv##_clrbit(&nuc.ip##ipv.prefix, nuc.ip##ipv.pxlen); \
238 } \
239} while(0); return NULL;
286e2011 240
9a91ea52
MM
241#define FVR_IP(ipv, var) \
242 net *r; if (r = net_find_valid(tab, (net_addr *) &var)) return r;
836a87b8 243
9a91ea52
MM
244#define FVR_VPN(ipv, var) \
245 net_addr_vpn##ipv _var0 = NET_ADDR_VPN##ipv(var.prefix, var.pxlen, nu->vpn##ipv.rd); FVR_IP(ipv, _var0);
286e2011 246
9a91ea52
MM
247 if (tab->trie)
248 switch (n->type) {
249 case NET_IP4: TW(4, FVR_IP);
250 case NET_VPN4: TW(4, FVR_VPN);
251 case NET_IP6: TW(6, FVR_IP);
252 case NET_VPN6: TW(6, FVR_VPN);
253
254 case NET_IP6_SADR:
255 return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n);
256 default:
257 return NULL;
258 }
259 else
260 switch (n->type) {
261 case NET_IP4: FW(4, FVR_IP);
262 case NET_VPN4: FW(4, FVR_VPN);
263 case NET_IP6: FW(6, FVR_IP);
264 case NET_VPN6: FW(6, FVR_VPN);
265
266 case NET_IP6_SADR:
267 return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n);
268 default:
269 return NULL;
270 }
836a87b8 271
bb094fb6
MM
272#undef TW
273#undef FW
274#undef FVR_IP
275#undef FVR_VPN
836a87b8
OZ
276}
277
0264ccf6 278
286e2011
OZ
279/**
280 * roa_check - check validity of route origination in a ROA table
281 * @tab: ROA table
282 * @n: network prefix to check
283 * @asn: AS number of network prefix
284 *
285 * Implements RFC 6483 route validation for the given network prefix. The
286 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
287 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
288 * a candidate ROA with matching ASN and maxlen field greater than or equal to
289 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
290 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
291 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
292 * must have type NET_IP4 or NET_IP6, respectively.
293 */
294int
0264ccf6
PT
295net_roa_check(rtable *tab, const net_addr *n, u32 asn)
296{
bb094fb6
MM
297 net_addr_union *nu = SKIP_BACK(net_addr_union, n, n);
298 int anything = 0;
299 struct fib_node *fn;
300
301#define TW(ipv) do { \
302 TRIE_WALK_TO_ROOT_IP##ipv(tab->trie, &(nu->ip##ipv), var) { \
303 net_addr_roa##ipv roa0 = NET_ADDR_ROA##ipv(var.prefix, var.pxlen, 0, 0); \
304 ROA_PARTIAL_CHECK(ipv); \
305 } TRIE_WALK_TO_ROOT_END; \
306 return anything ? ROA_INVALID : ROA_UNKNOWN; \
307} while (0)
308
309#define FW(ipv) do { \
310 net_addr_roa##ipv roa0 = NET_ADDR_ROA##ipv(nu->ip##ipv.prefix, nu->ip##ipv.pxlen, 0, 0);\
311 while (1) { \
312 ROA_PARTIAL_CHECK(ipv); \
313 if (roa0.pxlen == 0) break; \
314 roa0.pxlen--; ip##ipv##_clrbit(&roa0.prefix, roa0.pxlen); \
315 } \
316} while (0)
317
318#define ROA_PARTIAL_CHECK(ipv) do { \
319 for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) \
320 { \
321 net_addr_roa##ipv *roa = (void *) fn->addr; \
322 net *r = fib_node_to_user(&tab->fib, fn); \
323 if (net_equal_prefix_roa##ipv(roa, &roa0) && rte_is_valid(r->routes)) \
324 { \
325 anything = 1; \
326 if (asn && (roa->asn == asn) && (roa->max_pxlen >= nu->ip##ipv.pxlen)) \
327 return ROA_VALID; \
328 } \
329 } \
330} while (0)
331
286e2011 332 if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
836a87b8 333 {
bb094fb6
MM
334 if (tab->trie) TW(4);
335 else FW(4);
836a87b8 336 }
286e2011 337 else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
836a87b8 338 {
bb094fb6
MM
339 if (tab->trie) TW(6);
340 else FW(6);
836a87b8 341 }
bb094fb6
MM
342
343 return anything ? ROA_INVALID : ROA_UNKNOWN;
344#undef ROA_PARTIAL_CHECK
345#undef TW
346#undef FW
d1e146f2 347}
2326b001 348
bc10975a
MM
349/**
350 * aspa_check - check validity of AS Path in an ASPA table
351 * @tab: ASPA table
352 * @path: AS Path to check
353 *
354 * Implements draft-ietf-sidrops-aspa-verification-16.
355 */
38195ac6 356enum aspa_result aspa_check(rtable *tab, const adata *path, bool force_upstream)
bc10975a
MM
357{
358 struct lp_state lps;
359 lp_save(tmp_linpool, &lps);
360
361 /* No support for confed paths */
362 if (as_path_contains_confed(path))
997d2f57 363 return ASPA_INVALID;
bc10975a 364
38195ac6 365 /* Check path length */
bc10975a 366 uint len = as_path_getlen(path);
38195ac6 367 if (len == 0)
997d2f57 368 return ASPA_INVALID;
38195ac6
MM
369
370 /* Normalize the AS Path: drop stuffings */
bc10975a
MM
371 u32 *asns = alloca(sizeof(u32) * len);
372 uint ppos = 0;
38195ac6 373 uint nsz = 0;
bc10975a
MM
374 while (as_path_walk(path, &ppos, &asns[nsz]))
375 if ((nsz == 0) || (asns[nsz] != asns[nsz-1]))
376 nsz++;
377
378 /* Find the provider blocks for every AS on the path
379 * and check allowed directions */
38195ac6 380 uint max_up = 0, min_up = 0, max_down = 0, min_down = 0;
bc10975a 381
38195ac6 382 for (uint ap=0; ap<nsz; ap++)
bc10975a
MM
383 {
384 net_addr_union nau = { .aspa = NET_ADDR_ASPA(asns[ap]), };
385 net *n = net_find(tab, &nau.n);
bc10975a 386
38195ac6 387 bool found = false, down = false, up = false;
bc10975a 388
38195ac6 389 for (rte *e = (n ? n->routes: NULL); e; e = e->next)
bc10975a
MM
390 {
391 if (!rte_is_valid(e))
392 continue;
393
394 eattr *ea = ea_find(e->attrs->eattrs, EA_ASPA_PROVIDERS);
395 if (!ea)
396 continue;
397
38195ac6
MM
398 /* Actually found some ASPA */
399 found = true;
400
bc10975a
MM
401 for (uint i=0; i * sizeof(u32) < ea->u.ptr->length; i++)
402 {
403 if ((ap > 0) && ((u32 *) ea->u.ptr->data)[i] == asns[ap-1])
38195ac6 404 up = true;
bc10975a 405 if ((ap + 1 < nsz) && ((u32 *) ea->u.ptr->data)[i] == asns[ap+1])
38195ac6 406 down = true;
bc10975a 407
38195ac6
MM
408 if (down && up)
409 /* Both peers found */
410 goto end_of_aspa;
bc10975a
MM
411 }
412 }
38195ac6 413end_of_aspa:;
bc10975a 414
38195ac6
MM
415 /* Fast path for the upstream check */
416 if (force_upstream)
417 {
418 if (!found)
419 /* Move min-upstream */
420 min_up = ap;
421 else if (ap && !up)
422 /* Exists but doesn't allow this upstream */
997d2f57 423 return ASPA_INVALID;
38195ac6 424 }
bc10975a 425
38195ac6
MM
426 /* Fast path for no ASPA here */
427 else if (!found)
428 {
429 /* Extend max-downstream (min-downstream is stopped by unknown) */
430 max_down = ap+1;
bc10975a 431
38195ac6
MM
432 /* Move min-upstream (can't include unknown) */
433 min_up = ap;
434 }
bc10975a 435
38195ac6
MM
436 /* ASPA exists and downstream may be extended */
437 else if (down)
438 {
439 /* Extending max-downstream always */
440 max_down = ap+1;
bc10975a 441
38195ac6
MM
442 /* Extending min-downstream unless unknown seen */
443 if (min_down == ap)
444 min_down = ap+1;
445
446 /* Downstream only */
447 if (!up)
448 min_up = max_up = ap;
449 }
450
451 /* No extension for downstream, force upstream only from now */
452 else
453 {
454 force_upstream = 1;
455
456 /* Not even upstream, move the ending here */
457 if (!up)
458 min_up = max_up = ap;
459 }
460 }
461
462 /* Is the path surely valid? */
463 if (min_up <= min_down)
464 return ASPA_VALID;
465
466 /* Is the path maybe valid? */
467 if (max_up <= max_down)
468 return ASPA_UNKNOWN;
469
470 /* Now there is surely a valley there. */
997d2f57 471 return ASPA_INVALID;
bc10975a
MM
472}
473
58740ed4
MM
474/**
475 * rte_find - find a route
476 * @net: network node
094d2bdb 477 * @src: route source
58740ed4
MM
478 *
479 * The rte_find() function returns a route for destination @net
094d2bdb 480 * which is from route source @src.
58740ed4 481 */
2326b001 482rte *
094d2bdb 483rte_find(net *net, struct rte_src *src)
2326b001
MM
484{
485 rte *e = net->routes;
486
5cff1d5f 487 while (e && e->src != src)
2326b001
MM
488 e = e->next;
489 return e;
490}
491
58740ed4
MM
492/**
493 * rte_get_temp - get a temporary &rte
3ce8c610 494 * @a: attributes to assign to the new route (a &rta; in case it's
2e9b2421 495 * un-cached, rte_update() will create a cached copy automatically)
6d1ae197 496 * @src: route source
58740ed4
MM
497 *
498 * Create a temporary &rte and bind it with the attributes @a.
58740ed4 499 */
2326b001 500rte *
5cff1d5f 501rte_get_temp(rta *a, struct rte_src *src)
2326b001
MM
502{
503 rte *e = sl_alloc(rte_slab);
504
505 e->attrs = a;
5ea39eaa 506 e->id = 0;
0cdbd397 507 e->flags = 0;
6d1ae197 508 e->pflags = 0;
5cff1d5f 509 rt_lock_source(e->src = src);
2326b001
MM
510 return e;
511}
512
e2dc2f30
MM
513rte *
514rte_do_cow(rte *r)
515{
516 rte *e = sl_alloc(rte_slab);
517
518 memcpy(e, r, sizeof(rte));
5cff1d5f
MM
519
520 rt_lock_source(e->src);
e2dc2f30
MM
521 e->attrs = rta_clone(r->attrs);
522 e->flags = 0;
523 return e;
524}
525
8d9eef17
OZ
526/**
527 * rte_cow_rta - get a private writable copy of &rte with writable &rta
528 * @r: a route entry to be copied
529 * @lp: a linpool from which to allocate &rta
530 *
531 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
532 * modification. There are three possibilities: First, both &rte and &rta are
533 * private copies, in that case they are returned unchanged. Second, &rte is
534 * private copy, but &rta is cached, in that case &rta is duplicated using
535 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
536 * both structures are duplicated by rte_do_cow() and rta_do_cow().
537 *
538 * Note that in the second case, cached &rta loses one reference, while private
539 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
540 * nexthops, ...) with it. To work properly, original shared &rta should have
541 * another reference during the life of created private copy.
542 *
543 * Result: a pointer to the new writable &rte with writable &rta.
544 */
545rte *
546rte_cow_rta(rte *r, linpool *lp)
547{
548 if (!rta_is_cached(r->attrs))
549 return r;
550
13c0be19 551 r = rte_cow(r);
8d9eef17 552 rta *a = rta_do_cow(r->attrs, lp);
13c0be19
MM
553 rta_free(r->attrs);
554 r->attrs = a;
555 return r;
8d9eef17
OZ
556}
557
2326b001
MM
558static int /* Actually better or at least as good as */
559rte_better(rte *new, rte *old)
560{
d9f330c5
MM
561 int (*better)(rte *, rte *);
562
cf98be7b 563 if (!rte_is_valid(old))
2326b001 564 return 1;
cf98be7b
OZ
565 if (!rte_is_valid(new))
566 return 0;
567
eb937358 568 if (new->attrs->pref > old->attrs->pref)
2326b001 569 return 1;
eb937358 570 if (new->attrs->pref < old->attrs->pref)
2326b001 571 return 0;
5cff1d5f 572 if (new->src->proto->proto != old->src->proto->proto)
4c1b4e1a
MM
573 {
574 /*
575 * If the user has configured protocol preferences, so that two different protocols
576 * have the same preference, try to break the tie by comparing addresses. Not too
577 * useful, but keeps the ordering of routes unambiguous.
578 */
5cff1d5f 579 return new->src->proto->proto > old->src->proto->proto;
4c1b4e1a 580 }
5cff1d5f 581 if (better = new->src->proto->rte_better)
d9f330c5
MM
582 return better(new, old);
583 return 0;
2326b001
MM
584}
585
8d9eef17
OZ
586static int
587rte_mergable(rte *pri, rte *sec)
588{
589 int (*mergable)(rte *, rte *);
590
591 if (!rte_is_valid(pri) || !rte_is_valid(sec))
592 return 0;
593
eb937358 594 if (pri->attrs->pref != sec->attrs->pref)
8d9eef17
OZ
595 return 0;
596
5cff1d5f 597 if (pri->src->proto->proto != sec->src->proto->proto)
8d9eef17
OZ
598 return 0;
599
5cff1d5f 600 if (mergable = pri->src->proto->rte_mergable)
8d9eef17
OZ
601 return mergable(pri, sec);
602
603 return 0;
604}
605
cfd46ee4 606static void
61dae32b 607rte_trace(struct channel *c, rte *e, int dir, char *msg)
cfd46ee4 608{
21213be5 609 log(L_TRACE "%s.%s %c %s %N %luL %uG %s",
60880b53 610 c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr, e->src->private_id, e->src->global_id,
61dae32b 611 rta_dest_name(e->attrs->dest));
cfd46ee4
MM
612}
613
614static inline void
61dae32b 615rte_trace_in(uint flag, struct channel *c, rte *e, char *msg)
cfd46ee4 616{
61dae32b
OZ
617 if ((c->debug & flag) || (c->proto->debug & flag))
618 rte_trace(c, e, '>', msg);
cfd46ee4
MM
619}
620
621static inline void
61dae32b 622rte_trace_out(uint flag, struct channel *c, rte *e, char *msg)
cfd46ee4 623{
61dae32b
OZ
624 if ((c->debug & flag) || (c->proto->debug & flag))
625 rte_trace(c, e, '<', msg);
cfd46ee4
MM
626}
627
00a09f3c 628static rte *
13c0be19 629export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
529c4149 630{
f4a60a9b 631 struct proto *p = c->proto;
0b39b1cb 632 const struct filter *filter = c->out_filter;
f4a60a9b 633 struct proto_stats *stats = &c->stats;
00a09f3c
OZ
634 rte *rt;
635 int v;
c0adf7e9 636
00a09f3c
OZ
637 rt = rt0;
638 *rt_free = NULL;
7de45ba4 639
d429bc5c 640 v = p->preexport ? p->preexport(c, rt) : 0;
00a09f3c
OZ
641 if (v < 0)
642 {
643 if (silent)
644 goto reject;
11361a10 645
00a09f3c 646 stats->exp_updates_rejected++;
36da2857 647 if (v == RIC_REJECT)
61dae32b 648 rte_trace_out(D_FILTERS, c, rt, "rejected by protocol");
00a09f3c
OZ
649 goto reject;
650 }
651 if (v > 0)
e2dc2f30 652 {
00a09f3c 653 if (!silent)
61dae32b 654 rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol");
00a09f3c 655 goto accept;
e2dc2f30 656 }
925fe2d3 657
00a09f3c 658 v = filter && ((filter == FILTER_REJECT) ||
13c0be19
MM
659 (f_run(filter, &rt, pool,
660 (silent ? FF_SILENT : 0)) > F_ACCEPT));
00a09f3c
OZ
661 if (v)
662 {
663 if (silent)
664 goto reject;
665
666 stats->exp_updates_filtered++;
61dae32b 667 rte_trace_out(D_FILTERS, c, rt, "filtered out");
00a09f3c 668 goto reject;
e2dc2f30 669 }
925fe2d3 670
00a09f3c
OZ
671 accept:
672 if (rt != rt0)
673 *rt_free = rt;
674 return rt;
675
676 reject:
677 /* Discard temporary rte */
678 if (rt != rt0)
679 rte_free(rt);
680 return NULL;
681}
682
a290da25 683static inline rte *
13c0be19 684export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
a290da25 685{
13c0be19 686 return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
a290da25
PT
687}
688
00a09f3c 689static void
13c0be19 690do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
00a09f3c 691{
f4a60a9b
OZ
692 struct proto *p = c->proto;
693 struct proto_stats *stats = &c->stats;
925fe2d3 694
5ea39eaa
OZ
695 if (refeed && new)
696 c->refeed_count++;
925fe2d3 697
5ea39eaa 698 /* Apply export limit */
f4a60a9b 699 struct channel_limit *l = &c->out_limit;
5ea39eaa
OZ
700 if (l->action && !old && new)
701 {
702 if (stats->exp_routes >= l->limit)
703 channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
abced4a9 704
5ea39eaa
OZ
705 if (l->state == PLS_BLOCKED)
706 {
707 stats->exp_updates_rejected++;
61dae32b 708 rte_trace_out(D_FILTERS, c, new, "rejected [limit]");
5ea39eaa 709 return;
d9b77cc2 710 }
5ea39eaa 711 }
d9b77cc2 712
5ea39eaa 713 /* Apply export table */
4d48ede5
MM
714 if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed))
715 return;
ab758e4f 716
925fe2d3 717 if (new)
9db74169 718 stats->exp_updates_accepted++;
925fe2d3 719 else
9db74169 720 stats->exp_withdraws_accepted++;
925fe2d3 721
5ea39eaa
OZ
722 if (old)
723 {
724 bmap_clear(&c->export_map, old->id);
725 stats->exp_routes--;
726 }
727
925fe2d3 728 if (new)
5ea39eaa
OZ
729 {
730 bmap_set(&c->export_map, new->id);
9db74169 731 stats->exp_routes++;
5ea39eaa 732 }
925fe2d3 733
cfd46ee4 734 if (p->debug & D_ROUTES)
5ea39eaa
OZ
735 {
736 if (new && old)
61dae32b 737 rte_trace_out(D_ROUTES, c, new, "replaced");
5ea39eaa 738 else if (new)
61dae32b 739 rte_trace_out(D_ROUTES, c, new, "added");
5ea39eaa 740 else if (old)
61dae32b 741 rte_trace_out(D_ROUTES, c, old, "removed");
5ea39eaa
OZ
742 }
743
13c0be19 744 p->rt_notify(p, c, net, new, old);
00a09f3c
OZ
745}
746
00a09f3c 747static void
5ea39eaa 748rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed)
00a09f3c 749{
5ea39eaa 750 // struct proto *p = c->proto;
00a09f3c 751 rte *new_free = NULL;
00a09f3c
OZ
752
753 if (new)
f4a60a9b 754 c->stats.exp_updates_received++;
00a09f3c 755 else
f4a60a9b 756 c->stats.exp_withdraws_received++;
00a09f3c 757
00a09f3c 758 if (new)
13c0be19 759 new = export_filter(c, new, &new_free, 0);
00a09f3c 760
5ea39eaa
OZ
761 if (old && !bmap_test(&c->export_map, old->id))
762 old = NULL;
00a09f3c 763
00a09f3c
OZ
764 if (!new && !old)
765 return;
766
13c0be19 767 do_rt_notify(c, net, new, old, refeed);
00a09f3c 768
5ea39eaa 769 /* Discard temporary rte */
00a09f3c
OZ
770 if (new_free)
771 rte_free(new_free);
00a09f3c
OZ
772}
773
774static void
5ea39eaa 775rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed)
00a09f3c 776{
5ea39eaa 777 // struct proto *p = c->proto;
00a09f3c
OZ
778 rte *new_best = NULL;
779 rte *old_best = NULL;
780 rte *new_free = NULL;
5ea39eaa 781 int new_first = 0;
00a09f3c 782
5ea39eaa
OZ
783 /*
784 * We assume that there are no changes in net route order except (added)
785 * new_changed and (removed) old_changed. Therefore, the function is not
786 * compatible with deterministic_med (where nontrivial reordering can happen
787 * as a result of a route change) and with recomputation of recursive routes
788 * due to next hop update (where many routes can be changed in one step).
789 *
790 * Note that we need this assumption just for optimizations, we could just
791 * run full new_best recomputation otherwise.
792 *
793 * There are three cases:
794 * feed or old_best is old_changed -> we need to recompute new_best
795 * old_best is before new_changed -> new_best is old_best, ignore
796 * old_best is after new_changed -> try new_changed, otherwise old_best
797 */
00a09f3c 798
5ea39eaa 799 if (net->routes)
f4a60a9b 800 c->stats.exp_updates_received++;
00a09f3c 801 else
f4a60a9b 802 c->stats.exp_withdraws_received++;
00a09f3c 803
5ea39eaa
OZ
804 /* Find old_best - either old_changed, or route for net->routes */
805 if (old_changed && bmap_test(&c->export_map, old_changed->id))
806 old_best = old_changed;
807 else
808 {
809 for (rte *r = net->routes; rte_is_valid(r); r = r->next)
00a09f3c 810 {
5ea39eaa
OZ
811 if (bmap_test(&c->export_map, r->id))
812 {
813 old_best = r;
00a09f3c 814 break;
5ea39eaa 815 }
00a09f3c 816
5ea39eaa
OZ
817 /* Note if new_changed found before old_best */
818 if (r == new_changed)
819 new_first = 1;
092c4930 820 }
5ea39eaa 821 }
092c4930 822
5ea39eaa
OZ
823 /* Find new_best */
824 if ((new_changed == old_changed) || (old_best == old_changed))
825 {
826 /* Feed or old_best changed -> find first accepted by filters */
827 for (rte *r = net->routes; rte_is_valid(r); r = r->next)
828 if (new_best = export_filter(c, r, &new_free, 0))
829 break;
830 }
831 else
832 {
833 /* Other cases -> either new_changed, or old_best (and nothing changed) */
834 if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0)))
835 new_best = new_changed;
836 else
00a09f3c 837 return;
5ea39eaa 838 }
00a09f3c 839
5ea39eaa
OZ
840 if (!new_best && !old_best)
841 return;
00a09f3c 842
5ea39eaa 843 do_rt_notify(c, net, new_best, old_best, refeed);
00a09f3c 844
5ea39eaa 845 /* Discard temporary rte */
00a09f3c
OZ
846 if (new_free)
847 rte_free(new_free);
529c4149
MM
848}
849
8d9eef17 850
4e276a89
MM
851static struct nexthop *
852nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
8d9eef17 853{
4e276a89 854 return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
8d9eef17
OZ
855}
856
857rte *
13c0be19 858rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
8d9eef17 859{
f4a60a9b 860 // struct proto *p = c->proto;
4e276a89 861 struct nexthop *nhs = NULL;
8d9eef17
OZ
862 rte *best0, *best, *rt0, *rt, *tmp;
863
864 best0 = net->routes;
865 *rt_free = NULL;
866
867 if (!rte_is_valid(best0))
868 return NULL;
869
13c0be19 870 best = export_filter_(c, best0, rt_free, pool, silent);
8d9eef17
OZ
871
872 if (!best || !rte_is_reachable(best))
873 return best;
874
875 for (rt0 = best0->next; rt0; rt0 = rt0->next)
876 {
877 if (!rte_mergable(best0, rt0))
878 continue;
879
13c0be19 880 rt = export_filter_(c, rt0, &tmp, pool, 1);
8d9eef17
OZ
881
882 if (!rt)
883 continue;
884
885 if (rte_is_reachable(rt))
4e276a89 886 nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);
8d9eef17
OZ
887
888 if (tmp)
889 rte_free(tmp);
890 }
891
892 if (nhs)
893 {
4e276a89 894 nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);
8d9eef17
OZ
895
896 if (nhs->next)
897 {
a290da25 898 best = rte_cow_rta(best, pool);
4e276a89 899 nexthop_link(best->attrs, nhs);
8d9eef17
OZ
900 }
901 }
902
903 if (best != best0)
904 *rt_free = best;
905
906 return best;
907}
908
/*
 * Export a change to a channel in RA_MERGED mode: rebuild the merged
 * (ECMP-combined) best route via rt_export_merged() and hand it to
 * do_rt_notify(). All rte arguments must be either NULL or rte_is_valid().
 */
static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
		 rte *new_best, rte *old_best, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) &&
      (new_changed != old_changed) &&
      !rte_mergable(new_best, new_changed) &&
      !rte_mergable(old_best, old_changed))
    return;

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0);

  /* Check old merged route; it was exported iff its id is in the export map */
  if (old_best && !bmap_test(&c->export_map, old_best->id))
    old_best = NULL;

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}
951
952
9a8f20fc
MM
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_UNDEF or RA_ANY)
 * @net: network in question
 * @new: the new or changed route
 * @old: the previous route replaced by the new one
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 *
 * This function gets a routing table update and announces it to all protocols
 * that are connected to the same table by their channels.
 *
 * There are two ways of how routing table changes are announced. First, there
 * is a change of just one route in @net (which may cause a change of the best
 * route of the network). In this case @new and @old describe the changed route
 * and @new_best and @old_best describe the best routes. Other routes are not
 * affected, but in a sorted table the order of other routes might change.
 *
 * Second, there is a bulk change of multiple routes in @net, with shared best
 * route selection. In such case separate route changes are described using
 * @type of %RA_ANY, with @new and @old specifying the changed route, while
 * @new_best and @old_best are NULL. After that, another notification is done
 * where @new_best and @old_best are filled (may be the same), but @new and @old
 * are NULL.
 *
 * The function announces the change to all associated channels. For each
 * channel, an appropriate preprocessing is done according to channel &ra_mode.
 * For example, %RA_OPTIMAL channels receive just changes of best routes.
 *
 * In general, we first call preexport() hook of a protocol, which performs
 * basic checks on the route (each protocol has a right to veto or force accept
 * of the route before any filter is asked). Then we consult an export filter
 * of the channel and verify the old route in an export map of the channel.
 * Finally, the rt_notify() hook of the protocol gets called.
 *
 * Note that there are also calls of rt_notify() hooks due to feed, but that is
 * done outside of scope of rte_announce().
 */
static void
rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old,
	     rte *new_best, rte *old_best)
{
  /* Invalid routes are announced as if absent (withdrawals) */
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!new && !old && !new_best && !old_best)
    return;

  if (new_best != old_best)
    {
      /* Keep per-sender count of currently preferred (best) routes */
      if (new_best)
	new_best->sender->stats.pref_routes++;
      if (old_best)
	old_best->sender->stats.pref_routes--;

      if (tab->hostcache)
	rt_notify_hostcache(tab, net);

      if (!EMPTY_LIST(tab->flowspec_links))
	rt_flowspec_notify(tab, net);
    }

  rt_schedule_notify(tab);

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
    {
      if (c->export_state == ES_DOWN)
	continue;

      /* RA_UNDEF (0) matches every channel mode; a specific @type only its own */
      if (type && (type != c->ra_mode))
	continue;

      switch (c->ra_mode)
	{
	case RA_OPTIMAL:
	  if (new_best != old_best)
	    rt_notify_basic(c, net, new_best, old_best, 0);
	  break;

	case RA_ANY:
	  if (new != old)
	    rt_notify_basic(c, net, new, old, 0);
	  break;

	case RA_ACCEPTED:
	  /*
	   * The (new != old) condition is problematic here, as it would break
	   * the second usage pattern (announcement after bulk change, used in
	   * rt_next_hop_update_net(), which sends both new and old as NULL).
	   *
	   * But recursive next hops do not work with sorted tables anyways,
	   * such configuration is forbidden in BGP and not supported in
	   * rt_notify_accepted().
	   *
	   * The condition is needed to eliminate spurious announcements where
	   * both old and new routes are not valid (so they are NULL).
	   */
	  if (new != old)
	    rt_notify_accepted(c, net, new, old, 0);
	  break;

	case RA_MERGED:
	  rt_notify_merged(c, net, new, old, new_best, old_best, 0);
	  break;
	}
    }
}
1071
421838ff
MM
/*
 * Sanity-check an incoming route before insertion: valid prefix, routable
 * address class/scope, dest value matching the network type, and a sorted
 * next hop list for unicast routes.
 * Returns 1 if the route is acceptable, 0 (after logging a warning) otherwise.
 */
static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  if (!net_validate(n->n.addr))
    {
      log(L_WARN "Ignoring bogus prefix %N received via %s",
	  n->n.addr, e->sender->proto->name);
      return 0;
    }

  /* FIXME: better handling different nettypes */
  c = !net_is_flow(n->n.addr) ?
    net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
    {
      log(L_WARN "Ignoring bogus route %N received via %s",
	  n->n.addr, e->sender->proto->name);
      return 0;
    }

  /* A NB_DEST net must have a dest, a non-NB_DEST net must not */
  if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
    {
      /* Exception for flowspec that failed validation */
      if (net_is_flow(n->n.addr) && (e->attrs->dest == RTD_UNREACHABLE))
	return 1;

      log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
	  n->n.addr, e->attrs->dest, e->sender->proto->name);
      return 0;
    }

  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
    {
      log(L_WARN "Ignoring unsorted multipath route %N received via %s",
	  n->n.addr, e->sender->proto->name);
      return 0;
    }

  return 1;
}
1115
4d48ede5
MM
/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 * It drops the reference to the route source, releases the attribute list
 * (only if it is cached) and returns the &rte to its slab.
 */
void
rte_free(rte *e)
{
  rt_unlock_source(e->src);
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(e);
}
1130
/* Fast-path variant of rte_free() for routes whose attributes are already
   cached -- rta_free() is called unconditionally */
static inline void
rte_free_quick(rte *e)
{
  rt_unlock_source(e->src);
  rta_free(e->attrs);
  sl_free(e);
}
1138
/*
 * Semantic route equality: same (interned) attribute list, same route source
 * and same filtered state.
 */
int
rte_same(rte *x, rte *y)
{
  /* rte.flags / rte.pflags are not checked, as they are internal to rtable */
  return
    x->attrs == y->attrs &&
    x->src == y->src &&
    rte_is_filtered(x) == rte_is_filtered(y);
}
1148
70577529
OZ
1149static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
1150
/*
 * Core import routine: replace the route from @src in @net by @new (NULL for
 * withdrawal), enforce receive/import limits, keep the route list ordered
 * (fully sorted table, or best-first otherwise), assign route ids, and
 * announce the resulting change. Frees @old / consumes @new as appropriate.
 */
static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->src == src)
	{
	  /* If there is the same route in the routing table but from
	   * a different sender, then there are two paths from the
	   * source protocol to this routing table through transparent
	   * pipes, which is not allowed.
	   *
	   * We log that and ignore the route. If it is withdraw, we
	   * ignore it completely (there might be 'spurious withdraws',
	   * see FIXME in do_rte_announce())
	   */
	  if (old->sender->proto != p)
	    {
	      if (new)
		{
		  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
		      net->n.addr, table->name);
		  rte_free_quick(new);
		}
	      return;
	    }

	  if (new && rte_same(old, new))
	    {
	      /* No changes, ignore the new route and refresh the old one */

	      old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);

	      if (!rte_is_filtered(new))
		{
		  stats->imp_updates_ignored++;
		  rte_trace_in(D_ROUTES, c, new, "ignored");
		}

	      rte_free_quick(new);
	      return;
	    }
	  *k = old->next;
	  table->rt_count--;
	  break;
	}
      k = &old->next;
      before_old = old;
    }

  /* Save the last accessed position */
  rte **pos = k;

  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  /* Receive limit counts all routes (including filtered ones) */
  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new && !c->in_table)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
	channel_notify_limit(c, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In receive limit the situation is simple, old is NULL so
	     we just free new and exit like nothing happened */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
	  rte_free_quick(new);
	  return;
	}
    }

  /* Import limit counts only accepted (non-filtered) routes */
  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
	channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In import limit the situation is more complicated. We
	     shouldn't just drop the route, we should handle it like
	     it was filtered. We also have to continue the route
	     processing if old or new is non-NULL, but we should exit
	     if both are NULL as this case is probably assumed to be
	     already handled. */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, c, new, "ignored [limit]");

	  if (c->in_keep_filtered)
	    new->flags |= REF_FILTERED;
	  else
	    { rte_free_quick(new); new = NULL; }

	  /* Note that old && !new could be possible when
	     c->in_keep_filtered changed in the recent past. */

	  if (!old && !new)
	    return;

	  new_ok = 0;
	  goto skip_stats1;
	}
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

  if (old_ok || new_ok)
    table->last_rt_change = current_time();

 skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
	{
	  if (before_old && !rte_better(new, before_old))
	    k = &before_old->next;
	  else
	    k = &net->routes;

	  for (; *k; k=&(*k)->next)
	    if (rte_better(new, *k))
	      break;

	  new->next = *k;
	  *k = new;

	  table->rt_count++;
	}
    }
  else
    {
      /* If routes are not sorted, find the best route and move it on
	 the first position. There are several optimized cases. */

      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
	goto do_recalculate;

      if (new && rte_better(new, old_best))
	{
	  /* The first case - the new route is clearly optimal,
	     we link it at the first position */

	  new->next = net->routes;
	  net->routes = new;

	  table->rt_count++;
	}
      else if (old == old_best)
	{
	  /* The second case - the old best route disappeared, we add the
	     new route (if we have any) to the list (we don't care about
	     position) and then we elect the new optimal route and relink
	     that route at the first position and announce it. New optimal
	     route might be NULL if there is no more routes */

	do_recalculate:
	  /* Add the new route to the list */
	  if (new)
	    {
	      new->next = *pos;
	      *pos = new;

	      table->rt_count++;
	    }

	  /* Find a new optimal route (if there is any) */
	  if (net->routes)
	    {
	      rte **bp = &net->routes;
	      for (k=&(*bp)->next; *k; k=&(*k)->next)
		if (rte_better(*k, *bp))
		  bp = k;

	      /* And relink it */
	      rte *best = *bp;
	      *bp = best->next;
	      best->next = net->routes;
	      net->routes = best;
	    }
	}
      else if (new)
	{
	  /* The third case - the new route is not better than the old
	     best route (therefore old_best != NULL) and the old best
	     route was not removed (therefore old_best == net->routes).
	     We just link the new route to the old/last position. */

	  new->next = *pos;
	  *pos = new;

	  table->rt_count++;
	}
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
    }

  if (new)
    {
      new->lastmod = current_time();

      /* Reuse the id of the replaced route, or allocate a fresh one */
      if (!old)
	{
	  new->id = hmap_first_zero(&table->id_map);
	  hmap_set(&table->id_map, new->id);
	}
      else
	new->id = old->id;
    }

  /* Log the route change */
  if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES))
    {
      if (new_ok)
	rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
	{
	  if (old != old_best)
	    rte_trace(c, old, '>', "removed");
	  else if (rte_is_ok(net->routes))
	    rte_trace(c, old, '>', "removed [replaced]");
	  else
	    rte_trace(c, old, '>', "removed [sole]");
	}
    }

  /* Propagate the route change */
  rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best);

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_threshold))
    rt_kick_prune_timer(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    {
      /* Release the route id only when nothing inherited it */
      if (!new)
	hmap_clear(&table->id_map, old->id);

      rte_free_quick(old);
    }
}
1432
e2dc2f30
MM
static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

/* Enter a (possibly nested) route update critical section */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

/* Leave the critical section; the temporary linpool is flushed only when
   the outermost update finishes */
static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
1447
58740ed4
MM
/**
 * rte_update2 - enter a new update to a routing table
 * @c: channel doing the update (also determines the target table)
 * @n: network address of the affected destination
 * @new: a &rte representing the new route or %NULL for route removal
 * @src: route source originating the update
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the route source that originally created the route and the
 * meaning of protocol-dependent data of @new. If @new is not %NULL, @src has
 * to correspond to @new; the channel @c is stored in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks,
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), consults import filters of the channel to see
 * if the route should be accepted and/or its attributes modified, and interns
 * the attribute list.
 *
 * Now, having a "public" version of the route, we automatically find any old
 * route defined by the route source @src for network @n, replace it by the
 * new one (or remove it if @new is %NULL), recalculate the optimal route for
 * this destination and finally broadcast the change (if any) to all routing
 * protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */
void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  const struct filter *filter = c->in_filter;
  struct mpls_fec *fec = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
    {
      /* Create a temporary table node */
      nn = alloca(sizeof(net) + n->length);
      memset(nn, 0, sizeof(net) + n->length);
      net_copy(nn->n.addr, n);

      new->net = nn;
      new->sender = c;

      stats->imp_updates_received++;
      if (!rte_validate(new))
	{
	  rte_trace_in(D_FILTERS, c, new, "invalid");
	  stats->imp_updates_invalid++;
	  goto drop;
	}

      if (filter == FILTER_REJECT)
	{
	  stats->imp_updates_filtered++;
	  rte_trace_in(D_FILTERS, c, new, "filtered out");

	  if (! c->in_keep_filtered)
	    goto drop;

	  /* new is a private copy, i could modify it */
	  new->flags |= REF_FILTERED;
	}
      else if (filter)
	{
	  int fr = f_run(filter, &new, rte_update_pool, 0);
	  if (fr > F_ACCEPT)
	    {
	      stats->imp_updates_filtered++;
	      rte_trace_in(D_FILTERS, c, new, "filtered out");

	      if (! c->in_keep_filtered)
		goto drop;

	      new->flags |= REF_FILTERED;
	    }
	}

      if (p->mpls_map)
	{
	  if (mpls_handle_rte(p->mpls_map, n, new, rte_update_pool, &fec) < 0)
	    {
	      rte_trace_in(D_FILTERS, c, new, "invalid");
	      stats->imp_updates_invalid++;
	      goto drop;
	    }
	}

      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
	new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;

      /* Use the actual struct network, not the dummy one */
      nn = net_get(c->table, n);
      new->net = nn;
    }
  else
    {
      stats->imp_withdraws_received++;

      if (!(nn = net_find(c->table, n)) || !src)
	{
	  stats->imp_withdraws_ignored++;
	  rte_update_unlock();
	  return;
	}
    }

 recalc:
  /* And recalculate the best route */
  rte_recalculate(c, nn, new, src);

  /* NOTE(review): on the drop path below with no existing net node,
     mpls_handle_rte_cleanup() is not reached -- presumably the failed
     mpls_handle_rte() leaves nothing to clean up; verify in mpls code. */
  if (p->mpls_map)
    mpls_handle_rte_cleanup(p->mpls_map, &fec);

  rte_update_unlock();
  return;

 drop:
  rte_free(new);
  new = NULL;
  /* A rejected update may still withdraw a previously accepted route */
  if (nn = net_find(c->table, n))
    goto recalc;

  rte_update_unlock();
}
1594
cfe34a31
OZ
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old,
	       rte *new_best, rte *old_best)
{
  /* Wrap the announcement in an update lock so temporary data allocated
     from rte_update_pool during export is flushed afterwards */
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best);
  rte_update_unlock();
}
1605
3e236955
MM
static inline void
rte_discard(rte *old)	/* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  /* A NULL new route withdraws @old from its table */
  rte_recalculate(old->sender, old->net, NULL, old->src);
  rte_update_unlock();
}
1613
5bd73431
OZ
/* Modify existing route by protocol hook, used for long-lived graceful restart */
static inline void
rte_modify(rte *old)
{
  rte_update_lock();

  rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
  if (new != old)
    {
      if (new)
	{
	  /* The hook may return uncached attributes; intern them first */
	  if (!rta_is_cached(new->attrs))
	    new->attrs = rta_lookup(new->attrs);
	  /* Clear the modification request, mark as copy-on-write */
	  new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
	}

      /* new == NULL here means withdrawal of the old route */
      rte_recalculate(old->sender, old->net, new, old->src);
    }

  rte_update_unlock();
}
1635
36da2857
OZ
/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter)
{
  struct proto *p = c->proto;
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  int v = p->preexport ? p->preexport(c, rt) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);

  /* Discard temporary rte (f_run() may have replaced it with a copy) */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}
1662
6eda3f13
OZ
1663
/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for given routing table and announce
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @c as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  if (c->debug & D_EVENTS)
    log(L_TRACE "%s.%s: Route refresh begin", c->proto->name, c->name);

  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if (e->sender == c)
	  e->flags |= REF_STALE;
    }
  FIB_WALK_END;
}
1693
6eda3f13
OZ
/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for given routing table and announce
 * hook. See rt_refresh_begin() for description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  if (c->debug & D_EVENTS)
    log(L_TRACE "%s.%s: Route refresh end", c->proto->name, c->name);

  int prune = 0;

  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if ((e->sender == c) && (e->flags & REF_STALE))
	  {
	    /* Still stale -- was not re-sent during the cycle, discard it */
	    e->flags |= REF_DISCARD;
	    prune = 1;
	  }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
1725
5bd73431
OZ
/*
 * Mark all non-filtered stale routes of channel @c in table @t with
 * REF_MODIFY so the prune loop passes them to the protocol's rte_modify()
 * hook (long-lived graceful restart); schedule pruning if any were found.
 */
void
rt_modify_stale(rtable *t, struct channel *c)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for (e = n->routes; e; e = e->next)
	if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
	  {
	    e->flags |= REF_MODIFY;
	    prune = 1;
	  }
    }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
0c791f87 1746
58740ed4
MM
/**
 * rte_dump - dump a route
 * @dreq: dump request providing the output sink for RDUMP()
 * @e: &rte to be dumped
 *
 * This functions dumps contents of a &rte to debug output.
 */
void
rte_dump(struct dump_request *dreq, rte *e)
{
  net *n = e->net;
  RDUMP("%-1N ", n->n.addr);
  RDUMP("PF=%02x ", e->pflags);
  rta_dump(dreq, e->attrs);
  RDUMP("\n");
}
62aa008a 1762
58740ed4
MM
/**
 * rt_dump - dump a routing table
 * @dreq: dump request providing the output sink for RDUMP()
 * @t: routing table to be dumped
 *
 * This function dumps contents of a given routing table to debug output.
 */
void
rt_dump(struct dump_request *dreq, rtable *t)
{
  RDUMP("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  FIB_WALK(&t->fib, net, n)
    {
      rte *e;
      for(e=n->routes; e; e=e->next)
	rte_dump(dreq, e);
    }
  FIB_WALK_END;
  RDUMP("\n");
}
62aa008a 1785
58740ed4
MM
/**
 * rt_dump_all - dump all routing tables
 * @dreq: dump request providing the output sink
 *
 * This function dumps contents of all routing tables to debug output.
 */
void
rt_dump_all(struct dump_request *dreq)
{
  rtable *t;
  node *n;

  WALK_LIST2(t, n, routing_tables, n)
    rt_dump(dreq, t);
}
1800
cfe34a31
OZ
1801static inline void
1802rt_schedule_hcu(rtable *tab)
1803{
1804 if (tab->hcu_scheduled)
1805 return;
1806
1807 tab->hcu_scheduled = 1;
1808 ev_schedule(tab->rt_event);
1809}
1810
/* Request a next hop update; if one is already running, record that it has
   to be re-run afterwards */
static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == NHU_CLEAN)
    ev_schedule(tab->rt_event);

  /* state change:
   * NHU_CLEAN -> NHU_SCHEDULED
   * NHU_RUNNING -> NHU_DIRTY
   */
  tab->nhu_state |= NHU_SCHEDULED;
}
1823
f4a60a9b
OZ
1824void
1825rt_schedule_prune(rtable *tab)
fb829de6 1826{
f4a60a9b
OZ
1827 if (tab->prune_state == 0)
1828 ev_schedule(tab->rt_event);
fb829de6 1829
f4a60a9b
OZ
1830 /* state change 0->1, 2->3 */
1831 tab->prune_state |= 1;
fb829de6
OZ
1832}
1833
f4a60a9b 1834
/*
 * Deferred table maintenance event: runs pending hostcache updates, next hop
 * recalculation and table pruning. The table is locked for the duration so
 * it cannot be removed while we work on it.
 */
static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}
1853
00b85905 1854
a8a3d95b
OZ
1855static void
1856rt_prune_timer(timer *t)
1857{
1858 rtable *tab = t->data;
1859
1860 if (tab->gc_counter >= tab->config->gc_threshold)
1861 rt_schedule_prune(tab);
1862}
1863
/* Arm the prune timer with a randomized delay, unless pruning is already
   pending or scheduled */
static void
rt_kick_prune_timer(rtable *tab)
{
  /* Return if prune is already scheduled */
  if (tm_active(tab->prune_timer) || (tab->prune_state & 1))
    return;

  /* Randomize GC period to +/- 50% */
  btime gc_period = tab->config->gc_period;
  gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period);
  tm_start(tab->prune_timer, gc_period);
}
1876
1877
00b85905
OZ
/* Compute when the table counts as settled: min_settle_time of quiet after
   the last change, but never later than max_settle_time after settling began */
static inline btime
rt_settled_time(rtable *tab)
{
  ASSUME(tab->base_settle_time != 0);

  return MIN(tab->last_rt_change + tab->config->min_settle_time,
	     tab->base_settle_time + tab->config->max_settle_time);
}
1886
/* Settle timer hook: re-arm the timer while the table is still settling,
   otherwise fire all subscriber hooks */
static void
rt_settle_timer(timer *t)
{
  rtable *tab = t->data;

  /* Settling was cancelled in the meantime */
  if (!tab->base_settle_time)
    return;

  btime settled_time = rt_settled_time(tab);
  if (current_time() < settled_time)
    {
      tm_set(tab->settle_timer, settled_time);
      return;
    }

  /* Settled */
  tab->base_settle_time = 0;

  struct rt_subscription *s;
  WALK_LIST(s, tab->subscribers)
    s->hook(s);
}
1909
/* Start (or restart) the settle period, lazily allocating the timer on
   first use */
static void
rt_kick_settle_timer(rtable *tab)
{
  tab->base_settle_time = current_time();

  if (!tab->settle_timer)
    tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0);

  if (!tm_active(tab->settle_timer))
    tm_set(tab->settle_timer, rt_settled_time(tab));
}
1921
1922static inline void
1923rt_schedule_notify(rtable *tab)
1924{
1925 if (EMPTY_LIST(tab->subscribers))
1926 return;
1927
1928 if (tab->base_settle_time)
1929 return;
1930
1931 rt_kick_settle_timer(tab);
1932}
1933
/* Register @s to be notified (via s->hook) once @tab settles after a burst
   of changes; holds a table lock for the lifetime of the subscription */
void
rt_subscribe(rtable *tab, struct rt_subscription *s)
{
  s->tab = tab;
  rt_lock_table(tab);
  add_tail(&tab->subscribers, &s->n);
}
1941
/* Remove a subscription made by rt_subscribe() and release the matching
   table lock */
void
rt_unsubscribe(struct rt_subscription *s)
{
  rem_node(&s->n);
  rt_unlock_table(s->tab);
}
1948
1f2eb2ac
OZ
1949static struct rt_flowspec_link *
1950rt_flowspec_find_link(rtable *src, rtable *dst)
1951{
1952 struct rt_flowspec_link *ln;
1953 WALK_LIST(ln, src->flowspec_links)
1954 if ((ln->src == src) && (ln->dst == dst))
1955 return ln;
1956
1957 return NULL;
1958}
1959
/* Create (or reference-count) a link so that best-route changes in IP table
   @src trigger flowspec revalidation in table @dst; both tables are locked
   while at least one link reference exists */
void
rt_flowspec_link(rtable *src, rtable *dst)
{
  ASSERT(rt_is_ip(src));
  ASSERT(rt_is_flow(dst));

  struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst);

  if (!ln)
    {
      rt_lock_table(src);
      rt_lock_table(dst);

      ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link));
      ln->src = src;
      ln->dst = dst;
      add_tail(&src->flowspec_links, &ln->n);
    }

  ln->uc++;
}
1981
/*
 * Drop one reference on the flowspec link between @src and @dst;
 * when the last reference is gone, remove the link and unlock both
 * tables (mirror of rt_flowspec_link()).
 */
void
rt_flowspec_unlink(rtable *src, rtable *dst)
{
  struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst);

  ASSERT(ln && (ln->uc > 0));

  ln->uc--;

  if (!ln->uc)
  {
    rem_node(&ln->n);
    mb_free(ln);

    rt_unlock_table(src);
    rt_unlock_table(dst);
  }
}
2000
/*
 * Propagate a change of network @net in IP table @src to all linked
 * flowspec tables: schedule their next-hop update (which re-runs
 * flowspec validation) if the changed prefix matches their trie of
 * tracked prefixes.
 */
static void
rt_flowspec_notify(rtable *src, net *net)
{
  /* Only IP tables are src links */
  ASSERT(rt_is_ip(src));

  struct rt_flowspec_link *ln;
  WALK_LIST(ln, src->flowspec_links)
  {
    rtable *dst = ln->dst;
    ASSERT(rt_is_flow(dst));

    /* No need to inspect it further if recalculation is already active */
    if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY))
      continue;

    if (trie_match_net(dst->flowspec_trie, net->n.addr))
      rt_schedule_nhu(dst);
  }
}
2021
/*
 * Reset the trie of prefixes tracked for flowspec revalidation by
 * flushing its linpool and allocating a fresh empty trie in it,
 * preserving the address-family flag.
 */
static void
rt_flowspec_reset_trie(rtable *tab)
{
  linpool *lp = tab->flowspec_trie->lp;
  int ipv4 = tab->flowspec_trie->ipv4;

  lp_flush(lp);
  tab->flowspec_trie = f_new_trie(lp, 0);
  tab->flowspec_trie->ipv4 = ipv4;
}
2032
ff397df7
MM
/*
 * Resource-class free hook for routing tables. Unlinks the table from
 * its config and the global table list and releases the host cache.
 * The FIB, id map, events and timers live in the table's resource
 * pool and are freed together with it, hence the commented-out calls.
 */
static void
rt_free(resource *_r)
{
  rtable *r = (rtable *) _r;

  DBG("Deleting routing table %s\n", r->name);
  ASSERT_DIE(r->use_count == 0);

  /* Internal tables are not registered in config / table list */
  if (r->internal)
    return;

  r->config->table = NULL;
  rem_node(&r->n);

  if (r->hostcache)
    rt_free_hostcache(r);

  /* Freed automagically by the resource pool
  fib_free(&r->fib);
  hmap_free(&r->id_map);
  rfree(r->rt_event);
  rfree(r->settle_timer);
  mb_free(r);
  */
}
2058
/*
 * Resource-class dump hook: print a one-line summary of the table
 * into the given dump request.
 */
static void
rt_res_dump(struct dump_request *dreq, resource *_r)
{
  rtable *r = (rtable *) _r;
  RDUMP("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n",
	r->name, net_label[r->addr_type], r->rt_count, r->use_count);
}
2066
/* Resource class descriptor for routing tables (see rt_free / rt_res_dump) */
static struct resclass rt_class = {
  .name = "Routing table",
  .size = sizeof(struct rtable),
  .free = rt_free,
  .dump = rt_res_dump,
  .lookup = NULL,
  .memsize = NULL,
};
2075
/*
 * rt_setup - create a routing table from its configuration
 * @pp: parent resource pool
 * @cf: table configuration
 *
 * Allocates a dedicated resource pool for the table, initializes the
 * FIB, optional prefix trie, lists and route-id map. Events, timers
 * and the flowspec trie are allocated only for regular (non-internal)
 * tables.
 */
rtable *
rt_setup(pool *pp, struct rtable_config *cf)
{
  pool *p = rp_newf(pp, "Routing table %s", cf->name);

  rtable *t = ralloc(p, &rt_class);
  t->rp = p;

  t->name = cf->name;
  t->config = cf;
  t->addr_type = cf->addr_type;
  t->debug = cf->debug;

  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);

  if (cf->trie_used)
  {
    t->trie = f_new_trie(lp_new_default(p), 0);
    t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4);

    /* Net entries must also be registered in the trie */
    t->fib.init = net_init_with_trie;
  }

  init_list(&t->channels);
  init_list(&t->flowspec_links);
  init_list(&t->subscribers);

  /* Route id 0 is reserved */
  hmap_init(&t->id_map, p, 1024);
  hmap_set(&t->id_map, 0);

  if (!(t->internal = cf->internal))
  {
    t->rt_event = ev_new_init(p, rt_event, t);
    t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0);
    t->last_rt_change = t->gc_time = current_time();

    if (rt_is_flow(t))
    {
      t->flowspec_trie = f_new_trie(lp_new_default(p), 0);
      t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4);
    }
  }

  return t;
}
2121
58740ed4
MM
/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module: the route attribute cache, the shared table
 * pool, the update linpool, the &rte slab and the global table list.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new_default(rt_table_pool);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1a54b1c6 2137
fb829de6 2138
f4a60a9b
OZ
/**
 * rt_prune_table - prune a routing table
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration does not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is used also for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration. It also rebuilds the prefix trie when it became too sparse.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 2000;		/* Work budget per invocation */

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
	c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;

    tab->gc_counter = 0;
    tab->gc_time = current_time();

    if (tab->prune_trie)
    {
      /* Init prefix trie pruning */
      tab->trie_new = f_new_trie(lp_new_default(tab->rp), 0);
      tab->trie_new->ipv4 = tab->trie->ipv4;
    }
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
  {
    rte *e;

  rescan:
    if (limit <= 0)
    {
      /* Budget exhausted; park the iterator and continue later */
      FIB_ITERATE_PUT(fit);
      ev_schedule(tab->rt_event);
      return;
    }

    for (e = n->routes; e; e = e->next)
    {
      if (e->sender->flush_active || (e->flags & REF_DISCARD))
      {
	rte_discard(e);
	limit--;

	/* The list changed under us, restart scan of this net */
	goto rescan;
      }

      if (e->flags & REF_MODIFY)
      {
	rte_modify(e);
	limit--;

	goto rescan;
      }
    }

    if (!n->routes)		/* Orphaned FIB entry */
    {
      FIB_ITERATE_PUT(fit);
      fib_delete(&tab->fib, n);
      goto again;
    }

    if (tab->trie_new)
    {
      /* Rebuild the trie with surviving prefixes only */
      trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
      limit--;
    }
  }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  if (tab->trie_new)
  {
    /* Finish prefix trie pruning */

    if (!tab->trie_lock_count)
    {
      rfree(tab->trie->lp);
    }
    else
    {
      /* Old trie still referenced by walkers; keep it until unlocked */
      ASSERT(!tab->trie_old);
      tab->trie_old = tab->trie;
      tab->trie_old_lock_count = tab->trie_lock_count;
      tab->trie_lock_count = 0;
    }

    tab->trie = tab->trie_new;
    tab->trie_new = NULL;
    tab->prune_trie = 0;
  }
  else
  {
    /* Schedule prefix trie pruning */
    if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
    {
      /* state change 0->1, 2->3 */
      tab->prune_state |= 1;
      tab->prune_trie = 1;
    }
  }

  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
    {
      c->flush_active = 0;
      channel_set_state(c, CS_DOWN);
    }

  return;
}
2292
5a89edc6
OZ
/**
 * rt_lock_trie - lock a prefix trie of a routing table
 * @tab: routing table with prefix trie to be locked
 *
 * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
 * structures. Therefore, asynchronous walks should lock the prefix trie using
 * this function. That allows the prune loop to rebuild the trie, but postpones
 * its freeing until all walks are done (unlocked by rt_unlock_trie()).
 *
 * Return a current trie that will be locked, the value should be passed back to
 * rt_unlock_trie() for unlocking.
 *
 */
struct f_trie *
rt_lock_trie(rtable *tab)
{
  ASSERT(tab->trie);

  tab->trie_lock_count++;
  return tab->trie;
}
2314
/**
 * rt_unlock_trie - unlock a prefix trie of a routing table
 * @tab: routing table with prefix trie to be unlocked
 * @trie: value returned by matching rt_lock_trie()
 *
 * Done for trie locked by rt_lock_trie() after walk over the trie is done.
 * It may free the trie and schedule next trie pruning.
 */
void
rt_unlock_trie(rtable *tab, struct f_trie *trie)
{
  ASSERT(trie);

  if (trie == tab->trie)
  {
    /* Unlock the current prefix trie */
    ASSERT(tab->trie_lock_count);
    tab->trie_lock_count--;
  }
  else if (trie == tab->trie_old)
  {
    /* Unlock the old prefix trie */
    ASSERT(tab->trie_old_lock_count);
    tab->trie_old_lock_count--;

    /* Free old prefix trie that is no longer needed */
    if (!tab->trie_old_lock_count)
    {
      rfree(tab->trie_old->lp);
      tab->trie_old = NULL;

      /* Kick prefix trie pruning that was postponed */
      if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
      {
	tab->prune_trie = 1;
	rt_schedule_prune(tab);
      }
    }
  }
  else
    log(L_BUG "Invalid arg to rt_unlock_trie()");
}
2357
2358
cfe34a31
OZ
/*
 * Config preparation hook: initialize the table list and create the
 * default master tables for IPv4 and IPv6.
 */
void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol(c, "master4"), NET_IP4);
  rt_new_table(cf_get_symbol(c, "master6"), NET_IP6);
}
2367
a8a3d95b
OZ
/*
 * Config finalization hook: compute the default GC period (scaled by
 * the number of tables, clamped to [10 s, 600 s]) and apply it to
 * tables that did not set one explicitly.
 */
void
rt_postconfig(struct config *c)
{
  uint num_tables = list_length(&c->tables);
  btime def_gc_period = 400 MS * num_tables;
  def_gc_period = MAX(def_gc_period, 10 S);
  def_gc_period = MIN(def_gc_period, 600 S);

  struct rtable_config *rc;
  WALK_LIST(rc, c->tables)
    if (rc->gc_period == (uint) -1)	/* Marker set in rt_new_table() */
      rc->gc_period = (uint) def_gc_period;
}
2381
cfe34a31 2382
f4a60a9b 2383/*
cfe34a31
OZ
2384 * Some functions for handing internal next hop updates
2385 * triggered by rt_schedule_nhu().
2386 */
2387
1e37e35c 2388void
3c744164 2389rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
cfe34a31
OZ
2390{
2391 a->hostentry = he;
cfe34a31 2392 a->dest = he->dest;
d1e146f2 2393 a->igp_metric = he->igp_metric;
d47c3d64 2394
3c744164 2395 if (a->dest != RTD_UNICAST)
d47c3d64 2396 {
3c744164
MM
2397 /* No nexthop */
2398no_nexthop:
2399 a->nh = (struct nexthop) {};
2400 if (mls)
2401 { /* Store the label stack for later changes */
2402 a->nh.labels_orig = a->nh.labels = mls->len;
2403 memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
2404 }
d47c3d64
MM
2405 return;
2406 }
2407
3c744164
MM
2408 if (((!mls) || (!mls->len)) && he->nexthop_linkable)
2409 { /* Just link the nexthop chain, no label append happens. */
2410 memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
2411 return;
2412 }
2413
2414 struct nexthop *nhp = NULL, *nhr = NULL;
2415 int skip_nexthop = 0;
1e37e35c 2416
3c744164 2417 for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
d47c3d64 2418 {
3c744164
MM
2419 if (skip_nexthop)
2420 skip_nexthop--;
2421 else
2422 {
2423 nhr = nhp;
cb2b6e04 2424 nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
3c744164 2425 }
039a65d0 2426
cb2b6e04 2427 memset(nhp, 0, NEXTHOP_MAX_SIZE);
3c744164
MM
2428 nhp->iface = nh->iface;
2429 nhp->weight = nh->weight;
843b10c8 2430
3c744164 2431 if (mls)
d47c3d64 2432 {
3c744164
MM
2433 nhp->labels = nh->labels + mls->len;
2434 nhp->labels_orig = mls->len;
039a65d0
MM
2435 if (nhp->labels <= MPLS_MAX_LABEL_STACK)
2436 {
2437 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
3c744164 2438 memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
039a65d0
MM
2439 }
2440 else
2441 {
2442 log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
3c744164
MM
2443 nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
2444 skip_nexthop++;
039a65d0
MM
2445 continue;
2446 }
d47c3d64 2447 }
843b10c8
OZ
2448 else if (nh->labels)
2449 {
2450 nhp->labels = nh->labels;
2451 nhp->labels_orig = 0;
2452 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32));
2453 }
2454
3c744164 2455 if (ipa_nonzero(nh->gw))
a1f5e514
OZ
2456 {
2457 nhp->gw = nh->gw; /* Router nexthop */
2458 nhp->flags |= (nh->flags & RNF_ONLINK);
2459 }
9eace843
OZ
2460 else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK))
2461 nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */
3c744164
MM
2462 else if (ipa_nonzero(he->link))
2463 nhp->gw = he->link; /* Device nexthop with link-local address known */
2464 else
2465 nhp->gw = he->addr; /* Device nexthop with link-local address unknown */
d47c3d64 2466 }
039a65d0 2467
3c744164
MM
2468 if (skip_nexthop)
2469 if (nhr)
2470 nhr->next = NULL;
2471 else
2472 {
2473 a->dest = RTD_UNREACHABLE;
2474 log(L_WARN "No valid nexthop remaining, setting route unreachable");
2475 goto no_nexthop;
2476 }
cfe34a31
OZ
2477}
2478
1f2eb2ac
OZ
/*
 * Check whether attributes resolved through a host entry no longer
 * match the current state of that host entry, i.e. whether the route
 * needs a next-hop update. Routes without a host entry never do.
 */
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  /* Host entry lost its source route -> route should be unreachable */
  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
}
2493
cfe34a31 2494static inline rte *
3e236955 2495rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
cfe34a31 2496{
1f2eb2ac
OZ
2497 if (!rta_next_hop_outdated(old->attrs))
2498 return NULL;
2499
62e64905
OZ
2500 rta *a = alloca(RTA_MAX_SIZE);
2501 memcpy(a, old->attrs, rta_size(old->attrs));
3c744164
MM
2502
2503 mpls_label_stack mls = { .len = a->nh.labels_orig };
2504 memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
2505
2506 rta_apply_hostentry(a, old->attrs->hostentry, &mls);
eb937358 2507 a->cached = 0;
cfe34a31
OZ
2508
2509 rte *e = sl_alloc(rte_slab);
2510 memcpy(e, old, sizeof(rte));
62e64905 2511 e->attrs = rta_lookup(a);
5cff1d5f 2512 rt_lock_source(e->src);
cfe34a31
OZ
2513
2514 return e;
2515}
2516
1f2eb2ac
OZ
2517
2518#ifdef CONFIG_BGP
2519
/*
 * Check whether a flowspec network carries a destination prefix
 * component, either implied by a nonzero pxlen or as an explicit
 * FLOW_TYPE_DST_PREFIX component in the encoded data.
 */
static inline int
net_flow_has_dst_prefix(const net_addr *n)
{
  ASSUME(net_is_flow(n));

  if (n->pxlen)
    return 1;

  if (n->type == NET_FLOW4)
  {
    const net_addr_flow4 *n4 = (void *) n;
    return (n4->length > sizeof(net_addr_flow4)) && (n4->data[0] == FLOW_TYPE_DST_PREFIX);
  }
  else
  {
    const net_addr_flow6 *n6 = (void *) n;
    return (n6->length > sizeof(net_addr_flow6)) && (n6->data[0] == FLOW_TYPE_DST_PREFIX);
  }
}
2539
/* Check whether the BGP AS_PATH attribute is missing or has zero length */
static inline int
rta_as_path_is_empty(rta *a)
{
  eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
  return !e || (as_path_getlen(e->u.ptr) == 0);
}
2546
/*
 * Return the first regular (non-confederation) ASN from the BGP
 * AS_PATH attribute, or 0 if there is none.
 */
static inline u32
rta_get_first_asn(rta *a)
{
  eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
  u32 asn;

  return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0;
}
2555
/*
 * rt_flowspec_check - validate a flowspec route against unicast routing
 * @tab_ip: IP table with best-match unicast routes
 * @tab_flow: flowspec table (its trie records prefixes to re-check)
 * @n: flowspec network
 * @a: flowspec route attributes
 * @interior: whether the route was received over an interior session
 *
 * Implements the flowspec validation procedure of RFC 8955 sect. 6 as
 * updated by RFC 9117. Returns 1 if the flowspec route is valid.
 */
int
rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior)
{
  ASSERT(rt_is_ip(tab_ip));
  ASSERT(rt_is_flow(tab_flow));
  ASSERT(tab_ip->trie);

  /* RFC 8955 6. a) Flowspec has defined dst prefix */
  if (!net_flow_has_dst_prefix(n))
    return 0;

  /* RFC 9117 4.1. Accept when AS_PATH is empty (route from the local domain) */
  if (interior && rta_as_path_is_empty(a))
    return 1;


  /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */

  /* Find flowspec dst prefix */
  net_addr dst;
  if (n->type == NET_FLOW4)
    net_fill_ip4(&dst, net4_prefix(n), net4_pxlen(n));
  else
    net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n));

  /* Find best-match BGP unicast route for flowspec dst prefix */
  net *nb = net_route(tab_ip, &dst);
  rte *rb = nb ? nb->routes : NULL;

  /* Register prefix to trie for tracking further changes */
  int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH;
  trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen);

  /* No best-match BGP route -> no flowspec */
  if (!rb || (rb->attrs->source != RTS_BGP))
    return 0;

  /* Find ORIGINATOR_ID values */
  u32 orig_a = ea_get_int(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0);
  u32 orig_b = ea_get_int(rb->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0);

  /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */
  if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal(a->from, rb->attrs->from)))
    return 0;


  /* Find ASN of the best-match route, for use in next checks */
  u32 asn_b = rta_get_first_asn(rb->attrs);
  if (!asn_b)
    return 0;

  /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */
  if (!interior && (rta_get_first_asn(a) != asn_b))
    return 0;

  /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */
  TRIE_WALK(tab_ip->trie, subnet, &dst)
  {
    net *nc = net_find_valid(tab_ip, &subnet);
    if (!nc)
      continue;

    rte *rc = nc->routes;
    if (rc->attrs->source != RTS_BGP)
      return 0;

    if (rta_get_first_asn(rc->attrs) != asn_b)
      return 0;
  }
  TRIE_WALK_END;

  return 1;
}
2629
2630#endif /* CONFIG_BGP */
2631
/*
 * Revalidate a flowspec route @r in table @tab. Returns a fresh route
 * with updated dest (RTD_NONE if valid, RTD_UNREACHABLE otherwise),
 * or NULL if nothing changed or the route is not a BGP flowspec route.
 * Compiled out (always NULL) without BGP support.
 */
static rte *
rt_flowspec_update_rte(rtable *tab, rte *r)
{
#ifdef CONFIG_BGP
  if ((r->attrs->source != RTS_BGP) || (r->sender->proto != r->src->proto))
    return NULL;

  struct bgp_channel *bc = (struct bgp_channel *) r->sender;
  if (!bc->base_table)
    return NULL;

  const net_addr *n = r->net->n.addr;
  struct bgp_proto *p = (void *) r->src->proto;
  int valid = rt_flowspec_check(bc->base_table, tab, n, r->attrs, p->is_interior);
  int dest = valid ? RTD_NONE : RTD_UNREACHABLE;

  if (dest == r->attrs->dest)
    return NULL;

  /* Copy attributes with the new dest */
  rta *a = alloca(RTA_MAX_SIZE);
  memcpy(a, r->attrs, rta_size(r->attrs));
  a->dest = dest;
  a->cached = 0;

  rte *new = sl_alloc(rte_slab);
  memcpy(new, r, sizeof(rte));
  new->attrs = rta_lookup(a);
  rt_lock_source(new->src);

  return new;
#else
  return NULL;
#endif
}
2666
2667
cfe34a31
OZ
/*
 * Run next-hop (or flowspec) update on all routes of one network.
 * Updated routes are replaced in place and announced with RA_ANY;
 * afterwards the best route is recomputed, relinked to the head of
 * the list and announced. Returns the number of updated routes
 * (used as work-budget accounting by the caller).
 */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  for (k = &n->routes; e = *k; k = &e->next)
  {
    if (!net_is_flow(n->n.addr))
      new = rt_next_hop_update_rte(tab, e);
    else
      new = rt_flowspec_update_rte(tab, e);

    if (new)
    {
      /* Replace @e by @new at the same list position */
      *k = new;

      rte_trace_in(D_ROUTES, new->sender, new, "updated");
      rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);

      /* Call a pre-comparison hook */
      /* Not really an efficient way to compute this */
      if (e->src->proto->rte_recalculate)
	e->src->proto->rte_recalculate(tab, n, new, e, NULL);

      if (e != old_best)
	rte_free_quick(e);
      else /* Freeing of the old best rte is postponed */
	free_old_best = 1;

      e = new;
      count++;
    }
  }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
  {
    if (!new_best || rte_better(e, *new_best))
      new_best = k;
  }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
  {
    *new_best = new->next;
    new->next = n->routes;
    n->routes = new;
  }

  /* Announce the new best route */
  if (new != old_best)
    rte_trace_in(D_ROUTES, new->sender, new, "updated [best]");

  /* Propagate changes */
  rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best);

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
2740
/*
 * Incremental next-hop update pass over a table, driven by nhu_state
 * and a persistent FIB iterator; reschedules its event when the work
 * budget (@max_feed) is exhausted or when a new round was requested
 * meanwhile (NHU_DIRTY).
 */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;

  if (tab->nhu_state == NHU_CLEAN)
    return;

  if (tab->nhu_state == NHU_SCHEDULED)
  {
    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->nhu_state = NHU_RUNNING;

    /* Flowspec revalidation starts with a clean prefix-tracking trie */
    if (tab->flowspec_trie)
      rt_flowspec_reset_trie(tab);
  }

  FIB_ITERATE_START(&tab->fib, fit, net, n)
  {
    if (max_feed <= 0)
    {
      /* Budget exhausted; park the iterator and continue later */
      FIB_ITERATE_PUT(fit);
      ev_schedule(tab->rt_event);
      return;
    }
    max_feed -= rt_next_hop_update_net(tab, n);
  }
  FIB_ITERATE_END;

  /* State change:
   *   NHU_DIRTY   -> NHU_SCHEDULED
   *   NHU_RUNNING -> NHU_CLEAN
   */
  tab->nhu_state &= 1;

  if (tab->nhu_state != NHU_CLEAN)
    ev_schedule(tab->rt_event);
}
2780
2781
b9626ec6 2782struct rtable_config *
fe9f1a6d 2783rt_new_table(struct symbol *s, uint addr_type)
b9626ec6 2784{
36415e4b 2785 /* Hack that allows to 'redefine' the master table */
f4a60a9b 2786 if ((s->class == SYM_TABLE) &&
0b39b1cb 2787 (s->table == new_config->def_tables[addr_type]) &&
f4a60a9b 2788 ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
0b39b1cb 2789 return s->table;
36415e4b 2790
b9626ec6
MM
2791 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
2792
51f2e7af 2793 cf_define_symbol(new_config, s, SYM_TABLE, table, c);
b9626ec6 2794 c->name = s->name;
fe9f1a6d 2795 c->addr_type = addr_type;
a8a3d95b
OZ
2796 c->gc_threshold = 1000;
2797 c->gc_period = (uint) -1; /* set in rt_postconfig() */
00b85905
OZ
2798 c->min_settle_time = 1 S;
2799 c->max_settle_time = 20 S;
54ddf90f 2800 c->debug = new_config->table_default_debug;
f4a60a9b
OZ
2801
2802 add_tail(&new_config->tables, &c->n);
2803
2804 /* First table of each type is kept as default */
2805 if (! new_config->def_tables[addr_type])
2806 new_config->def_tables[addr_type] = c;
2807
b9626ec6
MM
2808 return c;
2809}
2810
58740ed4
MM
/**
 * rt_lock_table - lock a routing table
 * @r: routing table to be locked
 *
 * Lock a routing table, because it's in use by a protocol,
 * preventing it from being freed when it gets undefined in a new
 * configuration.
 */
void
rt_lock_table(rtable *r)
{
  r->use_count++;
}
2824
58740ed4
MM
2825/**
2826 * rt_unlock_table - unlock a routing table
2827 * @r: routing table to be unlocked
2828 *
2829 * Unlock a routing table formerly locked by rt_lock_table(),
2830 * that is decrease its use count and delete it if it's scheduled
2831 * for deletion by configuration changes.
2832 */
50fe90ed
MM
2833void
2834rt_unlock_table(rtable *r)
2835{
2836 if (!--r->use_count && r->deleted)
2837 {
2838 struct config *conf = r->deleted;
ff397df7
MM
2839
2840 /* Delete the routing table by freeing its pool */
2841 rt_shutdown(r);
50fe90ed
MM
2842 config_del_obstacle(conf);
2843 }
2844}
2845
1ae42e52
OZ
2846static int
2847rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old)
2848{
2849 if ((new->addr_type != old->addr_type) ||
2850 (new->sorted != old->sorted) ||
2851 (new->trie_used != old->trie_used))
2852 return 0;
2853
2854 DBG("\t%s: same\n", new->name);
2855 new->table = tab;
2856 tab->name = new->name;
2857 tab->config = new;
54ddf90f 2858 tab->debug = new->debug;
1ae42e52
OZ
2859
2860 return 1;
2861}
2862
bcb4af81
OZ
2863static struct rtable_config *
2864rt_find_table_config(struct config *cf, char *name)
2865{
2866 struct symbol *sym = cf_find_symbol(cf, name);
0b39b1cb 2867 return (sym && (sym->class == SYM_TABLE)) ? sym->table : NULL;
bcb4af81
OZ
2868}
2869
58740ed4
MM
/**
 * rt_commit - commit new routing table configuration
 * @new: new configuration
 * @old: original configuration or %NULL if it's boot time config
 *
 * Scan differences between @old and @new configuration and modify
 * the routing tables according to these changes. If @new defines a
 * previously unknown table, create it, if it omits a table existing
 * in @old, schedule it for deletion (it gets deleted when all protocols
 * disconnect from it by calling rt_unlock_table()), if it exists
 * in both configurations, leave it unchanged.
 */
void
rt_commit(struct config *new, struct config *old)
{
  struct rtable_config *o, *r;

  DBG("rt_commit:\n");
  if (old)
  {
    WALK_LIST(o, old->tables)
    {
      rtable *tab = o->table;
      if (tab->deleted)
	continue;

      r = rt_find_table_config(new, o->name);
      if (r && !new->shutdown && rt_reconfigure(tab, r, o))
	continue;

      DBG("\t%s: deleted\n", o->name);
      tab->deleted = old;
      config_add_obstacle(old);
      /* lock+unlock triggers deletion once the use count drops to zero */
      rt_lock_table(tab);
      rt_unlock_table(tab);
    }
  }

  WALK_LIST(r, new->tables)
    if (!r->table)
    {
      r->table = rt_setup(rt_table_pool, r);
      DBG("\t%s: created\n", r->name);
      add_tail(&routing_tables, &r->table->n);
    }
  DBG("\tdone\n");
}
730f2e2c 2917
23ac9e9a 2918static inline void
f4a60a9b 2919do_feed_channel(struct channel *c, net *n, rte *e)
23ac9e9a 2920{
23ac9e9a 2921 rte_update_lock();
f4a60a9b 2922 if (c->ra_mode == RA_ACCEPTED)
5ea39eaa 2923 rt_notify_accepted(c, n, NULL, NULL, c->refeeding);
f4a60a9b 2924 else if (c->ra_mode == RA_MERGED)
5ea39eaa 2925 rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding);
f4a60a9b 2926 else /* RA_BASIC */
5ea39eaa 2927 rt_notify_basic(c, n, e, e, c->refeeding);
23ac9e9a
OZ
2928 rte_update_unlock();
2929}
2930
58740ed4 2931/**
f4a60a9b
OZ
2932 * rt_feed_channel - advertise all routes to a channel
2933 * @c: channel to be fed
58740ed4 2934 *
f4a60a9b
OZ
2935 * This function performs one pass of advertisement of routes to a channel that
2936 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2937 * has something to do. (We avoid transferring all the routes in single pass in
2938 * order not to monopolize CPU time.)
58740ed4 2939 */
ac5d8012 2940int
f4a60a9b 2941rt_feed_channel(struct channel *c)
ac5d8012 2942{
f4a60a9b 2943 struct fib_iterator *fit = &c->feed_fit;
76dfda9e 2944 int max_feed = 256;
ac5d8012 2945
f4a60a9b
OZ
2946 ASSERT(c->export_state == ES_FEEDING);
2947
2948 if (!c->feed_active)
ac5d8012 2949 {
f4a60a9b
OZ
2950 FIB_ITERATE_INIT(fit, &c->table->fib);
2951 c->feed_active = 1;
ac5d8012 2952 }
ac5d8012 2953
f4a60a9b 2954 FIB_ITERATE_START(&c->table->fib, fit, net, n)
ac5d8012 2955 {
258d0ad4 2956 rte *e = n->routes;
76dfda9e
MM
2957 if (max_feed <= 0)
2958 {
600998fc 2959 FIB_ITERATE_PUT(fit);
76dfda9e
MM
2960 return 0;
2961 }
23ac9e9a 2962
f4a60a9b
OZ
2963 if ((c->ra_mode == RA_OPTIMAL) ||
2964 (c->ra_mode == RA_ACCEPTED) ||
2965 (c->ra_mode == RA_MERGED))
cf98be7b 2966 if (rte_is_valid(e))
23ac9e9a 2967 {
f4a60a9b
OZ
2968 /* In the meantime, the protocol may fell down */
2969 if (c->export_state != ES_FEEDING)
2970 goto done;
ca34698c 2971
f4a60a9b 2972 do_feed_channel(c, n, e);
23ac9e9a
OZ
2973 max_feed--;
2974 }
2975
f4a60a9b 2976 if (c->ra_mode == RA_ANY)
ca34698c 2977 for(e = n->routes; e; e = e->next)
23ac9e9a 2978 {
f4a60a9b
OZ
2979 /* In the meantime, the protocol may fell down */
2980 if (c->export_state != ES_FEEDING)
2981 goto done;
ca34698c
OZ
2982
2983 if (!rte_is_valid(e))
2984 continue;
2985
f4a60a9b 2986 do_feed_channel(c, n, e);
23ac9e9a
OZ
2987 max_feed--;
2988 }
ac5d8012 2989 }
600998fc 2990 FIB_ITERATE_END;
ac5d8012 2991
f4a60a9b
OZ
2992done:
2993 c->feed_active = 0;
2994 return 1;
ac5d8012
MM
2995}
2996
58740ed4
MM
/**
 * rt_feed_channel_abort - abort protocol feeding
 * @c: channel
 *
 * This function is called by the protocol code when the protocol stops or
 * ceases to exist during the feeding.
 */
void
rt_feed_channel_abort(struct channel *c)
{
  if (c->feed_active)
  {
    /* Unlink the iterator */
    fit_get(&c->table->fib, &c->feed_fit);
    c->feed_active = 0;
  }
}
3014
682d3f7d 3015
b7d7599c
OZ
/*
 *	Import table
 */

/*
 * rte_update_in - update a route in a channel's import table
 * @c: channel with an import table (in_table)
 * @n: network address
 * @new: new route, or NULL for withdraw
 * @src: route source identifying the route to replace/remove
 *
 * Inserts, replaces or removes the route from @src in the import
 * table. Returns 1 when the update should continue to the main table,
 * 0 when it was absorbed here (duplicate update, unknown withdraw, or
 * blocked by the receive limit).
 */
int
rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct rtable *tab = c->in_table;
  rte *old, **pos;
  net *net;

  if (new)
  {
    net = net_get(tab, n);

    if (!rta_is_cached(new->attrs))
      new->attrs = rta_lookup(new->attrs);
  }
  else
  {
    net = net_find(tab, n);

    /* Withdraw of an unknown network */
    if (!net)
      goto drop_withdraw;
  }

  /* Find the old rte */
  for (pos = &net->routes; old = *pos; pos = &old->next)
    if (old->src == src)
    {
      if (new && rte_same(old, new))
      {
	/* Refresh the old rte, continue with update to main rtable */
	if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
	{
	  old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);

	  return 1;
	}

	goto drop_update;
      }

      /* Move iterator if needed */
      if (old == c->reload_next_rte)
	c->reload_next_rte = old->next;

      /* Remove the old rte */
      *pos = old->next;
      tab->rt_count--;
      break;
    }

  /* Withdraw of a route we do not have */
  if (!old && !new)
    goto drop_withdraw;

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new)
  {
    if (tab->rt_count >= l->limit)
      channel_notify_limit(c, l, PLD_RX, tab->rt_count);

    if (l->state == PLS_BLOCKED)
    {
      /* Required by rte_trace_in() */
      new->net = net;

      rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
      goto drop_update;
    }
  }

  if (new)
  {
    /* Insert the new rte */
    rte *e = rte_do_cow(new);
    e->flags |= REF_COW;
    e->net = net;
    e->sender = c;
    e->lastmod = current_time();
    e->next = *pos;
    *pos = new = e;
    tab->rt_count++;

    /* Assign a route id: reuse the replaced route's id, or claim a fresh one */
    if (!old)
    {
      new->id = hmap_first_zero(&tab->id_map);
      hmap_set(&tab->id_map, new->id);
    }
    else
      new->id = old->id;
  }

  rte_announce(tab, RA_ANY, net, new, old, NULL, NULL);

  if (old)
  {
    if (!new)
      hmap_clear(&tab->id_map, old->id);

    rte_free_quick(old);
  }

  if (!net->routes)
    fib_delete(&tab->fib, net);

  return 1;

drop_update:
  c->stats.imp_updates_received++;
  c->stats.imp_updates_ignored++;
  rte_free(new);

  if (!net->routes)
    fib_delete(&tab->fib, net);

  return 0;

drop_withdraw:
  c->stats.imp_withdraws_received++;
  c->stats.imp_withdraws_ignored++;
  return 0;
}
3139
/*
 * rt_reload_channel - reload all routes from the channel import table
 * @c: channel with an active import table
 *
 * Re-sends every route stored in c->in_table through rte_update2(). The work
 * is split into bursts of at most 64 routes; returns 0 when interrupted
 * (state kept in c->reload_fit / c->reload_next_rte for resumption) and
 * 1 when the whole table has been processed.
 */
int
rt_reload_channel(struct channel *c)
{
  struct rtable *tab = c->in_table;
  struct fib_iterator *fit = &c->reload_fit;
  int max_feed = 64;	/* burst size before yielding */

  ASSERT(c->channel_state == CS_UP);

  if (!c->reload_active)
  {
    FIB_ITERATE_INIT(fit, &tab->fib);
    c->reload_active = 1;
  }

  do {
    /* Resume inside the route list of the current net, if interrupted there */
    for (rte *e = c->reload_next_rte; e; e = e->next)
    {
      if (max_feed-- <= 0)
      {
	c->reload_next_rte = e;
	debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed);
	return 0;
      }

      rte_update2(c, e->net->n.addr, rte_do_cow(e), e->src);
    }

    c->reload_next_rte = NULL;

    /* Advance to the next non-empty net */
    FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      if (c->reload_next_rte = n->routes)	/* intentional assignment */
      {
	FIB_ITERATE_PUT_NEXT(fit, &tab->fib);
	break;
      }
    }
    FIB_ITERATE_END;
  }
  while (c->reload_next_rte);

  c->reload_active = 0;
  return 1;
}
3185
3186void
3187rt_reload_channel_abort(struct channel *c)
3188{
3189 if (c->reload_active)
3190 {
3191 /* Unlink the iterator */
3192 fit_get(&c->in_table->fib, &c->reload_fit);
32a25405 3193 c->reload_next_rte = NULL;
682d3f7d
OZ
3194 c->reload_active = 0;
3195 }
3196}
3197
/*
 * rt_prune_sync - synchronously prune routes from a table
 * @t: table (import/export helper table)
 * @all: nonzero to delete everything, zero to delete only routes
 *       marked REF_STALE or REF_DISCARD
 *
 * Walks the whole FIB, frees matching routes and removes nets that become
 * (or are forced) empty. Runs to completion, unlike the async pruning.
 */
void
rt_prune_sync(rtable *t, int all)
{
  struct fib_iterator fit;

  FIB_ITERATE_INIT(&fit, &t->fib);

again:
  FIB_ITERATE_START(&t->fib, &fit, net, n)
  {
    rte *e, **ee = &n->routes;

    /* Unlink and free selected routes from this net's list */
    while (e = *ee)
    {
      if (all || (e->flags & (REF_STALE | REF_DISCARD)))
      {
	*ee = e->next;
	rte_free_quick(e);
	t->rt_count--;
      }
      else
	ee = &e->next;
    }

    if (all || !n->routes)
    {
      /* Park the iterator before fib_delete(), then restart the walk */
      FIB_ITERATE_PUT(&fit);
      fib_delete(&t->fib, n);
      goto again;
    }
  }
  FIB_ITERATE_END;
}
3231
3232
b7d7599c
OZ
3233/*
3234 * Export table
3235 */
3236
/*
 * rte_update_out - apply an exported route to the channel export table
 * @c: channel with an active export table (c->out_table)
 * @n: network address
 * @new: new route, or NULL for withdraw
 * @old0: previously exported route (used to identify src on withdraw)
 * @refeed: nonzero when called during refeed
 *
 * Returns 1 if the export table changed (update should be propagated),
 * 0 if it should be dropped; an unchanged route returns @refeed.
 */
int
rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed)
{
  struct rtable *tab = c->out_table;
  struct rte_src *src;
  rte *old, **pos;
  net *net;

  if (new)
  {
    net = net_get(tab, n);
    src = new->src;

    /* Export table stores cached rtas only */
    if (!rta_is_cached(new->attrs))
      new->attrs = rta_lookup(new->attrs);
  }
  else
  {
    net = net_find(tab, n);
    src = old0->src;

    if (!net)
      goto drop_withdraw;
  }

  /* Find the old rte; with RA_ANY routes are keyed by src, otherwise
     there is at most one route per net */
  for (pos = &net->routes; old = *pos; pos = &old->next)
    if ((c->ra_mode != RA_ANY) || (old->src == src))
    {
      if (new && rte_same(old, new))
      {
	/* REF_STALE / REF_DISCARD not used in export table */
	/*
	if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
	{
	  old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
	  return 1;
	}
	*/

	goto drop_update;
      }

      /* Remove the old rte */
      *pos = old->next;
      rte_free_quick(old);
      tab->rt_count--;

      break;
    }

  if (!new)
  {
    if (!old)
      goto drop_withdraw;

    /* Drop the net entry if it became empty */
    if (!net->routes)
      fib_delete(&tab->fib, net);

    return 1;
  }

  /* Insert the new rte (a private copy, marked copy-on-write) */
  rte *e = rte_do_cow(new);
  e->flags |= REF_COW;
  e->net = net;
  e->sender = c;
  e->lastmod = current_time();
  e->next = *pos;
  *pos = e;
  tab->rt_count++;
  return 1;

drop_update:
  /* Unchanged route: propagate anyway when refeeding */
  return refeed;

drop_withdraw:
  return 0;
}
3316
3317
3318/*
3319 * Hostcache
3320 */
3321
04632fd7 3322static inline u32
f2b76f2c
OZ
3323hc_hash(ip_addr a, rtable *dep)
3324{
04632fd7 3325 return ipa_hash(a) ^ ptr_hash(dep);
f2b76f2c
OZ
3326}
3327
3328static inline void
3329hc_insert(struct hostcache *hc, struct hostentry *he)
3330{
ae80a2de 3331 uint k = he->hash_key >> hc->hash_shift;
f2b76f2c
OZ
3332 he->next = hc->hash_table[k];
3333 hc->hash_table[k] = he;
3334}
3335
3336static inline void
3337hc_remove(struct hostcache *hc, struct hostentry *he)
3338{
3339 struct hostentry **hep;
ae80a2de 3340 uint k = he->hash_key >> hc->hash_shift;
f2b76f2c
OZ
3341
3342 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
3343 *hep = he->next;
3344}
3345
/* Hostcache hash table sizing. The *_MARK macros are spliced as operator
   tokens after the table size, e.g. (hsize HC_HI_MARK) == hsize * 4. */
#define HC_DEF_ORDER 10			/* initial order (1 << order buckets) */
#define HC_HI_MARK *4			/* grow when items exceed size * 4 */
#define HC_HI_STEP 2			/* grow by this many orders */
#define HC_HI_ORDER 16 /* Must be at most 16 */
#define HC_LO_MARK /5			/* shrink when items drop below size / 5 */
#define HC_LO_STEP 2			/* shrink by this many orders */
#define HC_LO_ORDER 10			/* never shrink below this order */
3353
/*
 * hc_alloc_table - allocate the hostcache hash table
 * @hc: hostcache
 * @p: resource pool to allocate from
 * @order: table order; table has 1 << order buckets
 *
 * Sets the order/shift and the grow/shrink watermarks, then allocates a
 * zeroed bucket array. Note the caller owns freeing any previous table.
 */
static void
hc_alloc_table(struct hostcache *hc, pool *p, unsigned order)
{
  uint hsize = 1 << order;
  hc->hash_order = order;
  hc->hash_shift = 32 - order;	/* bucket = hash_key >> hash_shift */
  /* HC_HI_MARK / HC_LO_MARK expand to "*4" / "/5", see macro definitions */
  hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
  hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);

  hc->hash_table = mb_allocz(p, hsize * sizeof(struct hostentry *));
}
3365
cfe34a31 3366static void
ff397df7 3367hc_resize(struct hostcache *hc, pool *p, unsigned new_order)
cfe34a31 3368{
f2b76f2c
OZ
3369 struct hostentry **old_table = hc->hash_table;
3370 struct hostentry *he, *hen;
3e236955
MM
3371 uint old_size = 1 << hc->hash_order;
3372 uint i;
f2b76f2c 3373
ff397df7 3374 hc_alloc_table(hc, p, new_order);
f2b76f2c
OZ
3375 for (i = 0; i < old_size; i++)
3376 for (he = old_table[i]; he != NULL; he=hen)
3377 {
3378 hen = he->next;
3379 hc_insert(hc, he);
3380 }
3381 mb_free(old_table);
3382}
3383
3384static struct hostentry *
ff397df7 3385hc_new_hostentry(struct hostcache *hc, pool *p, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
f2b76f2c
OZ
3386{
3387 struct hostentry *he = sl_alloc(hc->slab);
3388
039a65d0
MM
3389 *he = (struct hostentry) {
3390 .addr = a,
3391 .link = ll,
3392 .tab = dep,
3393 .hash_key = k,
3394 };
f2b76f2c
OZ
3395
3396 add_tail(&hc->hostentries, &he->ln);
3397 hc_insert(hc, he);
3398
3399 hc->hash_items++;
3400 if (hc->hash_items > hc->hash_max)
ff397df7 3401 hc_resize(hc, p, hc->hash_order + HC_HI_STEP);
f2b76f2c
OZ
3402
3403 return he;
3404}
3405
3406static void
ff397df7 3407hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he)
f2b76f2c 3408{
7e95c05d
OZ
3409 rta_free(he->src);
3410
f2b76f2c
OZ
3411 rem_node(&he->ln);
3412 hc_remove(hc, he);
ebd807c0 3413 sl_free(he);
f2b76f2c
OZ
3414
3415 hc->hash_items--;
3416 if (hc->hash_items < hc->hash_min)
ff397df7 3417 hc_resize(hc, p, hc->hash_order - HC_LO_STEP);
cfe34a31
OZ
3418}
3419
/*
 * rt_init_hostcache - lazily create the hostcache for a table
 * @tab: routing table
 *
 * All allocations come from tab->rp, so the hostcache is torn down
 * together with the table's resource pool.
 */
static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache));
  init_list(&hc->hostentries);

  hc->hash_items = 0;
  hc_alloc_table(hc, tab->rp, HC_DEF_ORDER);
  hc->slab = sl_new(tab->rp, sizeof(struct hostentry));

  /* Trie of prefixes through which the cached hosts resolve */
  hc->lp = lp_new(tab->rp);
  hc->trie = f_new_trie(hc->lp, 0);

  tab->hostcache = hc;
}
3435
/*
 * rt_free_hostcache - release hostentry references on table shutdown
 * @tab: routing table being freed
 *
 * Drops the rta references held by hostentries and warns about entries
 * that are still in use. The memory itself is not freed here -- it all
 * lives in the table's resource pool (see rt_init_hostcache()).
 */
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
  {
    struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
    rta_free(he->src);

    if (he->uc)
      log(L_ERR "Hostcache is not empty in table %s", tab->name);
  }

  /* Freed automagically by the resource pool
  rfree(hc->slab);
  rfree(hc->lp);
  mb_free(hc->hash_table);
  mb_free(hc);
  */
}
3458
3459static void
3460rt_notify_hostcache(rtable *tab, net *net)
3461{
cfe34a31
OZ
3462 if (tab->hcu_scheduled)
3463 return;
3464
04632fd7
OZ
3465 if (trie_match_net(tab->hostcache->trie, net->n.addr))
3466 rt_schedule_hcu(tab);
cfe34a31
OZ
3467}
3468
3469static int
3470if_local_addr(ip_addr a, struct iface *i)
3471{
3472 struct ifa *b;
3473
3474 WALK_LIST(b, i->addrs)
3475 if (ipa_equal(a, b->ip))
3476 return 1;
3477
3478 return 0;
3479}
3480
09ee846d 3481u32
d1e146f2
OZ
3482rt_get_igp_metric(rte *rt)
3483{
ba5e5940
OZ
3484 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
3485
3486 if (ea)
3487 return ea->u.data;
3488
d471d5fc 3489 if (rt->attrs->source == RTS_DEVICE)
d1e146f2
OZ
3490 return 0;
3491
d471d5fc
MM
3492 if (rt->src->proto->rte_igp_metric)
3493 return rt->src->proto->rte_igp_metric(rt);
3494
d1e146f2
OZ
3495 return IGP_METRIC_UNKNOWN;
3496}
3497
cfe34a31
OZ
/*
 * rt_update_hostentry - recompute the resolution of one cached host
 * @tab: table the host resolves through
 * @he: hostentry to update
 *
 * Re-resolves he->addr in @tab, refusing recursive-through-recursive routes
 * and next hops that are local addresses. Records the covering prefix range
 * in the hostcache trie so later changes can be matched. Returns nonzero
 * when the cached rta changed (dependent tables then need a nexthop update).
 */
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
  rta *old_src = he->src;
  int direct = 0;
  int pxlen = 0;

  /* Reset the hostentry */
  he->src = NULL;
  he->dest = RTD_UNREACHABLE;
  he->nexthop_linkable = 0;
  he->igp_metric = 0;

  net_addr he_addr;
  net_fill_ip_host(&he_addr, he->addr);
  net *n = net_route(tab, &he_addr);
  if (n)
    {
      rte *e = n->routes;
      rta *a = e->attrs;
      word pref = a->pref;

      /* Reject if any best-preference route for this net is itself recursive */
      for (rte *ee = n->routes; ee; ee = ee->next)
	if ((ee->attrs->pref >= pref) && ee->attrs->hostentry)
	  {
	    /* Recursive route should not depend on another recursive route */
	    log(L_WARN "Next hop address %I resolvable through recursive route for %N",
		he->addr, n->n.addr);
	    goto done;
	  }

      pxlen = n->n.addr->pxlen;

      if (a->dest == RTD_UNICAST)
	{
	  /* Count direct (gateway-less) next hops; reject local addresses */
	  for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
	    if (ipa_zero(nh->gw))
	      {
		if (if_local_addr(he->addr, nh->iface))
		  {
		    /* The host address is a local address, this is not valid */
		    log(L_WARN "Next hop address %I is a local address of iface %s",
			he->addr, nh->iface->name);
		    goto done;
		  }

		direct++;
	      }
	}

      he->src = rta_clone(a);
      he->dest = a->dest;
      he->nexthop_linkable = !direct;
      he->igp_metric = rt_get_igp_metric(e);
    }

done:
  /* Add a prefix range to the trie */
  trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);

  rta_free(old_src);
  return old_src != he->src;
}
3561
/*
 * rt_update_hostcache - refresh all hostcache entries of a table
 * @tab: routing table
 *
 * Rebuilds the prefix trie from scratch, drops unused entries (uc == 0)
 * and re-resolves the rest, scheduling nexthop updates for dependent
 * tables whose entries changed. Clears the hcu_scheduled flag.
 */
static void
rt_update_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;
  struct hostentry *he;
  node *n, *x;

  /* Reset the trie; rt_update_hostentry() repopulates it below */
  lp_flush(hc->lp);
  hc->trie = f_new_trie(hc->lp, 0);

  /* DELSAFE: hc_delete_hostentry() removes nodes during the walk */
  WALK_LIST_DELSAFE(n, x, hc->hostentries)
    {
      he = SKIP_BACK(struct hostentry, ln, n);
      if (!he->uc)
	{
	  hc_delete_hostentry(hc, tab->rp, he);
	  continue;
	}

      if (rt_update_hostentry(tab, he))
	rt_schedule_nhu(he->tab);
    }

  tab->hcu_scheduled = 0;
}
3588
/*
 * rt_get_hostentry - find or create a hostcache entry
 * @tab: table the host address resolves through
 * @a: host address
 * @ll: optional link-local address (zero means use @a itself)
 * @dep: dependent table that uses the result
 *
 * Initializes the hostcache on first use. Returns an existing entry when
 * address, link and dependent table all match; otherwise creates and
 * resolves a fresh one.
 */
struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{
  ip_addr link = ipa_zero(ll) ? a : ll;
  struct hostentry *he;

  if (!tab->hostcache)
    rt_init_hostcache(tab);

  u32 k = hc_hash(a, dep);
  struct hostcache *hc = tab->hostcache;
  for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
    if (ipa_equal(he->addr, a) && ipa_equal(he->link, link) && (he->tab == dep))
      return he;

  he = hc_new_hostentry(hc, tab->rp, a, link, dep, k);
  he->owner = tab;
  rt_update_hostentry(tab, he);
  return he;
}
3609
094d2bdb 3610
3ce8c610
MM
3611/*
3612 * Documentation for functions declared inline in route.h
3613 */
3614#if 0
3615
3616/**
3617 * net_find - find a network entry
3618 * @tab: a routing table
3619 * @addr: address of the network
3ce8c610
MM
3620 *
3621 * net_find() looks up the given network in routing table @tab and
3622 * returns a pointer to its &net entry or %NULL if no such network
3623 * exists.
3624 */
fe9f1a6d 3625static inline net *net_find(rtable *tab, net_addr *addr)
3ce8c610
MM
3626{ DUMMY; }
3627
3628/**
3629 * net_get - obtain a network entry
3630 * @tab: a routing table
3631 * @addr: address of the network
3ce8c610
MM
3632 *
3633 * net_get() looks up the given network in routing table @tab and
3634 * returns a pointer to its &net entry. If no such entry exists, it's
3635 * created.
3636 */
fe9f1a6d 3637static inline net *net_get(rtable *tab, net_addr *addr)
3ce8c610
MM
3638{ DUMMY; }
3639
3640/**
3641 * rte_cow - copy a route for writing
3642 * @r: a route entry to be copied
3643 *
3644 * rte_cow() takes a &rte and prepares it for modification. The exact action
3645 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
3646 * just returned unchanged, else a new temporary entry with the same contents
3647 * is created.
3648 *
3649 * The primary use of this function is inside the filter machinery -- when
3650 * a filter wants to modify &rte contents (to change the preference or to
3651 * attach another set of attributes), it must ensure that the &rte is not
3652 * shared with anyone else (and especially that it isn't stored in any routing
3653 * table).
3654 *
2e9b2421 3655 * Result: a pointer to the new writable &rte.
3ce8c610
MM
3656 */
3657static inline rte * rte_cow(rte *r)
3658{ DUMMY; }
3659
3660#endif