]> git.ipfire.org Git - thirdparty/bird.git/blame - nest/rt-table.c
OSPF: Improved handling of tmpattrs
[thirdparty/bird.git] / nest / rt-table.c
CommitLineData
62aa008a 1/*
58740ed4 2 * BIRD -- Routing Tables
62aa008a 3 *
50fe90ed 4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
62aa008a
MM
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
58740ed4
MM
9/**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
725270cb 16 * There are multiple routing tables (a primary one together with any
58740ed4
MM
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
58f7d004 19 * destination networks. For each network (represented by structure &net),
725270cb
MM
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
58740ed4
MM
22 * for routing), the order of the other ones is undetermined.
23 *
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
29 */
30
6b9fa320 31#undef LOCAL_DEBUG
1a54b1c6 32
62aa008a
MM
33#include "nest/bird.h"
34#include "nest/route.h"
2326b001 35#include "nest/protocol.h"
730f2e2c 36#include "nest/iface.h"
2326b001 37#include "lib/resource.h"
5996da6a 38#include "lib/event.h"
730f2e2c 39#include "lib/string.h"
0e02abfd 40#include "conf/conf.h"
529c4149 41#include "filter/filter.h"
586c1800 42#include "lib/hash.h"
221135d6 43#include "lib/string.h"
10af3676 44#include "lib/alloca.h"
7d875e09 45
acb60628
OZ
46pool *rt_table_pool;
47
2326b001 48static slab *rte_slab;
e2dc2f30 49static linpool *rte_update_pool;
2326b001 50
863ecfc7 51list routing_tables;
5996da6a 52
cfe34a31
OZ
53static void rt_free_hostcache(rtable *tab);
54static void rt_notify_hostcache(rtable *tab, net *net);
55static void rt_update_hostcache(rtable *tab);
56static void rt_next_hop_update(rtable *tab);
f4a60a9b 57static inline void rt_prune_table(rtable *tab);
0c791f87 58
cfd46ee4 59
d1e146f2 60/* Like fib_route(), but skips empty net entries */
04632fd7 61static inline void *
7ee07a3c 62net_route_ip4(rtable *t, net_addr_ip4 *n)
d1e146f2 63{
04632fd7 64 net *r;
d1e146f2 65
7ee07a3c 66 while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
04632fd7
OZ
67 {
68 n->pxlen--;
69 ip4_clrbit(&n->prefix, n->pxlen);
70 }
71
72 return r;
73}
74
75static inline void *
7ee07a3c 76net_route_ip6(rtable *t, net_addr_ip6 *n)
04632fd7
OZ
77{
78 net *r;
79
7ee07a3c 80 while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
04632fd7
OZ
81 {
82 n->pxlen--;
83 ip6_clrbit(&n->prefix, n->pxlen);
84 }
85
86 return r;
87}
88
be17805c
OZ
89static inline void *
90net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
91{
92 struct fib_node *fn;
93
94 while (1)
95 {
96 net *best = NULL;
97 int best_pxlen = 0;
98
99 /* We need to do dst first matching. Since sadr addresses are hashed on dst
100 prefix only, find the hash table chain and go through it to find the
101 match with the smallest matching src prefix. */
102 for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next)
103 {
104 net_addr_ip6_sadr *a = (void *) fn->addr;
105
106 if (net_equal_dst_ip6_sadr(n, a) &&
107 net_in_net_src_ip6_sadr(n, a) &&
108 (a->src_pxlen >= best_pxlen))
109 {
110 best = fib_node_to_user(&t->fib, fn);
111 best_pxlen = a->src_pxlen;
112 }
113 }
114
115 if (best)
116 return best;
117
118 if (!n->dst_pxlen)
119 break;
120
121 n->dst_pxlen--;
122 ip6_clrbit(&n->dst_prefix, n->dst_pxlen);
123 }
124
125 return NULL;
126}
127
286e2011
OZ
128void *
129net_route(rtable *tab, const net_addr *n)
0264ccf6 130{
286e2011 131 ASSERT(tab->addr_type == n->type);
0264ccf6 132
286e2011
OZ
133 net_addr *n0 = alloca(n->length);
134 net_copy(n0, n);
135
136 switch (n->type)
137 {
138 case NET_IP4:
139 case NET_VPN4:
140 case NET_ROA4:
7ee07a3c 141 return net_route_ip4(tab, (net_addr_ip4 *) n0);
286e2011
OZ
142
143 case NET_IP6:
144 case NET_VPN6:
145 case NET_ROA6:
7ee07a3c 146 return net_route_ip6(tab, (net_addr_ip6 *) n0);
286e2011 147
be17805c
OZ
148 case NET_IP6_SADR:
149 return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0);
150
286e2011
OZ
151 default:
152 return NULL;
153 }
154}
155
156
157static int
158net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
159{
160 struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
0264ccf6 161 struct fib_node *fn;
286e2011
OZ
162 int anything = 0;
163
0264ccf6
PT
164 while (1)
165 {
166 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
167 {
286e2011 168 net_addr_roa4 *roa = (void *) fn->addr;
0264ccf6 169 net *r = fib_node_to_user(&tab->fib, fn);
286e2011
OZ
170
171 if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
0264ccf6 172 {
0264ccf6
PT
173 anything = 1;
174 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
175 return ROA_VALID;
176 }
177 }
178
179 if (n.pxlen == 0)
180 break;
181
182 n.pxlen--;
183 ip4_clrbit(&n.prefix, n.pxlen);
184 }
185
186 return anything ? ROA_INVALID : ROA_UNKNOWN;
187}
188
286e2011
OZ
189static int
190net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
0264ccf6
PT
191{
192 struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
0264ccf6 193 struct fib_node *fn;
286e2011
OZ
194 int anything = 0;
195
0264ccf6
PT
196 while (1)
197 {
198 for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
199 {
286e2011 200 net_addr_roa6 *roa = (void *) fn->addr;
0264ccf6 201 net *r = fib_node_to_user(&tab->fib, fn);
286e2011
OZ
202
203 if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
0264ccf6 204 {
0264ccf6
PT
205 anything = 1;
206 if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
207 return ROA_VALID;
208 }
209 }
210
211 if (n.pxlen == 0)
212 break;
213
214 n.pxlen--;
215 ip6_clrbit(&n.prefix, n.pxlen);
216 }
217
218 return anything ? ROA_INVALID : ROA_UNKNOWN;
219}
220
286e2011
OZ
221/**
222 * roa_check - check validity of route origination in a ROA table
223 * @tab: ROA table
224 * @n: network prefix to check
225 * @asn: AS number of network prefix
226 *
227 * Implements RFC 6483 route validation for the given network prefix. The
228 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
229 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
230 * a candidate ROA with matching ASN and maxlen field greater than or equal to
231 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
232 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
233 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
234 * must have type NET_IP4 or NET_IP6, respectively.
235 */
236int
0264ccf6
PT
237net_roa_check(rtable *tab, const net_addr *n, u32 asn)
238{
286e2011
OZ
239 if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
240 return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
241 else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
242 return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
0264ccf6 243 else
286e2011 244 return ROA_UNKNOWN; /* Should not happen */
d1e146f2 245}
2326b001 246
58740ed4
MM
247/**
248 * rte_find - find a route
249 * @net: network node
094d2bdb 250 * @src: route source
58740ed4
MM
251 *
252 * The rte_find() function returns a route for destination @net
094d2bdb 253 * which is from route source @src.
58740ed4 254 */
2326b001 255rte *
094d2bdb 256rte_find(net *net, struct rte_src *src)
2326b001
MM
257{
258 rte *e = net->routes;
259
094d2bdb 260 while (e && e->attrs->src != src)
2326b001
MM
261 e = e->next;
262 return e;
263}
264
58740ed4
MM
265/**
266 * rte_get_temp - get a temporary &rte
3ce8c610 267 * @a: attributes to assign to the new route (a &rta; in case it's
2e9b2421 268 * un-cached, rte_update() will create a cached copy automatically)
58740ed4
MM
269 *
270 * Create a temporary &rte and bind it with the attributes @a.
271 * Also set route preference to the default preference set for
272 * the protocol.
273 */
2326b001
MM
274rte *
275rte_get_temp(rta *a)
276{
277 rte *e = sl_alloc(rte_slab);
278
279 e->attrs = a;
0cdbd397 280 e->flags = 0;
f4a60a9b 281 e->pref = 0;
2326b001
MM
282 return e;
283}
284
e2dc2f30
MM
285rte *
286rte_do_cow(rte *r)
287{
288 rte *e = sl_alloc(rte_slab);
289
290 memcpy(e, r, sizeof(rte));
291 e->attrs = rta_clone(r->attrs);
292 e->flags = 0;
293 return e;
294}
295
8d9eef17
OZ
296/**
297 * rte_cow_rta - get a private writable copy of &rte with writable &rta
298 * @r: a route entry to be copied
299 * @lp: a linpool from which to allocate &rta
300 *
301 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
302 * modification. There are three possibilities: First, both &rte and &rta are
303 * private copies, in that case they are returned unchanged. Second, &rte is
304 * private copy, but &rta is cached, in that case &rta is duplicated using
305 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
306 * both structures are duplicated by rte_do_cow() and rta_do_cow().
307 *
308 * Note that in the second case, cached &rta loses one reference, while private
309 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
310 * nexthops, ...) with it. To work properly, original shared &rta should have
311 * another reference during the life of created private copy.
312 *
313 * Result: a pointer to the new writable &rte with writable &rta.
314 */
315rte *
316rte_cow_rta(rte *r, linpool *lp)
317{
318 if (!rta_is_cached(r->attrs))
319 return r;
320
13c0be19 321 r = rte_cow(r);
8d9eef17 322 rta *a = rta_do_cow(r->attrs, lp);
13c0be19
JMM
323 rta_free(r->attrs);
324 r->attrs = a;
325 return r;
8d9eef17
OZ
326}
327
9aa77fcc
OZ
328
329/* Note that rte_make_tmp_attr() requires free eattr in ea_list */
330void
331rte_make_tmp_attr(rte *r, ea_list *e, uint id, uint type, u32 val)
332{
333 if (r->pflags & EA_ID_FLAG(id))
334 {
335 eattr *a = &e->attrs[e->count++];
336 a->id = id;
337 a->type = type | EAF_TEMP;
338 a->flags = 0;
339 a->u.data = val;
340 }
341}
342
343/* Note that rte has to be writable */
344uint
345rte_store_tmp_attr(rte *r, uint id)
346{
347 eattr *a;
348 if (a = ea_find(r->attrs->eattrs, id))
349 {
350 r->pflags |= EA_ID_FLAG(id);
351 return a->u.data;
352 }
353 else
354 {
355 r->pflags &= ~EA_ID_FLAG(id);
356 return 0;
357 }
358}
359
360
2326b001
MM
361static int /* Actually better or at least as good as */
362rte_better(rte *new, rte *old)
363{
d9f330c5
MM
364 int (*better)(rte *, rte *);
365
cf98be7b 366 if (!rte_is_valid(old))
2326b001 367 return 1;
cf98be7b
OZ
368 if (!rte_is_valid(new))
369 return 0;
370
2326b001
MM
371 if (new->pref > old->pref)
372 return 1;
373 if (new->pref < old->pref)
374 return 0;
094d2bdb 375 if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
4c1b4e1a
MM
376 {
377 /*
378 * If the user has configured protocol preferences, so that two different protocols
379 * have the same preference, try to break the tie by comparing addresses. Not too
380 * useful, but keeps the ordering of routes unambiguous.
381 */
094d2bdb 382 return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
4c1b4e1a 383 }
094d2bdb 384 if (better = new->attrs->src->proto->rte_better)
d9f330c5
MM
385 return better(new, old);
386 return 0;
2326b001
MM
387}
388
8d9eef17
OZ
389static int
390rte_mergable(rte *pri, rte *sec)
391{
392 int (*mergable)(rte *, rte *);
393
394 if (!rte_is_valid(pri) || !rte_is_valid(sec))
395 return 0;
396
397 if (pri->pref != sec->pref)
398 return 0;
399
400 if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
401 return 0;
402
403 if (mergable = pri->attrs->src->proto->rte_mergable)
404 return mergable(pri, sec);
405
406 return 0;
407}
408
cfd46ee4
MM
409static void
410rte_trace(struct proto *p, rte *e, int dir, char *msg)
411{
665be7f6 412 log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
cfd46ee4
MM
413}
414
415static inline void
ae80a2de 416rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
cfd46ee4
MM
417{
418 if (p->debug & flag)
b0a47440 419 rte_trace(p, e, '>', msg);
cfd46ee4
MM
420}
421
422static inline void
ae80a2de 423rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
cfd46ee4
MM
424{
425 if (p->debug & flag)
b0a47440 426 rte_trace(p, e, '<', msg);
cfd46ee4
MM
427}
428
00a09f3c 429static rte *
13c0be19 430export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
529c4149 431{
f4a60a9b
OZ
432 struct proto *p = c->proto;
433 struct filter *filter = c->out_filter;
434 struct proto_stats *stats = &c->stats;
00a09f3c
OZ
435 rte *rt;
436 int v;
c0adf7e9 437
00a09f3c
OZ
438 rt = rt0;
439 *rt_free = NULL;
7de45ba4 440
14375237 441 v = p->preexport ? p->preexport(p, &rt, pool) : 0;
00a09f3c
OZ
442 if (v < 0)
443 {
444 if (silent)
445 goto reject;
11361a10 446
00a09f3c 447 stats->exp_updates_rejected++;
36da2857
OZ
448 if (v == RIC_REJECT)
449 rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
00a09f3c
OZ
450 goto reject;
451 }
452 if (v > 0)
e2dc2f30 453 {
00a09f3c
OZ
454 if (!silent)
455 rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
456 goto accept;
e2dc2f30 457 }
925fe2d3 458
95488885
JMM
459 rte_make_tmp_attrs(&rt, pool);
460
00a09f3c 461 v = filter && ((filter == FILTER_REJECT) ||
13c0be19
JMM
462 (f_run(filter, &rt, pool,
463 (silent ? FF_SILENT : 0)) > F_ACCEPT));
00a09f3c
OZ
464 if (v)
465 {
466 if (silent)
467 goto reject;
468
469 stats->exp_updates_filtered++;
470 rte_trace_out(D_FILTERS, p, rt, "filtered out");
471 goto reject;
e2dc2f30 472 }
925fe2d3 473
00a09f3c
OZ
474 accept:
475 if (rt != rt0)
476 *rt_free = rt;
477 return rt;
478
479 reject:
480 /* Discard temporary rte */
481 if (rt != rt0)
482 rte_free(rt);
483 return NULL;
484}
485
a290da25 486static inline rte *
13c0be19 487export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
a290da25 488{
13c0be19 489 return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
a290da25
PT
490}
491
00a09f3c 492static void
13c0be19 493do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
00a09f3c 494{
f4a60a9b
OZ
495 struct proto *p = c->proto;
496 struct proto_stats *stats = &c->stats;
925fe2d3 497
abced4a9 498
ab758e4f 499 /*
abced4a9
OZ
500 * First, apply export limit.
501 *
ab758e4f
OZ
502 * Export route limits has several problems. Because exp_routes
503 * counter is reset before refeed, we don't really know whether
abced4a9 504 * limit is breached and whether the update is new or not. Therefore
ab758e4f
OZ
505 * the number of really exported routes may exceed the limit
506 * temporarily (routes exported before and new routes in refeed).
507 *
508 * Minor advantage is that if the limit is decreased and refeed is
509 * requested, the number of exported routes really decrease.
510 *
511 * Second problem is that with export limits, we don't know whether
512 * old was really exported (it might be blocked by limit). When a
513 * withdraw is exported, we announce it even when the previous
514 * update was blocked. This is not a big issue, but the same problem
515 * is in updating exp_routes counter. Therefore, to be consistent in
516 * increases and decreases of exp_routes, we count exported routes
517 * regardless of blocking by limits.
518 *
519 * Similar problem is in handling updates - when a new route is
520 * received and blocking is active, the route would be blocked, but
521 * when an update for the route will be received later, the update
522 * would be propagated (as old != NULL). Therefore, we have to block
523 * also non-new updates (contrary to import blocking).
524 */
925fe2d3 525
f4a60a9b
OZ
526 struct channel_limit *l = &c->out_limit;
527 if (l->action && new)
d9b77cc2 528 {
ab758e4f 529 if ((!old || refeed) && (stats->exp_routes >= l->limit))
f4a60a9b 530 channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
d9b77cc2
OZ
531
532 if (l->state == PLS_BLOCKED)
533 {
ab758e4f 534 stats->exp_routes++; /* see note above */
d9b77cc2
OZ
535 stats->exp_updates_rejected++;
536 rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
ab758e4f 537 new = NULL;
abced4a9
OZ
538
539 if (!old)
540 return;
d9b77cc2
OZ
541 }
542 }
543
ab758e4f 544
925fe2d3 545 if (new)
9db74169 546 stats->exp_updates_accepted++;
925fe2d3 547 else
9db74169 548 stats->exp_withdraws_accepted++;
925fe2d3 549
8a7fb885
OZ
550 /* Hack: We do not decrease exp_routes during refeed, we instead
551 reset exp_routes at the start of refeed. */
925fe2d3 552 if (new)
9db74169 553 stats->exp_routes++;
8a7fb885 554 if (old && !refeed)
9db74169 555 stats->exp_routes--;
925fe2d3 556
cfd46ee4
MM
557 if (p->debug & D_ROUTES)
558 {
559 if (new && old)
560 rte_trace_out(D_ROUTES, p, new, "replaced");
561 else if (new)
562 rte_trace_out(D_ROUTES, p, new, "added");
349e21bb 563 else if (old)
cfd46ee4
MM
564 rte_trace_out(D_ROUTES, p, old, "removed");
565 }
13c0be19 566 p->rt_notify(p, c, net, new, old);
00a09f3c
OZ
567}
568
00a09f3c 569static void
f4a60a9b 570rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
00a09f3c 571{
f4a60a9b 572 struct proto *p = c->proto;
00a09f3c 573
86f567e1
OZ
574 rte *new = new0;
575 rte *old = old0;
00a09f3c
OZ
576 rte *new_free = NULL;
577 rte *old_free = NULL;
578
579 if (new)
f4a60a9b 580 c->stats.exp_updates_received++;
00a09f3c 581 else
f4a60a9b 582 c->stats.exp_withdraws_received++;
00a09f3c
OZ
583
584 /*
a81e18da
OZ
585 * This is a tricky part - we don't know whether route 'old' was exported to
586 * protocol 'p' or was filtered by the export filter. We try to run the export
587 * filter to know this to have a correct value in 'old' argument of rte_update
588 * (and proper filter value).
00a09f3c 589 *
a81e18da
OZ
590 * This is broken because 'configure soft' may change filters but keep routes.
591 * Refeed cycle is expected to be called after change of the filters and with
592 * old == new, therefore we do not even try to run the filter on an old route.
593 * This may lead to 'spurious withdraws' but ensure that there are no 'missing
00a09f3c
OZ
594 * withdraws'.
595 *
a81e18da
OZ
596 * This is not completely safe as there is a window between reconfiguration
597 * and the end of refeed - if a newly filtered route disappears during this
598 * period, proper withdraw is not sent (because old would be also filtered)
599 * and the route is not refeeded (because it disappeared before that).
6e8fb668 600 * This is handled below as a special case.
00a09f3c
OZ
601 */
602
603 if (new)
13c0be19 604 new = export_filter(c, new, &new_free, 0);
00a09f3c 605
6e8fb668 606 if (old && !refeed)
13c0be19 607 old = export_filter(c, old, &old_free, 1);
00a09f3c 608
00a09f3c 609 if (!new && !old)
86f567e1
OZ
610 {
611 /*
612 * As mentioned above, 'old' value may be incorrect in some race conditions.
6e8fb668
OZ
613 * We generally ignore it with two exceptions:
614 *
615 * First, withdraw to pipe protocol. In that case we rather propagate
616 * unfiltered withdraws regardless of export filters to ensure that when a
617 * protocol is flushed, its routes are removed from all tables. Possible
618 * spurious unfiltered withdraws are not problem here as they are ignored if
619 * there is no corresponding route at the other end of the pipe.
620 *
621 * Second, recent filter change. If old route is older than filter change,
622 * then it was previously evaluated by a different filter and we do not know
623 * whether it was really propagated. In that case we rather send spurious
624 * withdraw than do nothing and possibly cause phantom routes.
625 *
626 * In both cases wqe directly call rt_notify() hook instead of
86f567e1
OZ
627 * do_rt_notify() to avoid logging and stat counters.
628 */
629
6e8fb668 630 int pipe_withdraw = 0, filter_change = 0;
86f567e1 631#ifdef CONFIG_PIPE
6e8fb668 632 pipe_withdraw = (p->proto == &proto_pipe) && !new0;
86f567e1 633#endif
6e8fb668
OZ
634 filter_change = old0 && (old0->lastmod <= c->last_tx_filter_change);
635
636 if ((pipe_withdraw || filter_change) && (p != old0->sender->proto))
637 {
638 c->stats.exp_withdraws_accepted++;
639 p->rt_notify(p, c, net, NULL, old0);
640 }
86f567e1 641
00a09f3c 642 return;
86f567e1 643 }
00a09f3c 644
13c0be19 645 do_rt_notify(c, net, new, old, refeed);
00a09f3c
OZ
646
647 /* Discard temporary rte's */
648 if (new_free)
649 rte_free(new_free);
650 if (old_free)
651 rte_free(old_free);
652}
653
654static void
f4a60a9b 655rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
00a09f3c 656{
3a8ca7ab 657 struct proto *p = c->proto;
00a09f3c 658
db027a41 659 rte *r;
00a09f3c
OZ
660 rte *new_best = NULL;
661 rte *old_best = NULL;
662 rte *new_free = NULL;
663 rte *old_free = NULL;
00a09f3c 664
cf98be7b
OZ
665 /* Used to track whether we met old_changed position. If before_old is NULL
666 old_changed was the first and we met it implicitly before current best route. */
667 int old_meet = old_changed && !before_old;
668
669 /* Note that before_old is either NULL or valid (not rejected) route.
670 If old_changed is valid, before_old have to be too. If old changed route
671 was not valid, caller must use NULL for both old_changed and before_old. */
00a09f3c
OZ
672
673 if (new_changed)
f4a60a9b 674 c->stats.exp_updates_received++;
00a09f3c 675 else
f4a60a9b 676 c->stats.exp_withdraws_received++;
00a09f3c
OZ
677
678 /* First, find the new_best route - first accepted by filters */
cf98be7b 679 for (r=net->routes; rte_is_valid(r); r=r->next)
00a09f3c 680 {
13c0be19 681 if (new_best = export_filter(c, r, &new_free, 0))
00a09f3c
OZ
682 break;
683
684 /* Note if we walked around the position of old_changed route */
685 if (r == before_old)
686 old_meet = 1;
687 }
688
a82f692e 689 /*
00a09f3c 690 * Second, handle the feed case. That means we do not care for
a82f692e 691 * old_best. It is NULL for feed, and the new_best for refeed.
00a09f3c
OZ
692 * For refeed, there is a hack similar to one in rt_notify_basic()
693 * to ensure withdraws in case of changed filters
694 */
695 if (feed)
696 {
697 if (feed == 2) /* refeed */
cf98be7b
OZ
698 old_best = new_best ? new_best :
699 (rte_is_valid(net->routes) ? net->routes : NULL);
00a09f3c
OZ
700 else
701 old_best = NULL;
702
703 if (!new_best && !old_best)
704 return;
705
706 goto found;
707 }
708
709 /*
710 * Now, we find the old_best route. Generally, it is the same as the
711 * new_best, unless new_best is the same as new_changed or
712 * old_changed is accepted before new_best.
713 *
714 * There are four cases:
715 *
716 * - We would find and accept old_changed before new_best, therefore
717 * old_changed is old_best. In remaining cases we suppose this
718 * is not true.
719 *
720 * - We found no new_best, therefore there is also no old_best and
721 * we ignore this withdraw.
722 *
723 * - We found new_best different than new_changed, therefore
724 * old_best is the same as new_best and we ignore this update.
725 *
726 * - We found new_best the same as new_changed, therefore it cannot
727 * be old_best and we have to continue search for old_best.
092c4930
OZ
728 *
729 * There is also a hack to ensure consistency in case of changed filters.
730 * It does not find the proper old_best, just selects a non-NULL route.
00a09f3c
OZ
731 */
732
092c4930 733 /* Hack for changed filters */
3a8ca7ab
OZ
734 if (old_changed &&
735 (p != old_changed->sender->proto) &&
736 (old_changed->lastmod <= c->last_tx_filter_change))
092c4930
OZ
737 {
738 old_best = old_changed;
739 goto found;
740 }
741
00a09f3c
OZ
742 /* First case */
743 if (old_meet)
13c0be19 744 if (old_best = export_filter(c, old_changed, &old_free, 1))
00a09f3c
OZ
745 goto found;
746
747 /* Second case */
748 if (!new_best)
749 return;
d9b77cc2 750
26822d8f 751 /* Third case, we use r instead of new_best, because export_filter() could change it */
00a09f3c
OZ
752 if (r != new_changed)
753 {
754 if (new_free)
755 rte_free(new_free);
756 return;
757 }
758
759 /* Fourth case */
cf98be7b 760 for (r=r->next; rte_is_valid(r); r=r->next)
00a09f3c 761 {
13c0be19 762 if (old_best = export_filter(c, r, &old_free, 1))
00a09f3c
OZ
763 goto found;
764
765 if (r == before_old)
13c0be19 766 if (old_best = export_filter(c, old_changed, &old_free, 1))
00a09f3c
OZ
767 goto found;
768 }
769
770 /* Implicitly, old_best is NULL and new_best is non-NULL */
771
772 found:
13c0be19 773 do_rt_notify(c, net, new_best, old_best, (feed == 2));
00a09f3c
OZ
774
775 /* Discard temporary rte's */
776 if (new_free)
777 rte_free(new_free);
778 if (old_free)
779 rte_free(old_free);
529c4149
MM
780}
781
8d9eef17 782
4e276a89
JMM
783static struct nexthop *
784nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
8d9eef17 785{
4e276a89 786 return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
8d9eef17
OZ
787}
788
789rte *
13c0be19 790rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
8d9eef17 791{
f4a60a9b 792 // struct proto *p = c->proto;
4e276a89 793 struct nexthop *nhs = NULL;
8d9eef17
OZ
794 rte *best0, *best, *rt0, *rt, *tmp;
795
796 best0 = net->routes;
797 *rt_free = NULL;
798
799 if (!rte_is_valid(best0))
800 return NULL;
801
13c0be19 802 best = export_filter_(c, best0, rt_free, pool, silent);
8d9eef17
OZ
803
804 if (!best || !rte_is_reachable(best))
805 return best;
806
807 for (rt0 = best0->next; rt0; rt0 = rt0->next)
808 {
809 if (!rte_mergable(best0, rt0))
810 continue;
811
13c0be19 812 rt = export_filter_(c, rt0, &tmp, pool, 1);
8d9eef17
OZ
813
814 if (!rt)
815 continue;
816
817 if (rte_is_reachable(rt))
4e276a89 818 nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);
8d9eef17
OZ
819
820 if (tmp)
821 rte_free(tmp);
822 }
823
824 if (nhs)
825 {
4e276a89 826 nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);
8d9eef17
OZ
827
828 if (nhs->next)
829 {
a290da25 830 best = rte_cow_rta(best, pool);
4e276a89 831 nexthop_link(best->attrs, nhs);
8d9eef17
OZ
832 }
833 }
834
835 if (best != best0)
836 *rt_free = best;
837
838 return best;
839}
840
841
842static void
f4a60a9b 843rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
8d9eef17
OZ
844 rte *new_best, rte*old_best, int refeed)
845{
f4a60a9b 846 // struct proto *p = c->proto;
8d9eef17
OZ
847
848 rte *new_best_free = NULL;
849 rte *old_best_free = NULL;
850 rte *new_changed_free = NULL;
851 rte *old_changed_free = NULL;
8d9eef17
OZ
852
853 /* We assume that all rte arguments are either NULL or rte_is_valid() */
854
855 /* This check should be done by the caller */
856 if (!new_best && !old_best)
857 return;
858
859 /* Check whether the change is relevant to the merged route */
860 if ((new_best == old_best) && !refeed)
861 {
862 new_changed = rte_mergable(new_best, new_changed) ?
13c0be19 863 export_filter(c, new_changed, &new_changed_free, 1) : NULL;
8d9eef17
OZ
864
865 old_changed = rte_mergable(old_best, old_changed) ?
13c0be19 866 export_filter(c, old_changed, &old_changed_free, 1) : NULL;
8d9eef17
OZ
867
868 if (!new_changed && !old_changed)
869 return;
870 }
871
872 if (new_best)
f4a60a9b 873 c->stats.exp_updates_received++;
8d9eef17 874 else
f4a60a9b 875 c->stats.exp_withdraws_received++;
8d9eef17
OZ
876
877 /* Prepare new merged route */
878 if (new_best)
13c0be19 879 new_best = rt_export_merged(c, net, &new_best_free, rte_update_pool, 0);
8d9eef17
OZ
880
881 /* Prepare old merged route (without proper merged next hops) */
882 /* There are some issues with running filter on old route - see rt_notify_basic() */
883 if (old_best && !refeed)
13c0be19 884 old_best = export_filter(c, old_best, &old_best_free, 1);
8d9eef17
OZ
885
886 if (new_best || old_best)
13c0be19 887 do_rt_notify(c, net, new_best, old_best, refeed);
8d9eef17
OZ
888
889 /* Discard temporary rte's */
890 if (new_best_free)
891 rte_free(new_best_free);
892 if (old_best_free)
893 rte_free(old_best_free);
894 if (new_changed_free)
895 rte_free(new_changed_free);
896 if (old_changed_free)
897 rte_free(old_changed_free);
898}
899
900
9a8f20fc
MM
901/**
902 * rte_announce - announce a routing table change
903 * @tab: table the route has been added to
23ac9e9a 904 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
9a8f20fc
MM
905 * @net: network in question
906 * @new: the new route to be announced
23ac9e9a 907 * @old: the previous route for the same network
8e433d6a
PT
908 * @new_best: the new best route for the same network
909 * @old_best: the previous best route for the same network
910 * @before_old: The previous route before @old for the same network.
a82f692e 911 * If @before_old is NULL @old was the first.
9a8f20fc
MM
912 *
913 * This function gets a routing table update and announces it
f98e2915
OZ
914 * to all protocols that acccepts given type of route announcement
915 * and are connected to the same table by their announcement hooks.
9a8f20fc 916 *
8e433d6a 917 * Route announcement of type %RA_OPTIMAL si generated when optimal
f98e2915
OZ
918 * route (in routing table @tab) changes. In that case @old stores the
919 * old optimal route.
23ac9e9a 920 *
8e433d6a 921 * Route announcement of type %RA_ANY si generated when any route (in
f98e2915
OZ
922 * routing table @tab) changes In that case @old stores the old route
923 * from the same protocol.
924 *
14375237 925 * For each appropriate protocol, we first call its preexport()
f98e2915
OZ
926 * hook which performs basic checks on the route (each protocol has a
927 * right to veto or force accept of the route before any filter is
928 * asked) and adds default values of attributes specific to the new
929 * protocol (metrics, tags etc.). Then it consults the protocol's
930 * export filter and if it accepts the route, the rt_notify() hook of
931 * the protocol gets called.
9a8f20fc 932 */
e2dc2f30 933static void
8d9eef17
OZ
934rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
935 rte *new_best, rte *old_best, rte *before_old)
2326b001 936{
8d9eef17
OZ
937 if (!rte_is_valid(new))
938 new = NULL;
939
cf98be7b
OZ
940 if (!rte_is_valid(old))
941 old = before_old = NULL;
942
8d9eef17
OZ
943 if (!rte_is_valid(new_best))
944 new_best = NULL;
945
946 if (!rte_is_valid(old_best))
947 old_best = NULL;
cf98be7b
OZ
948
949 if (!old && !new)
950 return;
2326b001 951
e1c275d8
OZ
952 if (type == RA_OPTIMAL)
953 {
954 if (new)
955 new->sender->stats.pref_routes++;
956 if (old)
957 old->sender->stats.pref_routes--;
958
959 if (tab->hostcache)
960 rt_notify_hostcache(tab, net);
961 }
cfe34a31 962
f4a60a9b
OZ
963 struct channel *c; node *n;
964 WALK_LIST2(c, n, tab->channels, table_node)
0a2e9d9f 965 {
f4a60a9b
OZ
966 if (c->export_state == ES_DOWN)
967 continue;
968
969 if (c->ra_mode == type)
00a09f3c 970 if (type == RA_ACCEPTED)
f4a60a9b 971 rt_notify_accepted(c, net, new, old, before_old, 0);
8d9eef17 972 else if (type == RA_MERGED)
f4a60a9b 973 rt_notify_merged(c, net, new, old, new_best, old_best, 0);
00a09f3c 974 else
f4a60a9b 975 rt_notify_basic(c, net, new, old, 0);
0a2e9d9f 976 }
2326b001
MM
977}
978
421838ff
MM
979static inline int
980rte_validate(rte *e)
981{
982 int c;
983 net *n = e->net;
984
fe9f1a6d
OZ
985 if (!net_validate(n->n.addr))
986 {
987 log(L_WARN "Ignoring bogus prefix %N received via %s",
988 n->n.addr, e->sender->proto->name);
989 return 0;
990 }
ff2857b0 991
7fc55925
OZ
992 /* FIXME: better handling different nettypes */
993 c = !net_is_flow(n->n.addr) ?
994 net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
ff2857b0 995 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
fe9f1a6d
OZ
996 {
997 log(L_WARN "Ignoring bogus route %N received via %s",
998 n->n.addr, e->sender->proto->name);
999 return 0;
1000 }
ff2857b0 1001
4278abfe
OZ
1002 if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
1003 {
1004 log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
1005 n->n.addr, e->attrs->dest, e->sender->proto->name);
1006 return 0;
1007 }
1008
4e276a89 1009 if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
4278abfe
OZ
1010 {
1011 log(L_WARN "Ignoring unsorted multipath route %N received via %s",
1012 n->n.addr, e->sender->proto->name);
1013 return 0;
1014 }
84cac51a 1015
421838ff
MM
1016 return 1;
1017}
1018
58740ed4
MM
1019/**
1020 * rte_free - delete a &rte
1021 * @e: &rte to be deleted
1022 *
1023 * rte_free() deletes the given &rte from the routing table it's linked to.
1024 */
04925e90 1025void
2326b001 1026rte_free(rte *e)
04925e90 1027{
094d2bdb 1028 if (rta_is_cached(e->attrs))
04925e90
MM
1029 rta_free(e->attrs);
1030 sl_free(rte_slab, e);
1031}
1032
1033static inline void
1034rte_free_quick(rte *e)
2326b001
MM
1035{
1036 rta_free(e->attrs);
1037 sl_free(rte_slab, e);
1038}
1039
67be5b23
MM
1040static int
1041rte_same(rte *x, rte *y)
1042{
93af78d2 1043 /* rte.flags are not checked, as they are mostly internal to rtable */
67be5b23
MM
1044 return
1045 x->attrs == y->attrs &&
67be5b23
MM
1046 x->pflags == y->pflags &&
1047 x->pref == y->pref &&
93af78d2
OZ
1048 (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y)) &&
1049 rte_is_filtered(x) == rte_is_filtered(y);
67be5b23
MM
1050}
1051
70577529
OZ
1052static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
1053
e2dc2f30 1054static void
f4a60a9b 1055rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
2326b001 1056{
f4a60a9b
OZ
1057 struct proto *p = c->proto;
1058 struct rtable *table = c->table;
1059 struct proto_stats *stats = &c->stats;
1123e707 1060 static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
00a09f3c 1061 rte *before_old = NULL;
2326b001
MM
1062 rte *old_best = net->routes;
1063 rte *old = NULL;
00a09f3c 1064 rte **k;
2326b001
MM
1065
1066 k = &net->routes; /* Find and remove original route from the same protocol */
1067 while (old = *k)
1068 {
094d2bdb 1069 if (old->attrs->src == src)
2326b001 1070 {
11787b84
OZ
1071 /* If there is the same route in the routing table but from
1072 * a different sender, then there are two paths from the
1073 * source protocol to this routing table through transparent
1074 * pipes, which is not allowed.
1075 *
1076 * We log that and ignore the route. If it is withdraw, we
1077 * ignore it completely (there might be 'spurious withdraws',
1078 * see FIXME in do_rte_announce())
1079 */
c0adf7e9 1080 if (old->sender->proto != p)
11787b84
OZ
1081 {
1082 if (new)
1083 {
fe9f1a6d
OZ
1084 log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
1085 net->n.addr, table->name);
11787b84
OZ
1086 rte_free_quick(new);
1087 }
1088 return;
1089 }
1090
0b761098 1091 if (new && rte_same(old, new))
67be5b23 1092 {
93af78d2
OZ
1093 /* No changes, ignore the new route and refresh the old one */
1094
1095 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
cf98be7b 1096
15550957 1097 if (!rte_is_filtered(new))
cf98be7b
OZ
1098 {
1099 stats->imp_updates_ignored++;
1100 rte_trace_in(D_ROUTES, p, new, "ignored");
1101 }
1102
67be5b23 1103 rte_free_quick(new);
67be5b23
MM
1104 return;
1105 }
2326b001 1106 *k = old->next;
67d8665a 1107 table->rt_count--;
2326b001
MM
1108 break;
1109 }
1110 k = &old->next;
00a09f3c 1111 before_old = old;
2326b001
MM
1112 }
1113
00a09f3c
OZ
1114 if (!old)
1115 before_old = NULL;
1116
925fe2d3
OZ
1117 if (!old && !new)
1118 {
9db74169 1119 stats->imp_withdraws_ignored++;
925fe2d3
OZ
1120 return;
1121 }
1122
b662290f
OZ
1123 int new_ok = rte_is_ok(new);
1124 int old_ok = rte_is_ok(old);
1125
f4a60a9b 1126 struct channel_limit *l = &c->rx_limit;
67d8665a 1127 if (l->action && !old && new && !c->in_table)
ebecb6f6 1128 {
15550957 1129 u32 all_routes = stats->imp_routes + stats->filt_routes;
cf98be7b
OZ
1130
1131 if (all_routes >= l->limit)
f4a60a9b 1132 channel_notify_limit(c, l, PLD_RX, all_routes);
7d0a31de
OZ
1133
1134 if (l->state == PLS_BLOCKED)
1135 {
b662290f
OZ
1136 /* In receive limit the situation is simple, old is NULL so
1137 we just free new and exit like nothing happened */
1138
7d0a31de
OZ
1139 stats->imp_updates_ignored++;
1140 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1141 rte_free_quick(new);
1142 return;
1143 }
ebecb6f6
OZ
1144 }
1145
f4a60a9b
OZ
1146 l = &c->in_limit;
1147 if (l->action && !old_ok && new_ok)
b662290f
OZ
1148 {
1149 if (stats->imp_routes >= l->limit)
f4a60a9b 1150 channel_notify_limit(c, l, PLD_IN, stats->imp_routes);
b662290f
OZ
1151
1152 if (l->state == PLS_BLOCKED)
1153 {
1154 /* In import limit the situation is more complicated. We
1155 shouldn't just drop the route, we should handle it like
1156 it was filtered. We also have to continue the route
1157 processing if old or new is non-NULL, but we should exit
1158 if both are NULL as this case is probably assumed to be
1159 already handled. */
1160
1161 stats->imp_updates_ignored++;
1162 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1163
f4a60a9b 1164 if (c->in_keep_filtered)
b662290f
OZ
1165 new->flags |= REF_FILTERED;
1166 else
1167 { rte_free_quick(new); new = NULL; }
1168
1169 /* Note that old && !new could be possible when
f4a60a9b 1170 c->in_keep_filtered changed in the recent past. */
b662290f
OZ
1171
1172 if (!old && !new)
1173 return;
1174
1175 new_ok = 0;
1176 goto skip_stats1;
1177 }
1178 }
70577529
OZ
1179
1180 if (new_ok)
9db74169 1181 stats->imp_updates_accepted++;
70577529 1182 else if (old_ok)
9db74169 1183 stats->imp_withdraws_accepted++;
70577529
OZ
1184 else
1185 stats->imp_withdraws_ignored++;
925fe2d3 1186
b662290f 1187 skip_stats1:
925fe2d3
OZ
1188
1189 if (new)
15550957 1190 rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
925fe2d3 1191 if (old)
15550957 1192 rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
925fe2d3 1193
26822d8f 1194 if (table->config->sorted)
2326b001 1195 {
26822d8f
OZ
1196 /* If routes are sorted, just insert new route to appropriate position */
1197 if (new)
1198 {
1199 if (before_old && !rte_better(new, before_old))
1200 k = &before_old->next;
1201 else
1202 k = &net->routes;
c0973621 1203
26822d8f
OZ
1204 for (; *k; k=&(*k)->next)
1205 if (rte_better(new, *k))
1206 break;
c0973621 1207
26822d8f
OZ
1208 new->next = *k;
1209 *k = new;
67d8665a 1210 table->rt_count++;
26822d8f 1211 }
2326b001 1212 }
26822d8f 1213 else
2326b001 1214 {
26822d8f
OZ
1215 /* If routes are not sorted, find the best route and move it on
1216 the first position. There are several optimized cases. */
1217
094d2bdb 1218 if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
26822d8f
OZ
1219 goto do_recalculate;
1220
1221 if (new && rte_better(new, old_best))
2326b001 1222 {
26822d8f
OZ
1223 /* The first case - the new route is cleary optimal,
1224 we link it at the first position */
1225
c0973621
OZ
1226 new->next = net->routes;
1227 net->routes = new;
67d8665a 1228 table->rt_count++;
c0973621 1229 }
26822d8f 1230 else if (old == old_best)
c0973621 1231 {
26822d8f
OZ
1232 /* The second case - the old best route disappeared, we add the
1233 new route (if we have any) to the list (we don't care about
1234 position) and then we elect the new optimal route and relink
1235 that route at the first position and announce it. New optimal
1236 route might be NULL if there is no more routes */
1237
1238 do_recalculate:
1239 /* Add the new route to the list */
1240 if (new)
2326b001 1241 {
26822d8f
OZ
1242 new->next = net->routes;
1243 net->routes = new;
67d8665a 1244 table->rt_count++;
26822d8f
OZ
1245 }
1246
1247 /* Find a new optimal route (if there is any) */
1248 if (net->routes)
1249 {
1250 rte **bp = &net->routes;
1251 for (k=&(*bp)->next; *k; k=&(*k)->next)
1252 if (rte_better(*k, *bp))
1253 bp = k;
1254
1255 /* And relink it */
1256 rte *best = *bp;
1257 *bp = best->next;
1258 best->next = net->routes;
1259 net->routes = best;
2326b001 1260 }
2326b001 1261 }
26822d8f
OZ
1262 else if (new)
1263 {
1264 /* The third case - the new route is not better than the old
1265 best route (therefore old_best != NULL) and the old best
1266 route was not removed (therefore old_best == net->routes).
1267 We just link the new route after the old best route. */
1268
1269 ASSERT(net->routes != NULL);
1270 new->next = net->routes->next;
1271 net->routes->next = new;
67d8665a 1272 table->rt_count++;
26822d8f
OZ
1273 }
1274 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
2326b001 1275 }
c0973621 1276
26822d8f 1277 if (new)
f047271c 1278 new->lastmod = current_time();
26822d8f
OZ
1279
1280 /* Log the route change */
70577529 1281 if (p->debug & D_ROUTES)
e8b29bdc 1282 {
70577529
OZ
1283 if (new_ok)
1284 rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1285 else if (old_ok)
1286 {
1287 if (old != old_best)
1288 rte_trace(p, old, '>', "removed");
1289 else if (rte_is_ok(net->routes))
1290 rte_trace(p, old, '>', "removed [replaced]");
1291 else
1292 rte_trace(p, old, '>', "removed [sole]");
1293 }
c0973621
OZ
1294 }
1295
26822d8f 1296 /* Propagate the route change */
8d9eef17 1297 rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
26822d8f 1298 if (net->routes != old_best)
8d9eef17 1299 rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
26822d8f 1300 if (table->config->sorted)
8d9eef17
OZ
1301 rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1302 rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
00a09f3c
OZ
1303
1304 if (!net->routes &&
1305 (table->gc_counter++ >= table->config->gc_max_ops) &&
f047271c 1306 (table->gc_time + table->config->gc_min_time <= current_time()))
f4a60a9b 1307 rt_schedule_prune(table);
00a09f3c 1308
70577529
OZ
1309 if (old_ok && p->rte_remove)
1310 p->rte_remove(net, old);
1311 if (new_ok && p->rte_insert)
1312 p->rte_insert(net, new);
1313
2326b001 1314 if (old)
70577529 1315 rte_free_quick(old);
5b22683d
MM
1316}
1317
e2dc2f30
MM
1318static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */
1319
1320static inline void
1321rte_update_lock(void)
1322{
1323 rte_update_nest_cnt++;
1324}
1325
1326static inline void
1327rte_update_unlock(void)
1328{
1329 if (!--rte_update_nest_cnt)
1330 lp_flush(rte_update_pool);
1331}
1332
fad04c75
OZ
1333static inline void
1334rte_hide_dummy_routes(net *net, rte **dummy)
1335{
1336 if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1337 {
1338 *dummy = net->routes;
1339 net->routes = (*dummy)->next;
1340 }
1341}
1342
1343static inline void
1344rte_unhide_dummy_routes(net *net, rte **dummy)
1345{
1346 if (*dummy)
1347 {
1348 (*dummy)->next = net->routes;
1349 net->routes = *dummy;
1350 }
1351}
1352
58740ed4
MM
1353/**
1354 * rte_update - enter a new update to a routing table
1355 * @table: table to be updated
f4a60a9b 1356 * @c: channel doing the update
58740ed4
MM
1357 * @net: network node
1358 * @p: protocol submitting the update
f98e2915 1359 * @src: protocol originating the update
58740ed4
MM
1360 * @new: a &rte representing the new route or %NULL for route removal.
1361 *
1362 * This function is called by the routing protocols whenever they discover
1363 * a new route or wish to update/remove an existing route. The right announcement
2e9b2421 1364 * sequence is to build route attributes first (either un-cached with @aflags set
58740ed4
MM
1365 * to zero or a cached one using rta_lookup(); in this case please note that
1366 * you need to increase the use count of the attributes yourself by calling
1367 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1368 * the appropriate data and finally submit the new &rte by calling rte_update().
1369 *
f98e2915
OZ
1370 * @src specifies the protocol that originally created the route and the meaning
1371 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1372 * same value as @new->attrs->proto. @p specifies the protocol that called
1373 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1374 * stores @p in @new->sender;
1375 *
9a8f20fc
MM
1376 * When rte_update() gets any route, it automatically validates it (checks,
1377 * whether the network and next hop address are valid IP addresses and also
1378 * whether a normal routing protocol doesn't try to smuggle a host or link
1379 * scope route to the table), converts all protocol dependent attributes stored
1380 * in the &rte to temporary extended attributes, consults import filters of the
1381 * protocol to see if the route should be accepted and/or its attributes modified,
1382 * stores the temporary attributes back to the &rte.
1383 *
1384 * Now, having a "public" version of the route, we
f98e2915 1385 * automatically find any old route defined by the protocol @src
58740ed4
MM
1386 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1387 * recalculate the optimal route for this destination and finally broadcast
9a8f20fc 1388 * the change (if any) to all routing protocols by calling rte_announce().
3ce8c610
MM
1389 *
1390 * All memory used for attribute lists and other temporary allocations is taken
1391 * from a special linear pool @rte_update_pool and freed when rte_update()
1392 * finishes.
58740ed4 1393 */
23ac9e9a
OZ
1394
1395void
65d2a88d 1396rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
e2dc2f30 1397{
f4a60a9b
OZ
1398 struct proto *p = c->proto;
1399 struct proto_stats *stats = &c->stats;
1400 struct filter *filter = c->in_filter;
fad04c75 1401 rte *dummy = NULL;
2003a184 1402 net *nn;
e2dc2f30 1403
f4a60a9b
OZ
1404 ASSERT(c->channel_state == CS_UP);
1405
e2dc2f30
MM
1406 rte_update_lock();
1407 if (new)
1408 {
c65a9a05
MM
1409 /* Create a temporary table node */
1410 nn = alloca(sizeof(net) + n->length);
1411 memset(nn, 0, sizeof(net) + n->length);
1412 net_copy(nn->n.addr, n);
2003a184
JMM
1413
1414 new->net = nn;
f4a60a9b
OZ
1415 new->sender = c;
1416
1417 if (!new->pref)
1418 new->pref = c->preference;
40b65f94 1419
9db74169 1420 stats->imp_updates_received++;
cfd46ee4
MM
1421 if (!rte_validate(new))
1422 {
1423 rte_trace_in(D_FILTERS, p, new, "invalid");
9db74169 1424 stats->imp_updates_invalid++;
cfd46ee4
MM
1425 goto drop;
1426 }
cf98be7b 1427
40b65f94 1428 if (filter == FILTER_REJECT)
cfd46ee4 1429 {
9db74169 1430 stats->imp_updates_filtered++;
cfd46ee4 1431 rte_trace_in(D_FILTERS, p, new, "filtered out");
094d2bdb 1432
f4a60a9b 1433 if (! c->in_keep_filtered)
cf98be7b
OZ
1434 goto drop;
1435
1436 /* new is a private copy, i could modify it */
15550957 1437 new->flags |= REF_FILTERED;
cfd46ee4 1438 }
cf98be7b 1439 else
e2dc2f30 1440 {
13c0be19 1441 rte_make_tmp_attrs(&new, rte_update_pool);
cf98be7b 1442 if (filter && (filter != FILTER_REJECT))
cfd46ee4 1443 {
13c0be19
JMM
1444 ea_list *oldea = new->attrs->eattrs;
1445 int fr = f_run(filter, &new, rte_update_pool, 0);
cf98be7b
OZ
1446 if (fr > F_ACCEPT)
1447 {
1448 stats->imp_updates_filtered++;
1449 rte_trace_in(D_FILTERS, p, new, "filtered out");
1450
f4a60a9b 1451 if (! c->in_keep_filtered)
cf98be7b
OZ
1452 goto drop;
1453
15550957 1454 new->flags |= REF_FILTERED;
cf98be7b 1455 }
13c0be19
JMM
1456 if (new->attrs->eattrs != oldea && src->proto->store_tmp_attrs)
1457 src->proto->store_tmp_attrs(new);
cfd46ee4 1458 }
e2dc2f30 1459 }
094d2bdb 1460 if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
e2dc2f30
MM
1461 new->attrs = rta_lookup(new->attrs);
1462 new->flags |= REF_COW;
c65a9a05
MM
1463
1464 /* Use the actual struct network, not the dummy one */
1465 nn = net_get(c->table, n);
1466 new->net = nn;
e2dc2f30 1467 }
925fe2d3 1468 else
094d2bdb
OZ
1469 {
1470 stats->imp_withdraws_received++;
1471
2003a184 1472 if (!(nn = net_find(c->table, n)) || !src)
094d2bdb
OZ
1473 {
1474 stats->imp_withdraws_ignored++;
1475 rte_update_unlock();
1476 return;
1477 }
1478 }
925fe2d3 1479
fad04c75 1480 recalc:
c65a9a05 1481 /* And recalculate the best route */
2003a184
JMM
1482 rte_hide_dummy_routes(nn, &dummy);
1483 rte_recalculate(c, nn, new, src);
1484 rte_unhide_dummy_routes(nn, &dummy);
c65a9a05 1485
e2dc2f30
MM
1486 rte_update_unlock();
1487 return;
1488
fad04c75 1489 drop:
e2dc2f30 1490 rte_free(new);
fad04c75 1491 new = NULL;
c65a9a05
MM
1492 if (nn = net_find(c->table, n))
1493 goto recalc;
1494
1495 rte_update_unlock();
e2dc2f30
MM
1496}
1497
cfe34a31
OZ
1498/* Independent call to rte_announce(), used from next hop
1499 recalculation, outside of rte_update(). new must be non-NULL */
a82f692e 1500static inline void
8d9eef17
OZ
1501rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
1502 rte *new_best, rte *old_best)
cfe34a31 1503{
cfe34a31 1504 rte_update_lock();
8d9eef17 1505 rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
cfe34a31
OZ
1506 rte_update_unlock();
1507}
1508
3e236955
JMM
1509static inline void
1510rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */
5b22683d 1511{
e2dc2f30 1512 rte_update_lock();
db027a41 1513 rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
e2dc2f30 1514 rte_update_unlock();
2326b001
MM
1515}
1516
5bd73431
OZ
1517/* Modify existing route by protocol hook, used for long-lived graceful restart */
1518static inline void
1519rte_modify(rte *old)
1520{
1521 rte_update_lock();
1522
1523 rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
1524 if (new != old)
1525 {
1526 if (new)
1527 {
1528 if (!rta_is_cached(new->attrs))
1529 new->attrs = rta_lookup(new->attrs);
1530 new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
1531 }
1532
1533 rte_recalculate(old->sender, old->net, new, old->attrs->src);
1534 }
1535
1536 rte_update_unlock();
1537}
1538
36da2857
OZ
1539/* Check rtable for best route to given net whether it would be exported do p */
1540int
fe9f1a6d 1541rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
36da2857 1542{
fe9f1a6d 1543 net *n = net_find(t, a);
36da2857
OZ
1544 rte *rt = n ? n->routes : NULL;
1545
1546 if (!rte_is_valid(rt))
1547 return 0;
1548
1549 rte_update_lock();
1550
1551 /* Rest is stripped down export_filter() */
14375237 1552 int v = p->preexport ? p->preexport(p, &rt, rte_update_pool) : 0;
36da2857 1553 if (v == RIC_PROCESS)
95488885
JMM
1554 {
1555 rte_make_tmp_attrs(&rt, rte_update_pool);
13c0be19 1556 v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);
95488885 1557 }
36da2857 1558
95488885 1559 /* Discard temporary rte */
36da2857
OZ
1560 if (rt != n->routes)
1561 rte_free(rt);
1562
1563 rte_update_unlock();
1564
1565 return v > 0;
1566}
1567
6eda3f13
OZ
1568
1569/**
1570 * rt_refresh_begin - start a refresh cycle
1571 * @t: related routing table
f4a60a9b 1572 * @c related channel
6eda3f13
OZ
1573 *
1574 * This function starts a refresh cycle for given routing table and announce
1575 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1576 * routes to the routing table (by rte_update()). After that, all protocol
f4a60a9b 1577 * routes (more precisely routes with @c as @sender) not sent during the
6eda3f13
OZ
1578 * refresh cycle but still in the table from the past are pruned. This is
1579 * implemented by marking all related routes as stale by REF_STALE flag in
1580 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1581 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1582 */
0c791f87 1583void
f4a60a9b 1584rt_refresh_begin(rtable *t, struct channel *c)
0c791f87 1585{
600998fc 1586 FIB_WALK(&t->fib, net, n)
0c791f87 1587 {
600998fc 1588 rte *e;
0c791f87 1589 for (e = n->routes; e; e = e->next)
f4a60a9b 1590 if (e->sender == c)
0c791f87
OZ
1591 e->flags |= REF_STALE;
1592 }
1593 FIB_WALK_END;
1594}
1595
6eda3f13
OZ
1596/**
1597 * rt_refresh_end - end a refresh cycle
1598 * @t: related routing table
f4a60a9b 1599 * @c: related channel
6eda3f13 1600 *
f4a60a9b 1601 * This function ends a refresh cycle for given routing table and announce
6eda3f13
OZ
1602 * hook. See rt_refresh_begin() for description of refresh cycles.
1603 */
0c791f87 1604void
f4a60a9b 1605rt_refresh_end(rtable *t, struct channel *c)
0c791f87
OZ
1606{
1607 int prune = 0;
0c791f87 1608
600998fc 1609 FIB_WALK(&t->fib, net, n)
0c791f87 1610 {
600998fc 1611 rte *e;
0c791f87 1612 for (e = n->routes; e; e = e->next)
f4a60a9b 1613 if ((e->sender == c) && (e->flags & REF_STALE))
0c791f87
OZ
1614 {
1615 e->flags |= REF_DISCARD;
1616 prune = 1;
1617 }
1618 }
1619 FIB_WALK_END;
1620
1621 if (prune)
1622 rt_schedule_prune(t);
1623}
1624
5bd73431
OZ
1625void
1626rt_modify_stale(rtable *t, struct channel *c)
1627{
1628 int prune = 0;
1629
1630 FIB_WALK(&t->fib, net, n)
1631 {
1632 rte *e;
1633 for (e = n->routes; e; e = e->next)
1634 if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
1635 {
1636 e->flags |= REF_MODIFY;
1637 prune = 1;
1638 }
1639 }
1640 FIB_WALK_END;
1641
1642 if (prune)
1643 rt_schedule_prune(t);
1644}
0c791f87 1645
58740ed4
MM
1646/**
1647 * rte_dump - dump a route
1648 * @e: &rte to be dumped
1649 *
1650 * This functions dumps contents of a &rte to debug output.
1651 */
2326b001 1652void
a0762910 1653rte_dump(rte *e)
2326b001 1654{
a0762910 1655 net *n = e->net;
fe9f1a6d 1656 debug("%-1N ", n->n.addr);
f047271c 1657 debug("KF=%02x PF=%02x pref=%d ", n->n.flags, e->pflags, e->pref);
0cdbd397 1658 rta_dump(e->attrs);
094d2bdb
OZ
1659 if (e->attrs->src->proto->proto->dump_attrs)
1660 e->attrs->src->proto->proto->dump_attrs(e);
0cdbd397 1661 debug("\n");
2326b001 1662}
62aa008a 1663
58740ed4
MM
1664/**
1665 * rt_dump - dump a routing table
1666 * @t: routing table to be dumped
1667 *
1668 * This function dumps contents of a given routing table to debug output.
1669 */
2326b001
MM
1670void
1671rt_dump(rtable *t)
1672{
0cdbd397 1673 debug("Dump of routing table <%s>\n", t->name);
e440395d 1674#ifdef DEBUGGING
08e2d625 1675 fib_check(&t->fib);
e440395d 1676#endif
600998fc 1677 FIB_WALK(&t->fib, net, n)
08e2d625 1678 {
600998fc 1679 rte *e;
08e2d625
MM
1680 for(e=n->routes; e; e=e->next)
1681 rte_dump(e);
0cdbd397 1682 }
08e2d625 1683 FIB_WALK_END;
0cdbd397 1684 debug("\n");
2326b001 1685}
62aa008a 1686
58740ed4
MM
1687/**
1688 * rt_dump_all - dump all routing tables
1689 *
1690 * This function dumps contents of all routing tables to debug output.
1691 */
6d45cf21
MM
1692void
1693rt_dump_all(void)
1694{
0e02abfd
MM
1695 rtable *t;
1696
1697 WALK_LIST(t, routing_tables)
1698 rt_dump(t);
6d45cf21
MM
1699}
1700
cfe34a31
OZ
1701static inline void
1702rt_schedule_hcu(rtable *tab)
1703{
1704 if (tab->hcu_scheduled)
1705 return;
1706
1707 tab->hcu_scheduled = 1;
1708 ev_schedule(tab->rt_event);
1709}
1710
1711static inline void
1712rt_schedule_nhu(rtable *tab)
1713{
93f50ca3 1714 if (tab->nhu_state == NHU_CLEAN)
cfe34a31
OZ
1715 ev_schedule(tab->rt_event);
1716
93f50ca3
JMM
1717 /* state change:
1718 * NHU_CLEAN -> NHU_SCHEDULED
1719 * NHU_RUNNING -> NHU_DIRTY
1720 */
1721 tab->nhu_state |= NHU_SCHEDULED;
cfe34a31
OZ
1722}
1723
f4a60a9b
OZ
1724void
1725rt_schedule_prune(rtable *tab)
fb829de6 1726{
f4a60a9b
OZ
1727 if (tab->prune_state == 0)
1728 ev_schedule(tab->rt_event);
fb829de6 1729
f4a60a9b
OZ
1730 /* state change 0->1, 2->3 */
1731 tab->prune_state |= 1;
fb829de6
OZ
1732}
1733
f4a60a9b 1734
8f6accb5 1735static void
cfe34a31 1736rt_event(void *ptr)
5996da6a 1737{
cfe34a31
OZ
1738 rtable *tab = ptr;
1739
286e2011
OZ
1740 rt_lock_table(tab);
1741
cfe34a31
OZ
1742 if (tab->hcu_scheduled)
1743 rt_update_hostcache(tab);
0e02abfd 1744
cfe34a31
OZ
1745 if (tab->nhu_state)
1746 rt_next_hop_update(tab);
1747
0c791f87 1748 if (tab->prune_state)
f4a60a9b 1749 rt_prune_table(tab);
286e2011
OZ
1750
1751 rt_unlock_table(tab);
5996da6a
MM
1752}
1753
b9626ec6 1754void
28b3b551 1755rt_setup(pool *p, rtable *t, struct rtable_config *cf)
b9626ec6
MM
1756{
1757 bzero(t, sizeof(*t));
28b3b551 1758 t->name = cf->name;
b9626ec6 1759 t->config = cf;
28b3b551 1760 t->addr_type = cf->addr_type;
fe9f1a6d 1761 fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
f4a60a9b
OZ
1762 init_list(&t->channels);
1763
961671c0 1764 t->rt_event = ev_new_init(p, rt_event, t);
28b3b551 1765 t->gc_time = current_time();
b9626ec6
MM
1766}
1767
58740ed4
MM
1768/**
1769 * rt_init - initialize routing tables
1770 *
1771 * This function is called during BIRD startup. It initializes the
1772 * routing table module.
1773 */
2326b001
MM
1774void
1775rt_init(void)
1776{
1777 rta_init();
5996da6a 1778 rt_table_pool = rp_new(&root_pool, "Routing tables");
05d47bd5 1779 rte_update_pool = lp_new_default(rt_table_pool);
5996da6a 1780 rte_slab = sl_new(rt_table_pool, sizeof(rte));
0e02abfd 1781 init_list(&routing_tables);
2326b001 1782}
1a54b1c6 1783
fb829de6 1784
f4a60a9b
OZ
1785/**
1786 * rt_prune_table - prune a routing table
1787 *
1788 * The prune loop scans routing tables and removes routes belonging to flushing
1789 * protocols, discarded routes and also stale network entries. It is called from
1790 * rt_event(). The event is rescheduled if the current iteration do not finish
1791 * the table. The pruning is directed by the prune state (@prune_state),
1792 * specifying whether the prune cycle is scheduled or running, and there
1793 * is also a persistent pruning iterator (@prune_fit).
1794 *
1795 * The prune loop is used also for channel flushing. For this purpose, the
1796 * channels to flush are marked before the iteration and notified after the
1797 * iteration.
1798 */
1799static void
1800rt_prune_table(rtable *tab)
fb829de6
OZ
1801{
1802 struct fib_iterator *fit = &tab->prune_fit;
f4a60a9b
OZ
1803 int limit = 512;
1804
1805 struct channel *c;
1806 node *n, *x;
1a54b1c6
MM
1807
1808 DBG("Pruning route table %s\n", tab->name);
0521e4f6
MM
1809#ifdef DEBUGGING
1810 fib_check(&tab->fib);
1811#endif
fb829de6 1812
f4a60a9b
OZ
1813 if (tab->prune_state == 0)
1814 return;
fb829de6 1815
f4a60a9b
OZ
1816 if (tab->prune_state == 1)
1817 {
1818 /* Mark channels to flush */
1819 WALK_LIST2(c, n, tab->channels, table_node)
1820 if (c->channel_state == CS_FLUSHING)
1821 c->flush_active = 1;
1822
1823 FIB_ITERATE_INIT(fit, &tab->fib);
1824 tab->prune_state = 2;
1825 }
fb829de6 1826
08e2d625 1827again:
600998fc 1828 FIB_ITERATE_START(&tab->fib, fit, net, n)
1a54b1c6 1829 {
08e2d625 1830 rte *e;
fb829de6 1831
08e2d625 1832 rescan:
fb829de6 1833 for (e=n->routes; e; e=e->next)
5bd73431 1834 {
f4a60a9b 1835 if (e->sender->flush_active || (e->flags & REF_DISCARD))
08e2d625 1836 {
f4a60a9b 1837 if (limit <= 0)
fb829de6 1838 {
600998fc 1839 FIB_ITERATE_PUT(fit);
f4a60a9b
OZ
1840 ev_schedule(tab->rt_event);
1841 return;
fb829de6
OZ
1842 }
1843
3e236955 1844 rte_discard(e);
f4a60a9b 1845 limit--;
fb829de6 1846
08e2d625
MM
1847 goto rescan;
1848 }
f4a60a9b 1849
5bd73431
OZ
1850 if (e->flags & REF_MODIFY)
1851 {
1852 if (limit <= 0)
1853 {
1854 FIB_ITERATE_PUT(fit);
1855 ev_schedule(tab->rt_event);
1856 return;
1857 }
1858
1859 rte_modify(e);
1860 limit--;
1861
1862 goto rescan;
1863 }
1864 }
1865
fb829de6 1866 if (!n->routes) /* Orphaned FIB entry */
1a54b1c6 1867 {
600998fc
OZ
1868 FIB_ITERATE_PUT(fit);
1869 fib_delete(&tab->fib, n);
08e2d625 1870 goto again;
1a54b1c6 1871 }
1a54b1c6 1872 }
600998fc 1873 FIB_ITERATE_END;
fb829de6 1874
0521e4f6
MM
1875#ifdef DEBUGGING
1876 fib_check(&tab->fib);
1877#endif
fb829de6 1878
f4a60a9b 1879 tab->gc_counter = 0;
f047271c 1880 tab->gc_time = current_time();
0e02abfd 1881
f4a60a9b
OZ
1882 /* state change 2->0, 3->1 */
1883 tab->prune_state &= 1;
0c791f87 1884
f4a60a9b
OZ
1885 if (tab->prune_state > 0)
1886 ev_schedule(tab->rt_event);
0e02abfd 1887
f4a60a9b
OZ
1888 /* FIXME: This should be handled in a better way */
1889 rt_prune_sources();
fb829de6 1890
f4a60a9b
OZ
1891 /* Close flushed channels */
1892 WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
1893 if (c->flush_active)
1894 {
1895 c->flush_active = 0;
286e2011 1896 channel_set_state(c, CS_DOWN);
f4a60a9b
OZ
1897 }
1898
1899 return;
0e02abfd
MM
1900}
1901
cfe34a31
OZ
1902void
1903rt_preconfig(struct config *c)
1904{
cfe34a31 1905 init_list(&c->tables);
f4a60a9b
OZ
1906
1907 rt_new_table(cf_get_symbol("master4"), NET_IP4);
1908 rt_new_table(cf_get_symbol("master6"), NET_IP6);
cfe34a31
OZ
1909}
1910
1911
f4a60a9b 1912/*
cfe34a31
OZ
1913 * Some functions for handling internal next hop updates
1914 * triggered by rt_schedule_nhu().
1915 */
1916
cfe34a31
OZ
1917static inline int
1918rta_next_hop_outdated(rta *a)
1919{
1920 struct hostentry *he = a->hostentry;
7e95c05d
OZ
1921
1922 if (!he)
1923 return 0;
1924
1925 if (!he->src)
1926 return a->dest != RTD_UNREACHABLE;
1927
4e276a89 1928 return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
039a65d0 1929 (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
cfe34a31
OZ
1930}
1931
1e37e35c 1932void
3c744164 1933rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
cfe34a31
OZ
1934{
1935 a->hostentry = he;
cfe34a31 1936 a->dest = he->dest;
d1e146f2 1937 a->igp_metric = he->igp_metric;
d47c3d64 1938
3c744164 1939 if (a->dest != RTD_UNICAST)
d47c3d64 1940 {
3c744164
JMM
1941 /* No nexthop */
1942no_nexthop:
1943 a->nh = (struct nexthop) {};
1944 if (mls)
1945 { /* Store the label stack for later changes */
1946 a->nh.labels_orig = a->nh.labels = mls->len;
1947 memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
1948 }
d47c3d64
JMM
1949 return;
1950 }
1951
3c744164
JMM
1952 if (((!mls) || (!mls->len)) && he->nexthop_linkable)
1953 { /* Just link the nexthop chain, no label append happens. */
1954 memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
1955 return;
1956 }
1957
1958 struct nexthop *nhp = NULL, *nhr = NULL;
1959 int skip_nexthop = 0;
1e37e35c 1960
3c744164 1961 for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
d47c3d64 1962 {
3c744164
JMM
1963 if (skip_nexthop)
1964 skip_nexthop--;
1965 else
1966 {
1967 nhr = nhp;
1968 nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
1969 }
039a65d0 1970
3c744164
JMM
1971 nhp->iface = nh->iface;
1972 nhp->weight = nh->weight;
1973 if (mls)
d47c3d64 1974 {
3c744164
JMM
1975 nhp->labels = nh->labels + mls->len;
1976 nhp->labels_orig = mls->len;
039a65d0
JMM
1977 if (nhp->labels <= MPLS_MAX_LABEL_STACK)
1978 {
1979 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
3c744164 1980 memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
039a65d0
JMM
1981 }
1982 else
1983 {
1984 log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
3c744164
JMM
1985 nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
1986 skip_nexthop++;
039a65d0
JMM
1987 continue;
1988 }
d47c3d64 1989 }
3c744164 1990 if (ipa_nonzero(nh->gw))
a1f5e514
OZ
1991 {
1992 nhp->gw = nh->gw; /* Router nexthop */
1993 nhp->flags |= (nh->flags & RNF_ONLINK);
1994 }
3c744164
JMM
1995 else if (ipa_nonzero(he->link))
1996 nhp->gw = he->link; /* Device nexthop with link-local address known */
1997 else
1998 nhp->gw = he->addr; /* Device nexthop with link-local address unknown */
d47c3d64 1999 }
039a65d0 2000
3c744164
JMM
2001 if (skip_nexthop)
2002 if (nhr)
2003 nhr->next = NULL;
2004 else
2005 {
2006 a->dest = RTD_UNREACHABLE;
2007 log(L_WARN "No valid nexthop remaining, setting route unreachable");
2008 goto no_nexthop;
2009 }
cfe34a31
OZ
2010}
2011
2012static inline rte *
3e236955 2013rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
cfe34a31 2014{
62e64905
OZ
2015 rta *a = alloca(RTA_MAX_SIZE);
2016 memcpy(a, old->attrs, rta_size(old->attrs));
3c744164
JMM
2017
2018 mpls_label_stack mls = { .len = a->nh.labels_orig };
2019 memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
2020
2021 rta_apply_hostentry(a, old->attrs->hostentry, &mls);
62e64905 2022 a->aflags = 0;
cfe34a31
OZ
2023
2024 rte *e = sl_alloc(rte_slab);
2025 memcpy(e, old, sizeof(rte));
62e64905 2026 e->attrs = rta_lookup(a);
cfe34a31
OZ
2027
2028 return e;
2029}
2030
2031static inline int
2032rt_next_hop_update_net(rtable *tab, net *n)
2033{
2034 rte **k, *e, *new, *old_best, **new_best;
2035 int count = 0;
2036 int free_old_best = 0;
2037
2038 old_best = n->routes;
2039 if (!old_best)
2040 return 0;
2041
cfe34a31 2042 for (k = &n->routes; e = *k; k = &e->next)
be4cd99a
OZ
2043 if (rta_next_hop_outdated(e->attrs))
2044 {
2045 new = rt_next_hop_update_rte(tab, e);
2046 *k = new;
cfe34a31 2047
8d9eef17 2048 rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
c0adf7e9 2049 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
cfe34a31 2050
be4cd99a
OZ
2051 /* Call a pre-comparison hook */
2052 /* Not really an efficient way to compute this */
094d2bdb
OZ
2053 if (e->attrs->src->proto->rte_recalculate)
2054 e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
cfe34a31 2055
be4cd99a
OZ
2056 if (e != old_best)
2057 rte_free_quick(e);
2058 else /* Freeing of the old best rte is postponed */
2059 free_old_best = 1;
cfe34a31 2060
be4cd99a
OZ
2061 e = new;
2062 count++;
2063 }
2064
2065 if (!count)
2066 return 0;
2067
2068 /* Find the new best route */
2069 new_best = NULL;
2070 for (k = &n->routes; e = *k; k = &e->next)
2071 {
cfe34a31
OZ
2072 if (!new_best || rte_better(e, *new_best))
2073 new_best = k;
2074 }
2075
2076 /* Relink the new best route to the first position */
2077 new = *new_best;
2078 if (new != n->routes)
2079 {
2080 *new_best = new->next;
2081 new->next = n->routes;
2082 n->routes = new;
2083 }
2084
2085 /* Announce the new best route */
2086 if (new != old_best)
2087 {
8d9eef17 2088 rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
c0adf7e9 2089 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
cfe34a31
OZ
2090 }
2091
8d9eef17
OZ
2092 /* FIXME: Better announcement of merged routes */
2093 rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
2094
d107ef78 2095 if (free_old_best)
cfe34a31
OZ
2096 rte_free_quick(old_best);
2097
2098 return count;
2099}
2100
2101static void
2102rt_next_hop_update(rtable *tab)
2103{
2104 struct fib_iterator *fit = &tab->nhu_fit;
2105 int max_feed = 32;
2106
93f50ca3 2107 if (tab->nhu_state == NHU_CLEAN)
cfe34a31
OZ
2108 return;
2109
93f50ca3 2110 if (tab->nhu_state == NHU_SCHEDULED)
cfe34a31
OZ
2111 {
2112 FIB_ITERATE_INIT(fit, &tab->fib);
93f50ca3 2113 tab->nhu_state = NHU_RUNNING;
cfe34a31
OZ
2114 }
2115
600998fc 2116 FIB_ITERATE_START(&tab->fib, fit, net, n)
cfe34a31
OZ
2117 {
2118 if (max_feed <= 0)
2119 {
600998fc 2120 FIB_ITERATE_PUT(fit);
cfe34a31
OZ
2121 ev_schedule(tab->rt_event);
2122 return;
2123 }
600998fc 2124 max_feed -= rt_next_hop_update_net(tab, n);
cfe34a31 2125 }
600998fc 2126 FIB_ITERATE_END;
cfe34a31 2127
93f50ca3
JMM
2128 /* State change:
2129 * NHU_DIRTY -> NHU_SCHEDULED
2130 * NHU_RUNNING -> NHU_CLEAN
2131 */
cfe34a31
OZ
2132 tab->nhu_state &= 1;
2133
93f50ca3 2134 if (tab->nhu_state != NHU_CLEAN)
cfe34a31
OZ
2135 ev_schedule(tab->rt_event);
2136}
2137
2138
b9626ec6 2139struct rtable_config *
fe9f1a6d 2140rt_new_table(struct symbol *s, uint addr_type)
b9626ec6 2141{
36415e4b 2142 /* Hack that allows to 'redefine' the master table */
f4a60a9b
OZ
2143 if ((s->class == SYM_TABLE) &&
2144 (s->def == new_config->def_tables[addr_type]) &&
2145 ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
36415e4b
OZ
2146 return s->def;
2147
b9626ec6
MM
2148 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
2149
2150 cf_define_symbol(s, SYM_TABLE, c);
2151 c->name = s->name;
fe9f1a6d 2152 c->addr_type = addr_type;
2eca3b3a 2153 c->gc_max_ops = 1000;
b9626ec6 2154 c->gc_min_time = 5;
f4a60a9b
OZ
2155
2156 add_tail(&new_config->tables, &c->n);
2157
2158 /* First table of each type is kept as default */
2159 if (! new_config->def_tables[addr_type])
2160 new_config->def_tables[addr_type] = c;
2161
b9626ec6
MM
2162 return c;
2163}
2164
58740ed4
MM
2165/**
2166 * rt_lock_table - lock a routing table
2167 * @r: routing table to be locked
2168 *
2169 * Lock a routing table, because it's in use by a protocol,
2170 * preventing it from being freed when it gets undefined in a new
2171 * configuration.
2172 */
0e02abfd 2173void
50fe90ed 2174rt_lock_table(rtable *r)
0e02abfd 2175{
50fe90ed
MM
2176 r->use_count++;
2177}
2178
58740ed4
MM
2179/**
2180 * rt_unlock_table - unlock a routing table
2181 * @r: routing table to be unlocked
2182 *
2183 * Unlock a routing table formerly locked by rt_lock_table(),
2184 * that is decrease its use count and delete it if it's scheduled
2185 * for deletion by configuration changes.
2186 */
50fe90ed
MM
2187void
2188rt_unlock_table(rtable *r)
2189{
2190 if (!--r->use_count && r->deleted)
2191 {
2192 struct config *conf = r->deleted;
2193 DBG("Deleting routing table %s\n", r->name);
86b4e170 2194 r->config->table = NULL;
cfe34a31
OZ
2195 if (r->hostcache)
2196 rt_free_hostcache(r);
50fe90ed
MM
2197 rem_node(&r->n);
2198 fib_free(&r->fib);
cfe34a31 2199 rfree(r->rt_event);
50fe90ed
MM
2200 mb_free(r);
2201 config_del_obstacle(conf);
2202 }
2203}
2204
bcb4af81
OZ
2205static struct rtable_config *
2206rt_find_table_config(struct config *cf, char *name)
2207{
2208 struct symbol *sym = cf_find_symbol(cf, name);
2209 return (sym && (sym->class == SYM_TABLE)) ? sym->def : NULL;
2210}
2211
58740ed4
MM
2212/**
2213 * rt_commit - commit new routing table configuration
2214 * @new: new configuration
2215 * @old: original configuration or %NULL if it's boot time config
2216 *
2217 * Scan differences between @old and @new configuration and modify
2218 * the routing tables according to these changes. If @new defines a
2219 * previously unknown table, create it, if it omits a table existing
2220 * in @old, schedule it for deletion (it gets deleted when all protocols
2221 * disconnect from it by calling rt_unlock_table()), if it exists
2222 * in both configurations, leave it unchanged.
2223 */
50fe90ed
MM
2224void
2225rt_commit(struct config *new, struct config *old)
2226{
2227 struct rtable_config *o, *r;
0e02abfd 2228
50fe90ed
MM
2229 DBG("rt_commit:\n");
2230 if (old)
0e02abfd 2231 {
50fe90ed
MM
2232 WALK_LIST(o, old->tables)
2233 {
2234 rtable *ot = o->table;
2235 if (!ot->deleted)
2236 {
bcb4af81
OZ
2237 r = rt_find_table_config(new, o->name);
2238 if (r && (r->addr_type == o->addr_type) && !new->shutdown)
50fe90ed
MM
2239 {
2240 DBG("\t%s: same\n", o->name);
50fe90ed
MM
2241 r->table = ot;
2242 ot->name = r->name;
b9626ec6 2243 ot->config = r;
26822d8f
OZ
2244 if (o->sorted != r->sorted)
2245 log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
50fe90ed
MM
2246 }
2247 else
2248 {
bf8558bc 2249 DBG("\t%s: deleted\n", o->name);
50fe90ed
MM
2250 ot->deleted = old;
2251 config_add_obstacle(old);
2252 rt_lock_table(ot);
2253 rt_unlock_table(ot);
2254 }
2255 }
2256 }
0e02abfd 2257 }
50fe90ed
MM
2258
2259 WALK_LIST(r, new->tables)
2260 if (!r->table)
2261 {
2262 rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
2263 DBG("\t%s: created\n", r->name);
28b3b551 2264 rt_setup(rt_table_pool, t, r);
50fe90ed
MM
2265 add_tail(&routing_tables, &t->n);
2266 r->table = t;
2267 }
2268 DBG("\tdone\n");
0e02abfd 2269}
730f2e2c 2270
23ac9e9a 2271static inline void
f4a60a9b 2272do_feed_channel(struct channel *c, net *n, rte *e)
23ac9e9a 2273{
23ac9e9a 2274 rte_update_lock();
f4a60a9b
OZ
2275 if (c->ra_mode == RA_ACCEPTED)
2276 rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
2277 else if (c->ra_mode == RA_MERGED)
2278 rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
2279 else /* RA_BASIC */
2280 rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
23ac9e9a
OZ
2281 rte_update_unlock();
2282}
2283
58740ed4 2284/**
f4a60a9b
OZ
2285 * rt_feed_channel - advertise all routes to a channel
2286 * @c: channel to be fed
58740ed4 2287 *
f4a60a9b
OZ
2288 * This function performs one pass of advertisement of routes to a channel that
2289 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2290 * has something to do. (We avoid transferring all the routes in single pass in
2291 * order not to monopolize CPU time.)
58740ed4 2292 */
ac5d8012 2293int
f4a60a9b 2294rt_feed_channel(struct channel *c)
ac5d8012 2295{
f4a60a9b 2296 struct fib_iterator *fit = &c->feed_fit;
76dfda9e 2297 int max_feed = 256;
ac5d8012 2298
f4a60a9b
OZ
2299 ASSERT(c->export_state == ES_FEEDING);
2300
2301 if (!c->feed_active)
ac5d8012 2302 {
f4a60a9b
OZ
2303 FIB_ITERATE_INIT(fit, &c->table->fib);
2304 c->feed_active = 1;
ac5d8012 2305 }
ac5d8012 2306
f4a60a9b 2307 FIB_ITERATE_START(&c->table->fib, fit, net, n)
ac5d8012 2308 {
258d0ad4 2309 rte *e = n->routes;
76dfda9e
MM
2310 if (max_feed <= 0)
2311 {
600998fc 2312 FIB_ITERATE_PUT(fit);
76dfda9e
MM
2313 return 0;
2314 }
23ac9e9a 2315
f4a60a9b 2316 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
cf98be7b 2317
f4a60a9b
OZ
2318 if ((c->ra_mode == RA_OPTIMAL) ||
2319 (c->ra_mode == RA_ACCEPTED) ||
2320 (c->ra_mode == RA_MERGED))
cf98be7b 2321 if (rte_is_valid(e))
23ac9e9a 2322 {
f4a60a9b
OZ
2323 /* In the meantime, the protocol may fell down */
2324 if (c->export_state != ES_FEEDING)
2325 goto done;
ca34698c 2326
f4a60a9b 2327 do_feed_channel(c, n, e);
23ac9e9a
OZ
2328 max_feed--;
2329 }
2330
f4a60a9b 2331 if (c->ra_mode == RA_ANY)
ca34698c 2332 for(e = n->routes; e; e = e->next)
23ac9e9a 2333 {
f4a60a9b
OZ
2334 /* In the meantime, the protocol may fell down */
2335 if (c->export_state != ES_FEEDING)
2336 goto done;
ca34698c
OZ
2337
2338 if (!rte_is_valid(e))
2339 continue;
2340
f4a60a9b 2341 do_feed_channel(c, n, e);
23ac9e9a
OZ
2342 max_feed--;
2343 }
ac5d8012 2344 }
600998fc 2345 FIB_ITERATE_END;
ac5d8012 2346
f4a60a9b
OZ
2347done:
2348 c->feed_active = 0;
2349 return 1;
ac5d8012
MM
2350}
2351
58740ed4
MM
2352/**
2353 * rt_feed_baby_abort - abort protocol feeding
f4a60a9b 2354 * @c: channel
58740ed4 2355 *
f4a60a9b
OZ
2356 * This function is called by the protocol code when the protocol stops or
2357 * ceases to exist during the feeding.
58740ed4 2358 */
ac5d8012 2359void
f4a60a9b 2360rt_feed_channel_abort(struct channel *c)
ac5d8012 2361{
f4a60a9b 2362 if (c->feed_active)
ac5d8012 2363 {
f4a60a9b
OZ
2364 /* Unlink the iterator */
2365 fit_get(&c->table->fib, &c->feed_fit);
2366 c->feed_active = 0;
ac5d8012
MM
2367 }
2368}
2369
682d3f7d
OZ
2370
2371int
2372rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
2373{
67d8665a 2374 struct rtable *tab = c->in_table;
682d3f7d
OZ
2375 rte *old, **pos;
2376 net *net;
2377
2378 if (new)
2379 {
67d8665a 2380 net = net_get(tab, n);
682d3f7d
OZ
2381
2382 if (!new->pref)
2383 new->pref = c->preference;
2384
2385 if (!rta_is_cached(new->attrs))
2386 new->attrs = rta_lookup(new->attrs);
2387 }
2388 else
2389 {
67d8665a 2390 net = net_find(tab, n);
682d3f7d
OZ
2391
2392 if (!net)
67d8665a 2393 goto drop_withdraw;
682d3f7d
OZ
2394 }
2395
2396 /* Find the old rte */
2397 for (pos = &net->routes; old = *pos; pos = &old->next)
2398 if (old->attrs->src == src)
2399 {
2400 if (new && rte_same(old, new))
93af78d2
OZ
2401 {
2402 /* Refresh the old rte, continue with update to main rtable */
2403 if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
2404 {
2405 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
2406 return 1;
2407 }
2408
67d8665a 2409 goto drop_update;
93af78d2 2410 }
682d3f7d
OZ
2411
2412 /* Remove the old rte */
2413 *pos = old->next;
2414 rte_free_quick(old);
67d8665a 2415 tab->rt_count--;
682d3f7d
OZ
2416
2417 break;
2418 }
2419
2420 if (!new)
67d8665a
OZ
2421 {
2422 if (!old)
2423 goto drop_withdraw;
2424
2425 return 1;
2426 }
2427
2428 struct channel_limit *l = &c->rx_limit;
2429 if (l->action && !old)
2430 {
2431 if (tab->rt_count >= l->limit)
2432 channel_notify_limit(c, l, PLD_RX, tab->rt_count);
2433
2434 if (l->state == PLS_BLOCKED)
2435 {
2436 rte_trace_in(D_FILTERS, c->proto, new, "ignored [limit]");
2437 goto drop_update;
2438 }
2439 }
682d3f7d
OZ
2440
2441 /* Insert the new rte */
2442 rte *e = rte_do_cow(new);
2443 e->flags |= REF_COW;
2444 e->net = net;
2445 e->sender = c;
2446 e->lastmod = current_time();
2447 e->next = *pos;
2448 *pos = e;
67d8665a 2449 tab->rt_count++;
682d3f7d 2450 return 1;
67d8665a
OZ
2451
2452drop_update:
2453 c->stats.imp_updates_received++;
2454 c->stats.imp_updates_ignored++;
2455 rte_free(new);
2456 return 0;
2457
2458drop_withdraw:
2459 c->stats.imp_withdraws_received++;
2460 c->stats.imp_withdraws_ignored++;
2461 return 0;
682d3f7d
OZ
2462}
2463
2464int
2465rt_reload_channel(struct channel *c)
2466{
2467 struct rtable *tab = c->in_table;
2468 struct fib_iterator *fit = &c->reload_fit;
2469 int max_feed = 64;
2470
2471 ASSERT(c->channel_state == CS_UP);
2472
2473 if (!c->reload_active)
2474 {
2475 FIB_ITERATE_INIT(fit, &tab->fib);
2476 c->reload_active = 1;
2477 }
2478
2479 FIB_ITERATE_START(&tab->fib, fit, net, n)
2480 {
2481 if (max_feed <= 0)
2482 {
2483 FIB_ITERATE_PUT(fit);
2484 return 0;
2485 }
2486
2487 for (rte *e = n->routes; e; e = e->next)
2488 {
2489 rte_update2(c, n->n.addr, rte_do_cow(e), e->attrs->src);
2490 max_feed--;
2491 }
2492 }
2493 FIB_ITERATE_END;
2494
2495 c->reload_active = 0;
2496 return 1;
2497}
2498
2499void
2500rt_reload_channel_abort(struct channel *c)
2501{
2502 if (c->reload_active)
2503 {
2504 /* Unlink the iterator */
2505 fit_get(&c->in_table->fib, &c->reload_fit);
2506 c->reload_active = 0;
2507 }
2508}
2509
2510void
2511rt_prune_sync(rtable *t, int all)
2512{
2513 FIB_WALK(&t->fib, net, n)
2514 {
2515 rte *e, **ee = &n->routes;
2516 while (e = *ee)
2517 {
2518 if (all || (e->flags & (REF_STALE | REF_DISCARD)))
2519 {
2520 *ee = e->next;
2521 rte_free_quick(e);
67d8665a 2522 t->rt_count--;
682d3f7d
OZ
2523 }
2524 else
2525 ee = &e->next;
2526 }
2527 }
2528 FIB_WALK_END;
2529}
2530
2531
04632fd7 2532static inline u32
f2b76f2c
OZ
2533hc_hash(ip_addr a, rtable *dep)
2534{
04632fd7 2535 return ipa_hash(a) ^ ptr_hash(dep);
f2b76f2c
OZ
2536}
2537
2538static inline void
2539hc_insert(struct hostcache *hc, struct hostentry *he)
2540{
ae80a2de 2541 uint k = he->hash_key >> hc->hash_shift;
f2b76f2c
OZ
2542 he->next = hc->hash_table[k];
2543 hc->hash_table[k] = he;
2544}
2545
2546static inline void
2547hc_remove(struct hostcache *hc, struct hostentry *he)
2548{
2549 struct hostentry **hep;
ae80a2de 2550 uint k = he->hash_key >> hc->hash_shift;
f2b76f2c
OZ
2551
2552 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2553 *hep = he->next;
2554}
2555
/*
 * Hostcache hash table sizing. An "order" is log2 of the number of buckets.
 * Note that the HC_*_MARK macros are operator suffixes, applied to the bucket
 * count as in (hsize HC_HI_MARK).
 */

/* Initial order of a new hostcache */
#define HC_DEF_ORDER 10

/* Growing: item-count threshold, order increment and upper order bound */
#define HC_HI_MARK *4
#define HC_HI_STEP 2
#define HC_HI_ORDER 16			/* Must be at most 16 */

/* Shrinking: item-count threshold, order decrement and lower order bound */
#define HC_LO_MARK /5
#define HC_LO_STEP 2
#define HC_LO_ORDER 10
2563
2564static void
2565hc_alloc_table(struct hostcache *hc, unsigned order)
2566{
3e236955 2567 uint hsize = 1 << order;
f2b76f2c 2568 hc->hash_order = order;
04632fd7 2569 hc->hash_shift = 32 - order;
3e236955
JMM
2570 hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
2571 hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);
f2b76f2c
OZ
2572
2573 hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2574}
2575
cfe34a31 2576static void
f2b76f2c 2577hc_resize(struct hostcache *hc, unsigned new_order)
cfe34a31 2578{
f2b76f2c
OZ
2579 struct hostentry **old_table = hc->hash_table;
2580 struct hostentry *he, *hen;
3e236955
JMM
2581 uint old_size = 1 << hc->hash_order;
2582 uint i;
f2b76f2c
OZ
2583
2584 hc_alloc_table(hc, new_order);
2585 for (i = 0; i < old_size; i++)
2586 for (he = old_table[i]; he != NULL; he=hen)
2587 {
2588 hen = he->next;
2589 hc_insert(hc, he);
2590 }
2591 mb_free(old_table);
2592}
2593
2594static struct hostentry *
1b180121 2595hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
f2b76f2c
OZ
2596{
2597 struct hostentry *he = sl_alloc(hc->slab);
2598
039a65d0
JMM
2599 *he = (struct hostentry) {
2600 .addr = a,
2601 .link = ll,
2602 .tab = dep,
2603 .hash_key = k,
2604 };
f2b76f2c
OZ
2605
2606 add_tail(&hc->hostentries, &he->ln);
2607 hc_insert(hc, he);
2608
2609 hc->hash_items++;
2610 if (hc->hash_items > hc->hash_max)
2611 hc_resize(hc, hc->hash_order + HC_HI_STEP);
2612
2613 return he;
2614}
2615
2616static void
2617hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2618{
7e95c05d
OZ
2619 rta_free(he->src);
2620
f2b76f2c
OZ
2621 rem_node(&he->ln);
2622 hc_remove(hc, he);
2623 sl_free(hc->slab, he);
2624
2625 hc->hash_items--;
2626 if (hc->hash_items < hc->hash_min)
2627 hc_resize(hc, hc->hash_order - HC_LO_STEP);
cfe34a31
OZ
2628}
2629
2630static void
2631rt_init_hostcache(rtable *tab)
2632{
2633 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2634 init_list(&hc->hostentries);
f2b76f2c
OZ
2635
2636 hc->hash_items = 0;
2637 hc_alloc_table(hc, HC_DEF_ORDER);
2638 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2639
05d47bd5 2640 hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024));
51762a45 2641 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
c477f489 2642
cfe34a31
OZ
2643 tab->hostcache = hc;
2644}
2645
2646static void
2647rt_free_hostcache(rtable *tab)
2648{
2649 struct hostcache *hc = tab->hostcache;
2650
2651 node *n;
2652 WALK_LIST(n, hc->hostentries)
2653 {
2654 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
7e95c05d
OZ
2655 rta_free(he->src);
2656
cfe34a31
OZ
2657 if (he->uc)
2658 log(L_ERR "Hostcache is not empty in table %s", tab->name);
2659 }
2660
f2b76f2c 2661 rfree(hc->slab);
c477f489 2662 rfree(hc->lp);
f2b76f2c 2663 mb_free(hc->hash_table);
cfe34a31
OZ
2664 mb_free(hc);
2665}
2666
2667static void
2668rt_notify_hostcache(rtable *tab, net *net)
2669{
cfe34a31
OZ
2670 if (tab->hcu_scheduled)
2671 return;
2672
04632fd7
OZ
2673 if (trie_match_net(tab->hostcache->trie, net->n.addr))
2674 rt_schedule_hcu(tab);
cfe34a31
OZ
2675}
2676
2677static int
2678if_local_addr(ip_addr a, struct iface *i)
2679{
2680 struct ifa *b;
2681
2682 WALK_LIST(b, i->addrs)
2683 if (ipa_equal(a, b->ip))
2684 return 1;
2685
2686 return 0;
2687}
2688
a82f692e 2689static u32
d1e146f2
OZ
2690rt_get_igp_metric(rte *rt)
2691{
ba5e5940
OZ
2692 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2693
2694 if (ea)
2695 return ea->u.data;
2696
d1e146f2 2697 rta *a = rt->attrs;
b7c48981
OF
2698
2699#ifdef CONFIG_OSPF
d1e146f2
OZ
2700 if ((a->source == RTS_OSPF) ||
2701 (a->source == RTS_OSPF_IA) ||
2702 (a->source == RTS_OSPF_EXT1))
2703 return rt->u.ospf.metric1;
b7c48981 2704#endif
d1e146f2 2705
b7c48981 2706#ifdef CONFIG_RIP
d1e146f2
OZ
2707 if (a->source == RTS_RIP)
2708 return rt->u.rip.metric;
b7c48981 2709#endif
d1e146f2 2710
4e276a89 2711 if (a->source == RTS_DEVICE)
d1e146f2
OZ
2712 return 0;
2713
2714 return IGP_METRIC_UNKNOWN;
2715}
2716
cfe34a31
OZ
2717static int
2718rt_update_hostentry(rtable *tab, struct hostentry *he)
2719{
7e95c05d 2720 rta *old_src = he->src;
85ad5855 2721 int direct = 0;
c477f489 2722 int pxlen = 0;
cfe34a31 2723
04632fd7 2724 /* Reset the hostentry */
7e95c05d 2725 he->src = NULL;
7e95c05d 2726 he->dest = RTD_UNREACHABLE;
85ad5855 2727 he->nexthop_linkable = 0;
7e95c05d
OZ
2728 he->igp_metric = 0;
2729
04632fd7
OZ
2730 net_addr he_addr;
2731 net_fill_ip_host(&he_addr, he->addr);
2732 net *n = net_route(tab, &he_addr);
d1e146f2 2733 if (n)
cfe34a31 2734 {
cf98be7b
OZ
2735 rte *e = n->routes;
2736 rta *a = e->attrs;
fe9f1a6d 2737 pxlen = n->n.addr->pxlen;
cfe34a31 2738
2c9033af
OZ
2739 if (a->hostentry)
2740 {
2741 /* Recursive route should not depend on another recursive route */
fe9f1a6d
OZ
2742 log(L_WARN "Next hop address %I resolvable through recursive route for %N",
2743 he->addr, n->n.addr);
7e95c05d 2744 goto done;
2c9033af 2745 }
7e95c05d 2746
85ad5855 2747 if (a->dest == RTD_UNICAST)
3c744164
JMM
2748 {
2749 for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
2750 if (ipa_zero(nh->gw))
2751 {
2752 if (if_local_addr(he->addr, nh->iface))
2753 {
2754 /* The host address is a local address, this is not valid */
2755 log(L_WARN "Next hop address %I is a local address of iface %s",
2756 he->addr, nh->iface->name);
2757 goto done;
2758 }
2759
85ad5855 2760 direct++;
3c744164
JMM
2761 }
2762 }
665be7f6 2763
7e95c05d 2764 he->src = rta_clone(a);
85ad5855
OZ
2765 he->dest = a->dest;
2766 he->nexthop_linkable = !direct;
cf98be7b 2767 he->igp_metric = rt_get_igp_metric(e);
cfe34a31
OZ
2768 }
2769
665be7f6 2770done:
c477f489 2771 /* Add a prefix range to the trie */
04632fd7 2772 trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
c477f489 2773
7e95c05d
OZ
2774 rta_free(old_src);
2775 return old_src != he->src;
cfe34a31
OZ
2776}
2777
2778static void
2779rt_update_hostcache(rtable *tab)
2780{
2781 struct hostcache *hc = tab->hostcache;
2782 struct hostentry *he;
2783 node *n, *x;
2784
c477f489
OZ
2785 /* Reset the trie */
2786 lp_flush(hc->lp);
51762a45 2787 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
c477f489 2788
cfe34a31
OZ
2789 WALK_LIST_DELSAFE(n, x, hc->hostentries)
2790 {
2791 he = SKIP_BACK(struct hostentry, ln, n);
2792 if (!he->uc)
2793 {
f2b76f2c 2794 hc_delete_hostentry(hc, he);
cfe34a31
OZ
2795 continue;
2796 }
2797
2798 if (rt_update_hostentry(tab, he))
2799 rt_schedule_nhu(he->tab);
2800 }
2801
2802 tab->hcu_scheduled = 0;
2803}
2804
1e37e35c 2805struct hostentry *
094d2bdb 2806rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
cfe34a31
OZ
2807{
2808 struct hostentry *he;
2809
2810 if (!tab->hostcache)
2811 rt_init_hostcache(tab);
2812
04632fd7 2813 u32 k = hc_hash(a, dep);
f2b76f2c
OZ
2814 struct hostcache *hc = tab->hostcache;
2815 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2816 if (ipa_equal(he->addr, a) && (he->tab == dep))
2817 return he;
cfe34a31 2818
1e37e35c 2819 he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
f2b76f2c 2820 rt_update_hostentry(tab, he);
cfe34a31
OZ
2821 return he;
2822}
2823
094d2bdb 2824
/*
 *	Documentation for functions declared inline in route.h
 */
#if 0

/**
 * net_find - find a network entry
 * @tab: a routing table
 * @addr: address of the network
 *
 * net_find() searches the routing table @tab for the given network and
 * returns a pointer to its &net entry, or %NULL when the table contains
 * no such network.
 */
static inline net *net_find(rtable *tab, net_addr *addr)
{ DUMMY; }

/**
 * net_get - obtain a network entry
 * @tab: a routing table
 * @addr: address of the network
 *
 * net_get() searches the routing table @tab for the given network and
 * returns a pointer to its &net entry. When no such entry exists yet,
 * it is created.
 */
static inline net *net_get(rtable *tab, net_addr *addr)
{ DUMMY; }

/**
 * rte_cow - copy a route for writing
 * @r: a route entry to be copied
 *
 * rte_cow() prepares a &rte for modification. What exactly happens depends
 * on the flags of the &rte: a temporary entry is returned as it is, for any
 * other entry a new temporary entry with the same contents is created.
 *
 * This function is primarily used by the filter machinery: before a filter
 * modifies the contents of a &rte (changes the preference or attaches
 * another set of attributes), it has to make sure that the &rte is not
 * shared with anyone else (and especially that it isn't stored in any
 * routing table).
 *
 * Result: a pointer to the new writable &rte.
 */
static inline rte * rte_cow(rte *r)
{ DUMMY; }

#endif