]> git.ipfire.org Git - thirdparty/bird.git/blame - nest/rt-table.c
Fix in the last commit
[thirdparty/bird.git] / nest / rt-table.c
CommitLineData
62aa008a 1/*
58740ed4 2 * BIRD -- Routing Tables
62aa008a 3 *
50fe90ed 4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
62aa008a
MM
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
58740ed4
MM
9/**
10 * DOC: Routing tables
11 *
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
14 * their attributes.
15 *
725270cb 16 * There are multiple routing tables (a primary one together with any
58740ed4
MM
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
58f7d004 19 * destination networks. For each network (represented by structure &net),
725270cb
MM
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
58740ed4
MM
22 * for routing), the order of the other ones is undetermined.
23 *
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
29 */
30
6b9fa320 31#undef LOCAL_DEBUG
1a54b1c6 32
62aa008a
MM
33#include "nest/bird.h"
34#include "nest/route.h"
2326b001 35#include "nest/protocol.h"
730f2e2c
MM
36#include "nest/cli.h"
37#include "nest/iface.h"
2326b001 38#include "lib/resource.h"
5996da6a 39#include "lib/event.h"
730f2e2c 40#include "lib/string.h"
0e02abfd 41#include "conf/conf.h"
529c4149 42#include "filter/filter.h"
221135d6 43#include "lib/string.h"
10af3676 44#include "lib/alloca.h"
7d875e09 45
acb60628
OZ
46pool *rt_table_pool;
47
2326b001 48static slab *rte_slab;
e2dc2f30 49static linpool *rte_update_pool;
2326b001 50
0e02abfd 51static list routing_tables;
5996da6a 52
cfd46ee4 53static void rt_format_via(rte *e, byte *via);
cfe34a31
OZ
54static void rt_free_hostcache(rtable *tab);
55static void rt_notify_hostcache(rtable *tab, net *net);
56static void rt_update_hostcache(rtable *tab);
57static void rt_next_hop_update(rtable *tab);
0c791f87 58static inline int rt_prune_table(rtable *tab);
cfe34a31 59static inline void rt_schedule_gc(rtable *tab);
0c791f87
OZ
60static inline void rt_schedule_prune(rtable *tab);
61
cfd46ee4 62
094d2bdb
OZ
63static inline struct ea_list *
64make_tmp_attrs(struct rte *rt, struct linpool *pool)
65{
66 struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
67 mta = rt->attrs->src->proto->make_tmp_attrs;
68 return mta ? mta(rt, rte_update_pool) : NULL;
69}
70
d1e146f2
OZ
71/* Like fib_route(), but skips empty net entries */
72static net *
73net_route(rtable *tab, ip_addr a, int len)
74{
75 ip_addr a0;
76 net *n;
77
78 while (len >= 0)
79 {
80 a0 = ipa_and(a, ipa_mkmask(len));
81 n = fib_find(&tab->fib, &a0, len);
cf98be7b 82 if (n && rte_is_valid(n->routes))
d1e146f2
OZ
83 return n;
84 len--;
85 }
86 return NULL;
87}
88
5996da6a 89static void
2326b001
MM
90rte_init(struct fib_node *N)
91{
92 net *n = (net *) N;
93
4c45595e 94 N->flags = 0;
2326b001
MM
95 n->routes = NULL;
96}
97
58740ed4
MM
98/**
99 * rte_find - find a route
100 * @net: network node
094d2bdb 101 * @src: route source
58740ed4
MM
102 *
103 * The rte_find() function returns a route for destination @net
094d2bdb 104 * which is from route source @src.
58740ed4 105 */
2326b001 106rte *
094d2bdb 107rte_find(net *net, struct rte_src *src)
2326b001
MM
108{
109 rte *e = net->routes;
110
094d2bdb 111 while (e && e->attrs->src != src)
2326b001
MM
112 e = e->next;
113 return e;
114}
115
58740ed4
MM
116/**
117 * rte_get_temp - get a temporary &rte
3ce8c610 118 * @a: attributes to assign to the new route (a &rta; in case it's
2e9b2421 119 * un-cached, rte_update() will create a cached copy automatically)
58740ed4
MM
120 *
121 * Create a temporary &rte and bind it with the attributes @a.
122 * Also set route preference to the default preference set for
123 * the protocol.
124 */
2326b001
MM
125rte *
126rte_get_temp(rta *a)
127{
128 rte *e = sl_alloc(rte_slab);
129
130 e->attrs = a;
0cdbd397 131 e->flags = 0;
094d2bdb 132 e->pref = a->src->proto->preference;
2326b001
MM
133 return e;
134}
135
e2dc2f30
MM
136rte *
137rte_do_cow(rte *r)
138{
139 rte *e = sl_alloc(rte_slab);
140
141 memcpy(e, r, sizeof(rte));
142 e->attrs = rta_clone(r->attrs);
143 e->flags = 0;
144 return e;
145}
146
2326b001
MM
147static int /* Actually better or at least as good as */
148rte_better(rte *new, rte *old)
149{
d9f330c5
MM
150 int (*better)(rte *, rte *);
151
cf98be7b 152 if (!rte_is_valid(old))
2326b001 153 return 1;
cf98be7b
OZ
154 if (!rte_is_valid(new))
155 return 0;
156
2326b001
MM
157 if (new->pref > old->pref)
158 return 1;
159 if (new->pref < old->pref)
160 return 0;
094d2bdb 161 if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
4c1b4e1a
MM
162 {
163 /*
164 * If the user has configured protocol preferences, so that two different protocols
165 * have the same preference, try to break the tie by comparing addresses. Not too
166 * useful, but keeps the ordering of routes unambiguous.
167 */
094d2bdb 168 return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
4c1b4e1a 169 }
094d2bdb 170 if (better = new->attrs->src->proto->rte_better)
d9f330c5
MM
171 return better(new, old);
172 return 0;
2326b001
MM
173}
174
cfd46ee4
MM
175static void
176rte_trace(struct proto *p, rte *e, int dir, char *msg)
177{
178 byte via[STD_ADDRESS_P_LENGTH+32];
179
180 rt_format_via(e, via);
181 log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, via);
182}
183
184static inline void
185rte_trace_in(unsigned int flag, struct proto *p, rte *e, char *msg)
186{
187 if (p->debug & flag)
b0a47440 188 rte_trace(p, e, '>', msg);
cfd46ee4
MM
189}
190
191static inline void
192rte_trace_out(unsigned int flag, struct proto *p, rte *e, char *msg)
193{
194 if (p->debug & flag)
b0a47440 195 rte_trace(p, e, '<', msg);
cfd46ee4
MM
196}
197
00a09f3c
OZ
198static rte *
199export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
529c4149 200{
c0adf7e9
OZ
201 struct proto *p = ah->proto;
202 struct filter *filter = ah->out_filter;
203 struct proto_stats *stats = ah->stats;
00a09f3c
OZ
204 ea_list *tmpb = NULL;
205 rte *rt;
206 int v;
c0adf7e9 207
00a09f3c
OZ
208 rt = rt0;
209 *rt_free = NULL;
7de45ba4 210
00a09f3c
OZ
211 /* If called does not care for eattrs, we prepare one internally */
212 if (!tmpa)
529c4149 213 {
094d2bdb 214 tmpb = make_tmp_attrs(rt, rte_update_pool);
00a09f3c 215 tmpa = &tmpb;
529c4149 216 }
925fe2d3 217
00a09f3c
OZ
218 v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
219 if (v < 0)
220 {
221 if (silent)
222 goto reject;
11361a10 223
00a09f3c 224 stats->exp_updates_rejected++;
36da2857
OZ
225 if (v == RIC_REJECT)
226 rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
00a09f3c
OZ
227 goto reject;
228 }
229 if (v > 0)
e2dc2f30 230 {
00a09f3c
OZ
231 if (!silent)
232 rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
233 goto accept;
e2dc2f30 234 }
925fe2d3 235
00a09f3c
OZ
236 v = filter && ((filter == FILTER_REJECT) ||
237 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
238 if (v)
239 {
240 if (silent)
241 goto reject;
242
243 stats->exp_updates_filtered++;
244 rte_trace_out(D_FILTERS, p, rt, "filtered out");
245 goto reject;
e2dc2f30 246 }
925fe2d3 247
00a09f3c
OZ
248 accept:
249 if (rt != rt0)
250 *rt_free = rt;
251 return rt;
252
253 reject:
254 /* Discard temporary rte */
255 if (rt != rt0)
256 rte_free(rt);
257 return NULL;
258}
259
260static void
261do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
262{
263 struct proto *p = ah->proto;
264 struct proto_stats *stats = ah->stats;
925fe2d3 265
abced4a9 266
ab758e4f 267 /*
abced4a9
OZ
268 * First, apply export limit.
269 *
ab758e4f
OZ
270 * Export route limits has several problems. Because exp_routes
271 * counter is reset before refeed, we don't really know whether
abced4a9 272 * limit is breached and whether the update is new or not. Therefore
ab758e4f
OZ
273 * the number of really exported routes may exceed the limit
274 * temporarily (routes exported before and new routes in refeed).
275 *
276 * Minor advantage is that if the limit is decreased and refeed is
277 * requested, the number of exported routes really decrease.
278 *
279 * Second problem is that with export limits, we don't know whether
280 * old was really exported (it might be blocked by limit). When a
281 * withdraw is exported, we announce it even when the previous
282 * update was blocked. This is not a big issue, but the same problem
283 * is in updating exp_routes counter. Therefore, to be consistent in
284 * increases and decreases of exp_routes, we count exported routes
285 * regardless of blocking by limits.
286 *
287 * Similar problem is in handling updates - when a new route is
288 * received and blocking is active, the route would be blocked, but
289 * when an update for the route will be received later, the update
290 * would be propagated (as old != NULL). Therefore, we have to block
291 * also non-new updates (contrary to import blocking).
292 */
925fe2d3 293
d9b77cc2 294 struct proto_limit *l = ah->out_limit;
ab758e4f 295 if (l && new)
d9b77cc2 296 {
ab758e4f 297 if ((!old || refeed) && (stats->exp_routes >= l->limit))
b662290f 298 proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);
d9b77cc2
OZ
299
300 if (l->state == PLS_BLOCKED)
301 {
ab758e4f 302 stats->exp_routes++; /* see note above */
d9b77cc2
OZ
303 stats->exp_updates_rejected++;
304 rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
ab758e4f 305 new = NULL;
abced4a9
OZ
306
307 if (!old)
308 return;
d9b77cc2
OZ
309 }
310 }
311
ab758e4f 312
925fe2d3 313 if (new)
9db74169 314 stats->exp_updates_accepted++;
925fe2d3 315 else
9db74169 316 stats->exp_withdraws_accepted++;
925fe2d3 317
8a7fb885
OZ
318 /* Hack: We do not decrease exp_routes during refeed, we instead
319 reset exp_routes at the start of refeed. */
925fe2d3 320 if (new)
9db74169 321 stats->exp_routes++;
8a7fb885 322 if (old && !refeed)
9db74169 323 stats->exp_routes--;
925fe2d3 324
cfd46ee4
MM
325 if (p->debug & D_ROUTES)
326 {
327 if (new && old)
328 rte_trace_out(D_ROUTES, p, new, "replaced");
329 else if (new)
330 rte_trace_out(D_ROUTES, p, new, "added");
349e21bb 331 else if (old)
cfd46ee4
MM
332 rte_trace_out(D_ROUTES, p, old, "removed");
333 }
08f0290a 334 if (!new)
c0adf7e9 335 p->rt_notify(p, ah->table, net, NULL, old, NULL);
08f0290a
MM
336 else if (tmpa)
337 {
2f711231
MM
338 ea_list *t = tmpa;
339 while (t->next)
340 t = t->next;
341 t->next = new->attrs->eattrs;
c0adf7e9 342 p->rt_notify(p, ah->table, net, new, old, tmpa);
2f711231 343 t->next = NULL;
08f0290a
MM
344 }
345 else
c0adf7e9 346 p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
00a09f3c
OZ
347}
348
00a09f3c
OZ
349static void
350rt_notify_basic(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
351{
352 // struct proto *p = ah->proto;
353 struct proto_stats *stats = ah->stats;
354
355 rte *new_free = NULL;
356 rte *old_free = NULL;
357
358 if (new)
359 stats->exp_updates_received++;
360 else
361 stats->exp_withdraws_received++;
362
363 /*
364 * This is a tricky part - we don't know whether route 'old' was
365 * exported to protocol 'p' or was filtered by the export filter.
366 * We try to run the export filter to know this to have a correct
367 * value in 'old' argument of rte_update (and proper filter value)
368 *
369 * FIXME - this is broken because 'configure soft' may change
370 * filters but keep routes. Refeed is expected to be called after
371 * change of the filters and with old == new, therefore we do not
372 * even try to run the filter on an old route, This may lead to
373 * 'spurious withdraws' but ensure that there are no 'missing
374 * withdraws'.
375 *
376 * This is not completely safe as there is a window between
377 * reconfiguration and the end of refeed - if a newly filtered
378 * route disappears during this period, proper withdraw is not
379 * sent (because old would be also filtered) and the route is
380 * not refeeded (because it disappeared before that).
381 */
382
383 if (new)
384 new = export_filter(ah, new, &new_free, &tmpa, 0);
385
386 if (old && !refeed)
387 old = export_filter(ah, old, &old_free, NULL, 1);
388
389 /* FIXME - This is broken because of incorrect 'old' value (see above) */
390 if (!new && !old)
391 return;
392
393 do_rt_notify(ah, net, new, old, tmpa, refeed);
394
395 /* Discard temporary rte's */
396 if (new_free)
397 rte_free(new_free);
398 if (old_free)
399 rte_free(old_free);
400}
401
402static void
403rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old,
404 ea_list *tmpa, int feed)
405{
406 // struct proto *p = ah->proto;
407 struct proto_stats *stats = ah->stats;
408
409 rte *new_best = NULL;
410 rte *old_best = NULL;
411 rte *new_free = NULL;
412 rte *old_free = NULL;
413 rte *r;
414
cf98be7b
OZ
415 /* Used to track whether we met old_changed position. If before_old is NULL
416 old_changed was the first and we met it implicitly before current best route. */
417 int old_meet = old_changed && !before_old;
418
419 /* Note that before_old is either NULL or valid (not rejected) route.
420 If old_changed is valid, before_old have to be too. If old changed route
421 was not valid, caller must use NULL for both old_changed and before_old. */
00a09f3c
OZ
422
423 if (new_changed)
424 stats->exp_updates_received++;
425 else
426 stats->exp_withdraws_received++;
427
428 /* First, find the new_best route - first accepted by filters */
cf98be7b 429 for (r=net->routes; rte_is_valid(r); r=r->next)
00a09f3c
OZ
430 {
431 if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
432 break;
433
434 /* Note if we walked around the position of old_changed route */
435 if (r == before_old)
436 old_meet = 1;
437 }
438
439 /*
440 * Second, handle the feed case. That means we do not care for
441 * old_best. It is NULL for feed, and the new_best for refeed.
442 * For refeed, there is a hack similar to one in rt_notify_basic()
443 * to ensure withdraws in case of changed filters
444 */
445 if (feed)
446 {
447 if (feed == 2) /* refeed */
cf98be7b
OZ
448 old_best = new_best ? new_best :
449 (rte_is_valid(net->routes) ? net->routes : NULL);
00a09f3c
OZ
450 else
451 old_best = NULL;
452
453 if (!new_best && !old_best)
454 return;
455
456 goto found;
457 }
458
459 /*
460 * Now, we find the old_best route. Generally, it is the same as the
461 * new_best, unless new_best is the same as new_changed or
462 * old_changed is accepted before new_best.
463 *
464 * There are four cases:
465 *
466 * - We would find and accept old_changed before new_best, therefore
467 * old_changed is old_best. In remaining cases we suppose this
468 * is not true.
469 *
470 * - We found no new_best, therefore there is also no old_best and
471 * we ignore this withdraw.
472 *
473 * - We found new_best different than new_changed, therefore
474 * old_best is the same as new_best and we ignore this update.
475 *
476 * - We found new_best the same as new_changed, therefore it cannot
477 * be old_best and we have to continue search for old_best.
478 */
479
480 /* First case */
481 if (old_meet)
482 if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
483 goto found;
484
485 /* Second case */
486 if (!new_best)
487 return;
d9b77cc2 488
26822d8f 489 /* Third case, we use r instead of new_best, because export_filter() could change it */
00a09f3c
OZ
490 if (r != new_changed)
491 {
492 if (new_free)
493 rte_free(new_free);
494 return;
495 }
496
497 /* Fourth case */
cf98be7b 498 for (r=r->next; rte_is_valid(r); r=r->next)
00a09f3c
OZ
499 {
500 if (old_best = export_filter(ah, r, &old_free, NULL, 1))
501 goto found;
502
503 if (r == before_old)
504 if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
505 goto found;
506 }
507
508 /* Implicitly, old_best is NULL and new_best is non-NULL */
509
510 found:
511 do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));
512
513 /* Discard temporary rte's */
514 if (new_free)
515 rte_free(new_free);
516 if (old_free)
517 rte_free(old_free);
529c4149
MM
518}
519
9a8f20fc
MM
520/**
521 * rte_announce - announce a routing table change
522 * @tab: table the route has been added to
23ac9e9a 523 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
9a8f20fc
MM
524 * @net: network in question
525 * @new: the new route to be announced
23ac9e9a 526 * @old: the previous route for the same network
9a8f20fc
MM
527 * @tmpa: a list of temporary attributes belonging to the new route
528 *
529 * This function gets a routing table update and announces it
f98e2915
OZ
530 * to all protocols that acccepts given type of route announcement
531 * and are connected to the same table by their announcement hooks.
9a8f20fc 532 *
f98e2915
OZ
533 * Route announcement of type RA_OPTIMAL si generated when optimal
534 * route (in routing table @tab) changes. In that case @old stores the
535 * old optimal route.
23ac9e9a 536 *
f98e2915
OZ
537 * Route announcement of type RA_ANY si generated when any route (in
538 * routing table @tab) changes In that case @old stores the old route
539 * from the same protocol.
540 *
541 * For each appropriate protocol, we first call its import_control()
542 * hook which performs basic checks on the route (each protocol has a
543 * right to veto or force accept of the route before any filter is
544 * asked) and adds default values of attributes specific to the new
545 * protocol (metrics, tags etc.). Then it consults the protocol's
546 * export filter and if it accepts the route, the rt_notify() hook of
547 * the protocol gets called.
9a8f20fc 548 */
e2dc2f30 549static void
00a09f3c 550rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old, ea_list *tmpa)
2326b001 551{
cf98be7b
OZ
552 if (!rte_is_valid(old))
553 old = before_old = NULL;
554
555 if (!rte_is_valid(new))
556 new = NULL;
557
558 if (!old && !new)
559 return;
2326b001 560
925fe2d3
OZ
561 if (type == RA_OPTIMAL)
562 {
563 if (new)
094d2bdb 564 new->attrs->src->proto->stats.pref_routes++;
925fe2d3 565 if (old)
094d2bdb 566 old->attrs->src->proto->stats.pref_routes--;
cfe34a31
OZ
567
568 if (tab->hostcache)
569 rt_notify_hostcache(tab, net);
925fe2d3
OZ
570 }
571
cf98be7b 572 struct announce_hook *a;
0e02abfd 573 WALK_LIST(a, tab->hooks)
0a2e9d9f 574 {
0c791f87 575 ASSERT(a->proto->export_state != ES_DOWN);
23ac9e9a 576 if (a->proto->accept_ra_types == type)
00a09f3c
OZ
577 if (type == RA_ACCEPTED)
578 rt_notify_accepted(a, net, new, old, before_old, tmpa, 0);
579 else
580 rt_notify_basic(a, net, new, old, tmpa, 0);
0a2e9d9f 581 }
2326b001
MM
582}
583
421838ff
MM
584static inline int
585rte_validate(rte *e)
586{
587 int c;
588 net *n = e->net;
589
852b7062 590 if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
cfd46ee4 591 {
ab164971 592 log(L_WARN "Ignoring bogus prefix %I/%d received via %s",
c0adf7e9 593 n->n.prefix, n->n.pxlen, e->sender->proto->name);
cfd46ee4
MM
594 return 0;
595 }
ff2857b0
OZ
596
597 c = ipa_classify_net(n->n.prefix);
598 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
421838ff 599 {
ff2857b0 600 log(L_WARN "Ignoring bogus route %I/%d received via %s",
c0adf7e9 601 n->n.prefix, n->n.pxlen, e->sender->proto->name);
ff2857b0 602 return 0;
421838ff 603 }
ff2857b0 604
421838ff
MM
605 return 1;
606}
607
58740ed4
MM
608/**
609 * rte_free - delete a &rte
610 * @e: &rte to be deleted
611 *
612 * rte_free() deletes the given &rte from the routing table it's linked to.
613 */
04925e90 614void
2326b001 615rte_free(rte *e)
04925e90 616{
094d2bdb 617 if (rta_is_cached(e->attrs))
04925e90
MM
618 rta_free(e->attrs);
619 sl_free(rte_slab, e);
620}
621
622static inline void
623rte_free_quick(rte *e)
2326b001
MM
624{
625 rta_free(e->attrs);
626 sl_free(rte_slab, e);
627}
628
67be5b23
MM
629static int
630rte_same(rte *x, rte *y)
631{
632 return
633 x->attrs == y->attrs &&
634 x->flags == y->flags &&
635 x->pflags == y->pflags &&
636 x->pref == y->pref &&
094d2bdb 637 (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
67be5b23
MM
638}
639
70577529
OZ
640static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
641
e2dc2f30 642static void
094d2bdb 643rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, struct rte_src *src)
2326b001 644{
c0adf7e9
OZ
645 struct proto *p = ah->proto;
646 struct rtable *table = ah->table;
647 struct proto_stats *stats = ah->stats;
1123e707 648 static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
00a09f3c 649 rte *before_old = NULL;
2326b001
MM
650 rte *old_best = net->routes;
651 rte *old = NULL;
00a09f3c 652 rte **k;
2326b001
MM
653
654 k = &net->routes; /* Find and remove original route from the same protocol */
655 while (old = *k)
656 {
094d2bdb 657 if (old->attrs->src == src)
2326b001 658 {
11787b84
OZ
659 /* If there is the same route in the routing table but from
660 * a different sender, then there are two paths from the
661 * source protocol to this routing table through transparent
662 * pipes, which is not allowed.
663 *
664 * We log that and ignore the route. If it is withdraw, we
665 * ignore it completely (there might be 'spurious withdraws',
666 * see FIXME in do_rte_announce())
667 */
c0adf7e9 668 if (old->sender->proto != p)
11787b84
OZ
669 {
670 if (new)
671 {
548c329c 672 log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %I/%d to table %s",
11787b84
OZ
673 net->n.prefix, net->n.pxlen, table->name);
674 rte_free_quick(new);
675 }
676 return;
677 }
678
0b761098 679 if (new && rte_same(old, new))
67be5b23
MM
680 {
681 /* No changes, ignore the new route */
cf98be7b 682
15550957 683 if (!rte_is_filtered(new))
cf98be7b
OZ
684 {
685 stats->imp_updates_ignored++;
686 rte_trace_in(D_ROUTES, p, new, "ignored");
687 }
688
67be5b23 689 rte_free_quick(new);
4d9a0d1f
OZ
690#ifdef CONFIG_RIP
691 /* lastmod is used internally by RIP as the last time
692 when the route was received. */
094d2bdb 693 if (src->proto->proto == &proto_rip)
4d9a0d1f
OZ
694 old->lastmod = now;
695#endif
67be5b23
MM
696 return;
697 }
2326b001
MM
698 *k = old->next;
699 break;
700 }
701 k = &old->next;
00a09f3c 702 before_old = old;
2326b001
MM
703 }
704
00a09f3c
OZ
705 if (!old)
706 before_old = NULL;
707
925fe2d3
OZ
708 if (!old && !new)
709 {
9db74169 710 stats->imp_withdraws_ignored++;
925fe2d3
OZ
711 return;
712 }
713
b662290f
OZ
714 int new_ok = rte_is_ok(new);
715 int old_ok = rte_is_ok(old);
716
717 struct proto_limit *l = ah->rx_limit;
7d0a31de 718 if (l && !old && new)
ebecb6f6 719 {
15550957 720 u32 all_routes = stats->imp_routes + stats->filt_routes;
cf98be7b
OZ
721
722 if (all_routes >= l->limit)
b662290f 723 proto_notify_limit(ah, l, PLD_RX, all_routes);
7d0a31de
OZ
724
725 if (l->state == PLS_BLOCKED)
726 {
b662290f
OZ
727 /* In receive limit the situation is simple, old is NULL so
728 we just free new and exit like nothing happened */
729
7d0a31de
OZ
730 stats->imp_updates_ignored++;
731 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
732 rte_free_quick(new);
733 return;
734 }
ebecb6f6
OZ
735 }
736
b662290f
OZ
737 l = ah->in_limit;
738 if (l && !old_ok && new_ok)
739 {
740 if (stats->imp_routes >= l->limit)
741 proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);
742
743 if (l->state == PLS_BLOCKED)
744 {
745 /* In import limit the situation is more complicated. We
746 shouldn't just drop the route, we should handle it like
747 it was filtered. We also have to continue the route
748 processing if old or new is non-NULL, but we should exit
749 if both are NULL as this case is probably assumed to be
750 already handled. */
751
752 stats->imp_updates_ignored++;
753 rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
754
755 if (ah->in_keep_filtered)
756 new->flags |= REF_FILTERED;
757 else
758 { rte_free_quick(new); new = NULL; }
759
760 /* Note that old && !new could be possible when
761 ah->in_keep_filtered changed in the recent past. */
762
763 if (!old && !new)
764 return;
765
766 new_ok = 0;
767 goto skip_stats1;
768 }
769 }
70577529
OZ
770
771 if (new_ok)
9db74169 772 stats->imp_updates_accepted++;
70577529 773 else if (old_ok)
9db74169 774 stats->imp_withdraws_accepted++;
70577529
OZ
775 else
776 stats->imp_withdraws_ignored++;
925fe2d3 777
b662290f 778 skip_stats1:
925fe2d3
OZ
779
780 if (new)
15550957 781 rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
925fe2d3 782 if (old)
15550957 783 rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
925fe2d3 784
26822d8f 785 if (table->config->sorted)
2326b001 786 {
26822d8f
OZ
787 /* If routes are sorted, just insert new route to appropriate position */
788 if (new)
789 {
790 if (before_old && !rte_better(new, before_old))
791 k = &before_old->next;
792 else
793 k = &net->routes;
c0973621 794
26822d8f
OZ
795 for (; *k; k=&(*k)->next)
796 if (rte_better(new, *k))
797 break;
c0973621 798
26822d8f
OZ
799 new->next = *k;
800 *k = new;
801 }
2326b001 802 }
26822d8f 803 else
2326b001 804 {
26822d8f
OZ
805 /* If routes are not sorted, find the best route and move it on
806 the first position. There are several optimized cases. */
807
094d2bdb 808 if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
26822d8f
OZ
809 goto do_recalculate;
810
811 if (new && rte_better(new, old_best))
2326b001 812 {
26822d8f
OZ
813 /* The first case - the new route is cleary optimal,
814 we link it at the first position */
815
c0973621
OZ
816 new->next = net->routes;
817 net->routes = new;
818 }
26822d8f 819 else if (old == old_best)
c0973621 820 {
26822d8f
OZ
821 /* The second case - the old best route disappeared, we add the
822 new route (if we have any) to the list (we don't care about
823 position) and then we elect the new optimal route and relink
824 that route at the first position and announce it. New optimal
825 route might be NULL if there is no more routes */
826
827 do_recalculate:
828 /* Add the new route to the list */
829 if (new)
2326b001 830 {
26822d8f
OZ
831 new->next = net->routes;
832 net->routes = new;
833 }
834
835 /* Find a new optimal route (if there is any) */
836 if (net->routes)
837 {
838 rte **bp = &net->routes;
839 for (k=&(*bp)->next; *k; k=&(*k)->next)
840 if (rte_better(*k, *bp))
841 bp = k;
842
843 /* And relink it */
844 rte *best = *bp;
845 *bp = best->next;
846 best->next = net->routes;
847 net->routes = best;
2326b001 848 }
2326b001 849 }
26822d8f
OZ
850 else if (new)
851 {
852 /* The third case - the new route is not better than the old
853 best route (therefore old_best != NULL) and the old best
854 route was not removed (therefore old_best == net->routes).
855 We just link the new route after the old best route. */
856
857 ASSERT(net->routes != NULL);
858 new->next = net->routes->next;
859 net->routes->next = new;
860 }
861 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
2326b001 862 }
c0973621 863
26822d8f
OZ
864 if (new)
865 new->lastmod = now;
866
867 /* Log the route change */
70577529 868 if (p->debug & D_ROUTES)
e8b29bdc 869 {
70577529
OZ
870 if (new_ok)
871 rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
872 else if (old_ok)
873 {
874 if (old != old_best)
875 rte_trace(p, old, '>', "removed");
876 else if (rte_is_ok(net->routes))
877 rte_trace(p, old, '>', "removed [replaced]");
878 else
879 rte_trace(p, old, '>', "removed [sole]");
880 }
c0973621
OZ
881 }
882
26822d8f 883 /* Propagate the route change */
00a09f3c 884 rte_announce(table, RA_ANY, net, new, old, NULL, tmpa);
26822d8f
OZ
885 if (net->routes != old_best)
886 rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, tmpa);
887 if (table->config->sorted)
888 rte_announce(table, RA_ACCEPTED, net, new, old, before_old, tmpa);
00a09f3c
OZ
889
890 if (!net->routes &&
891 (table->gc_counter++ >= table->config->gc_max_ops) &&
892 (table->gc_time + table->config->gc_min_time <= now))
893 rt_schedule_gc(table);
894
70577529
OZ
895 if (old_ok && p->rte_remove)
896 p->rte_remove(net, old);
897 if (new_ok && p->rte_insert)
898 p->rte_insert(net, new);
899
2326b001 900 if (old)
70577529 901 rte_free_quick(old);
5b22683d
MM
902}
903
e2dc2f30
MM
904static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */
905
906static inline void
907rte_update_lock(void)
908{
909 rte_update_nest_cnt++;
910}
911
912static inline void
913rte_update_unlock(void)
914{
915 if (!--rte_update_nest_cnt)
916 lp_flush(rte_update_pool);
917}
918
fad04c75
OZ
919static inline void
920rte_hide_dummy_routes(net *net, rte **dummy)
921{
922 if (net->routes && net->routes->attrs->source == RTS_DUMMY)
923 {
924 *dummy = net->routes;
925 net->routes = (*dummy)->next;
926 }
927}
928
929static inline void
930rte_unhide_dummy_routes(net *net, rte **dummy)
931{
932 if (*dummy)
933 {
934 (*dummy)->next = net->routes;
935 net->routes = *dummy;
936 }
937}
938
58740ed4
MM
939/**
940 * rte_update - enter a new update to a routing table
941 * @table: table to be updated
c0adf7e9 942 * @ah: pointer to table announce hook
58740ed4
MM
943 * @net: network node
944 * @p: protocol submitting the update
f98e2915 945 * @src: protocol originating the update
58740ed4
MM
946 * @new: a &rte representing the new route or %NULL for route removal.
947 *
948 * This function is called by the routing protocols whenever they discover
949 * a new route or wish to update/remove an existing route. The right announcement
2e9b2421 950 * sequence is to build route attributes first (either un-cached with @aflags set
58740ed4
MM
951 * to zero or a cached one using rta_lookup(); in this case please note that
952 * you need to increase the use count of the attributes yourself by calling
953 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
954 * the appropriate data and finally submit the new &rte by calling rte_update().
955 *
f98e2915
OZ
956 * @src specifies the protocol that originally created the route and the meaning
957 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
958 * same value as @new->attrs->proto. @p specifies the protocol that called
959 * rte_update(). In most cases it is the same protocol as @src. rte_update()
960 * stores @p in @new->sender;
961 *
9a8f20fc
MM
962 * When rte_update() gets any route, it automatically validates it (checks,
963 * whether the network and next hop address are valid IP addresses and also
964 * whether a normal routing protocol doesn't try to smuggle a host or link
965 * scope route to the table), converts all protocol dependent attributes stored
966 * in the &rte to temporary extended attributes, consults import filters of the
967 * protocol to see if the route should be accepted and/or its attributes modified,
968 * stores the temporary attributes back to the &rte.
969 *
970 * Now, having a "public" version of the route, we
f98e2915 971 * automatically find any old route defined by the protocol @src
58740ed4
MM
972 * for network @n, replace it by the new one (or removing it if @new is %NULL),
973 * recalculate the optimal route for this destination and finally broadcast
9a8f20fc 974 * the change (if any) to all routing protocols by calling rte_announce().
3ce8c610
MM
975 *
976 * All memory used for attribute lists and other temporary allocations is taken
977 * from a special linear pool @rte_update_pool and freed when rte_update()
978 * finishes.
58740ed4 979 */
23ac9e9a
OZ
980
981void
094d2bdb 982rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
e2dc2f30 983{
c0adf7e9
OZ
984 struct proto *p = ah->proto;
985 struct proto_stats *stats = ah->stats;
986 struct filter *filter = ah->in_filter;
e2dc2f30 987 ea_list *tmpa = NULL;
fad04c75 988 rte *dummy = NULL;
e2dc2f30
MM
989
990 rte_update_lock();
991 if (new)
992 {
c0adf7e9 993 new->sender = ah;
40b65f94 994
9db74169 995 stats->imp_updates_received++;
cfd46ee4
MM
996 if (!rte_validate(new))
997 {
998 rte_trace_in(D_FILTERS, p, new, "invalid");
9db74169 999 stats->imp_updates_invalid++;
cfd46ee4
MM
1000 goto drop;
1001 }
cf98be7b 1002
40b65f94 1003 if (filter == FILTER_REJECT)
cfd46ee4 1004 {
9db74169 1005 stats->imp_updates_filtered++;
cfd46ee4 1006 rte_trace_in(D_FILTERS, p, new, "filtered out");
094d2bdb 1007
15550957 1008 if (! ah->in_keep_filtered)
cf98be7b
OZ
1009 goto drop;
1010
1011 /* new is a private copy, i could modify it */
15550957 1012 new->flags |= REF_FILTERED;
cfd46ee4 1013 }
cf98be7b 1014 else
e2dc2f30 1015 {
736e143f 1016 tmpa = make_tmp_attrs(new, rte_update_pool);
cf98be7b 1017 if (filter && (filter != FILTER_REJECT))
cfd46ee4 1018 {
cf98be7b
OZ
1019 ea_list *old_tmpa = tmpa;
1020 int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
1021 if (fr > F_ACCEPT)
1022 {
1023 stats->imp_updates_filtered++;
1024 rte_trace_in(D_FILTERS, p, new, "filtered out");
1025
15550957 1026 if (! ah->in_keep_filtered)
cf98be7b
OZ
1027 goto drop;
1028
15550957 1029 new->flags |= REF_FILTERED;
cf98be7b 1030 }
736e143f
OZ
1031 if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
1032 src->proto->store_tmp_attrs(new, tmpa);
cfd46ee4 1033 }
e2dc2f30 1034 }
094d2bdb 1035 if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
e2dc2f30
MM
1036 new->attrs = rta_lookup(new->attrs);
1037 new->flags |= REF_COW;
1038 }
925fe2d3 1039 else
094d2bdb
OZ
1040 {
1041 stats->imp_withdraws_received++;
1042
1043 if (!net || !src)
1044 {
1045 stats->imp_withdraws_ignored++;
1046 rte_update_unlock();
1047 return;
1048 }
1049 }
925fe2d3 1050
fad04c75
OZ
1051 recalc:
1052 rte_hide_dummy_routes(net, &dummy);
c0adf7e9 1053 rte_recalculate(ah, net, new, tmpa, src);
fad04c75 1054 rte_unhide_dummy_routes(net, &dummy);
e2dc2f30
MM
1055 rte_update_unlock();
1056 return;
1057
fad04c75 1058 drop:
e2dc2f30 1059 rte_free(new);
fad04c75
OZ
1060 new = NULL;
1061 tmpa = NULL;
1062 goto recalc;
e2dc2f30
MM
1063}
1064
cfe34a31
OZ
1065/* Independent call to rte_announce(), used from next hop
1066 recalculation, outside of rte_update(). new must be non-NULL */
1067static inline void
1068rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old)
1069{
cfe34a31
OZ
1070 ea_list *tmpa;
1071
1072 rte_update_lock();
094d2bdb 1073 tmpa = make_tmp_attrs(new, rte_update_pool);
00a09f3c 1074 rte_announce(tab, type, n, new, old, NULL, tmpa);
cfe34a31
OZ
1075 rte_update_unlock();
1076}
1077
5b22683d 1078void
0e02abfd 1079rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during garbage collection */
5b22683d 1080{
e2dc2f30 1081 rte_update_lock();
094d2bdb 1082 rte_recalculate(old->sender, old->net, NULL, NULL, old->attrs->src);
e2dc2f30 1083 rte_update_unlock();
2326b001
MM
1084}
1085
36da2857
OZ
1086/* Check rtable for best route to given net whether it would be exported do p */
1087int
1088rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter)
1089{
1090 net *n = net_find(t, prefix, pxlen);
1091 rte *rt = n ? n->routes : NULL;
1092
1093 if (!rte_is_valid(rt))
1094 return 0;
1095
1096 rte_update_lock();
1097
1098 /* Rest is stripped down export_filter() */
736e143f 1099 ea_list *tmpa = make_tmp_attrs(rt, rte_update_pool);
36da2857
OZ
1100 int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0;
1101 if (v == RIC_PROCESS)
1102 v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
1103
1104 /* Discard temporary rte */
1105 if (rt != n->routes)
1106 rte_free(rt);
1107
1108 rte_update_unlock();
1109
1110 return v > 0;
1111}
1112
6eda3f13
OZ
1113
1114/**
1115 * rt_refresh_begin - start a refresh cycle
1116 * @t: related routing table
1117 * @ah: related announce hook
1118 *
1119 * This function starts a refresh cycle for given routing table and announce
1120 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1121 * routes to the routing table (by rte_update()). After that, all protocol
1122 * routes (more precisely routes with @ah as @sender) not sent during the
1123 * refresh cycle but still in the table from the past are pruned. This is
1124 * implemented by marking all related routes as stale by REF_STALE flag in
1125 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1126 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1127 */
0c791f87
OZ
1128void
1129rt_refresh_begin(rtable *t, struct announce_hook *ah)
1130{
1131 net *n;
1132 rte *e;
1133
1134 FIB_WALK(&t->fib, fn)
1135 {
1136 n = (net *) fn;
1137 for (e = n->routes; e; e = e->next)
1138 if (e->sender == ah)
1139 e->flags |= REF_STALE;
1140 }
1141 FIB_WALK_END;
1142}
1143
6eda3f13
OZ
1144/**
1145 * rt_refresh_end - end a refresh cycle
1146 * @t: related routing table
1147 * @ah: related announce hook
1148 *
1149 * This function starts a refresh cycle for given routing table and announce
1150 * hook. See rt_refresh_begin() for description of refresh cycles.
1151 */
0c791f87
OZ
1152void
1153rt_refresh_end(rtable *t, struct announce_hook *ah)
1154{
1155 int prune = 0;
1156 net *n;
1157 rte *e;
1158
1159 FIB_WALK(&t->fib, fn)
1160 {
1161 n = (net *) fn;
1162 for (e = n->routes; e; e = e->next)
1163 if ((e->sender == ah) && (e->flags & REF_STALE))
1164 {
1165 e->flags |= REF_DISCARD;
1166 prune = 1;
1167 }
1168 }
1169 FIB_WALK_END;
1170
1171 if (prune)
1172 rt_schedule_prune(t);
1173}
1174
1175
58740ed4
MM
1176/**
1177 * rte_dump - dump a route
1178 * @e: &rte to be dumped
1179 *
1180 * This functions dumps contents of a &rte to debug output.
1181 */
2326b001 1182void
a0762910 1183rte_dump(rte *e)
2326b001 1184{
a0762910 1185 net *n = e->net;
47c447c4 1186 debug("%-1I/%2d ", n->n.prefix, n->n.pxlen);
c10421d3 1187 debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod);
0cdbd397 1188 rta_dump(e->attrs);
094d2bdb
OZ
1189 if (e->attrs->src->proto->proto->dump_attrs)
1190 e->attrs->src->proto->proto->dump_attrs(e);
0cdbd397 1191 debug("\n");
2326b001 1192}
62aa008a 1193
58740ed4
MM
1194/**
1195 * rt_dump - dump a routing table
1196 * @t: routing table to be dumped
1197 *
1198 * This function dumps contents of a given routing table to debug output.
1199 */
2326b001
MM
1200void
1201rt_dump(rtable *t)
1202{
0cdbd397
MM
1203 rte *e;
1204 net *n;
0e02abfd 1205 struct announce_hook *a;
0cdbd397
MM
1206
1207 debug("Dump of routing table <%s>\n", t->name);
e440395d 1208#ifdef DEBUGGING
08e2d625 1209 fib_check(&t->fib);
e440395d 1210#endif
08e2d625
MM
1211 FIB_WALK(&t->fib, fn)
1212 {
1213 n = (net *) fn;
1214 for(e=n->routes; e; e=e->next)
1215 rte_dump(e);
0cdbd397 1216 }
08e2d625 1217 FIB_WALK_END;
0e02abfd
MM
1218 WALK_LIST(a, t->hooks)
1219 debug("\tAnnounces routes to protocol %s\n", a->proto->name);
0cdbd397 1220 debug("\n");
2326b001 1221}
62aa008a 1222
58740ed4
MM
1223/**
1224 * rt_dump_all - dump all routing tables
1225 *
1226 * This function dumps contents of all routing tables to debug output.
1227 */
6d45cf21
MM
1228void
1229rt_dump_all(void)
1230{
0e02abfd
MM
1231 rtable *t;
1232
1233 WALK_LIST(t, routing_tables)
1234 rt_dump(t);
6d45cf21
MM
1235}
1236
0c791f87
OZ
1237static inline void
1238rt_schedule_prune(rtable *tab)
1239{
1240 rt_mark_for_prune(tab);
1241 ev_schedule(tab->rt_event);
1242}
1243
cfe34a31
OZ
1244static inline void
1245rt_schedule_gc(rtable *tab)
1246{
1247 if (tab->gc_scheduled)
1248 return;
1249
1250 tab->gc_scheduled = 1;
1251 ev_schedule(tab->rt_event);
1252}
1253
1254static inline void
1255rt_schedule_hcu(rtable *tab)
1256{
1257 if (tab->hcu_scheduled)
1258 return;
1259
1260 tab->hcu_scheduled = 1;
1261 ev_schedule(tab->rt_event);
1262}
1263
1264static inline void
1265rt_schedule_nhu(rtable *tab)
1266{
1267 if (tab->nhu_state == 0)
1268 ev_schedule(tab->rt_event);
1269
1270 /* state change 0->1, 2->3 */
1271 tab->nhu_state |= 1;
1272}
1273
0c791f87 1274
fb829de6
OZ
1275static void
1276rt_prune_nets(rtable *tab)
1277{
1278 struct fib_iterator fit;
1279 int ncnt = 0, ndel = 0;
1280
1281#ifdef DEBUGGING
1282 fib_check(&tab->fib);
1283#endif
1284
1285 FIB_ITERATE_INIT(&fit, &tab->fib);
1286again:
1287 FIB_ITERATE_START(&tab->fib, &fit, f)
1288 {
1289 net *n = (net *) f;
1290 ncnt++;
1291 if (!n->routes) /* Orphaned FIB entry */
1292 {
1293 FIB_ITERATE_PUT(&fit, f);
1294 fib_delete(&tab->fib, f);
1295 ndel++;
1296 goto again;
1297 }
1298 }
1299 FIB_ITERATE_END(f);
1300 DBG("Pruned %d of %d networks\n", ndel, ncnt);
1301
1302 tab->gc_counter = 0;
1303 tab->gc_time = now;
1304 tab->gc_scheduled = 0;
1305}
1306
8f6accb5 1307static void
cfe34a31 1308rt_event(void *ptr)
5996da6a 1309{
cfe34a31
OZ
1310 rtable *tab = ptr;
1311
1312 if (tab->hcu_scheduled)
1313 rt_update_hostcache(tab);
0e02abfd 1314
cfe34a31
OZ
1315 if (tab->nhu_state)
1316 rt_next_hop_update(tab);
1317
0c791f87
OZ
1318 if (tab->prune_state)
1319 if (!rt_prune_table(tab))
1320 {
1321 /* Table prune unfinished */
1322 ev_schedule(tab->rt_event);
1323 return;
1324 }
1325
cfe34a31 1326 if (tab->gc_scheduled)
094d2bdb
OZ
1327 {
1328 rt_prune_nets(tab);
1329 rt_prune_sources(); // FIXME this should be moved to independent event
1330 }
5996da6a
MM
1331}
1332
b9626ec6
MM
1333void
1334rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
1335{
1336 bzero(t, sizeof(*t));
1337 fib_init(&t->fib, p, sizeof(net), 0, rte_init);
1338 t->name = name;
1339 t->config = cf;
1340 init_list(&t->hooks);
1341 if (cf)
1342 {
cfe34a31
OZ
1343 t->rt_event = ev_new(p);
1344 t->rt_event->hook = rt_event;
1345 t->rt_event->data = t;
2eca3b3a 1346 t->gc_time = now;
b9626ec6
MM
1347 }
1348}
1349
58740ed4
MM
1350/**
1351 * rt_init - initialize routing tables
1352 *
1353 * This function is called during BIRD startup. It initializes the
1354 * routing table module.
1355 */
2326b001
MM
1356void
1357rt_init(void)
1358{
1359 rta_init();
5996da6a 1360 rt_table_pool = rp_new(&root_pool, "Routing tables");
e2dc2f30 1361 rte_update_pool = lp_new(rt_table_pool, 4080);
5996da6a 1362 rte_slab = sl_new(rt_table_pool, sizeof(rte));
0e02abfd 1363 init_list(&routing_tables);
2326b001 1364}
1a54b1c6 1365
fb829de6 1366
0c791f87
OZ
1367static int
1368rt_prune_step(rtable *tab, int step, int *limit)
fb829de6 1369{
1123e707 1370 static struct tbf rl_flush = TBF_DEFAULT_LOG_LIMITS;
fb829de6 1371 struct fib_iterator *fit = &tab->prune_fit;
1a54b1c6
MM
1372
1373 DBG("Pruning route table %s\n", tab->name);
0521e4f6
MM
1374#ifdef DEBUGGING
1375 fib_check(&tab->fib);
1376#endif
fb829de6 1377
0c791f87 1378 if (tab->prune_state == RPS_NONE)
fb829de6
OZ
1379 return 1;
1380
0c791f87 1381 if (tab->prune_state == RPS_SCHEDULED)
fb829de6
OZ
1382 {
1383 FIB_ITERATE_INIT(fit, &tab->fib);
0c791f87 1384 tab->prune_state = RPS_RUNNING;
fb829de6
OZ
1385 }
1386
08e2d625 1387again:
fb829de6 1388 FIB_ITERATE_START(&tab->fib, fit, fn)
1a54b1c6 1389 {
fb829de6 1390 net *n = (net *) fn;
08e2d625 1391 rte *e;
fb829de6 1392
08e2d625 1393 rescan:
fb829de6 1394 for (e=n->routes; e; e=e->next)
9135c1f0 1395 if (e->sender->proto->flushing ||
0c791f87 1396 (e->flags & REF_DISCARD) ||
736e143f 1397 (step && e->attrs->src->proto->flushing))
08e2d625 1398 {
0c791f87 1399 if (*limit <= 0)
fb829de6
OZ
1400 {
1401 FIB_ITERATE_PUT(fit, fn);
1402 return 0;
1403 }
1404
9135c1f0 1405 if (step)
548c329c 1406 log_rl(&rl_flush, L_WARN "Route %I/%d from %s still in %s after flush",
736e143f 1407 n->n.prefix, n->n.pxlen, e->attrs->src->proto->name, tab->name);
9135c1f0 1408
0e02abfd 1409 rte_discard(tab, e);
0c791f87 1410 (*limit)--;
fb829de6 1411
08e2d625
MM
1412 goto rescan;
1413 }
fb829de6 1414 if (!n->routes) /* Orphaned FIB entry */
1a54b1c6 1415 {
fb829de6
OZ
1416 FIB_ITERATE_PUT(fit, fn);
1417 fib_delete(&tab->fib, fn);
08e2d625 1418 goto again;
1a54b1c6 1419 }
1a54b1c6 1420 }
fb829de6
OZ
1421 FIB_ITERATE_END(fn);
1422
0521e4f6
MM
1423#ifdef DEBUGGING
1424 fib_check(&tab->fib);
1425#endif
fb829de6 1426
0c791f87 1427 tab->prune_state = RPS_NONE;
fb829de6 1428 return 1;
1a54b1c6 1429}
0e02abfd 1430
6eda3f13
OZ
1431/**
1432 * rt_prune_table - prune a routing table
1433 *
1434 * This function scans the routing table @tab and removes routes belonging to
1435 * flushing protocols, discarded routes and also stale network entries, in a
1436 * similar fashion like rt_prune_loop(). Returns 1 when all such routes are
1437 * pruned. Contrary to rt_prune_loop(), this function is not a part of the
1438 * protocol flushing loop, but it is called from rt_event() for just one routing
1439 * table.
1440 *
1441 * Note that rt_prune_table() and rt_prune_loop() share (for each table) the
1442 * prune state (@prune_state) and also the pruning iterator (@prune_fit).
1443 */
0c791f87
OZ
1444static inline int
1445rt_prune_table(rtable *tab)
1446{
1447 int limit = 512;
1448 return rt_prune_step(tab, 0, &limit);
1449}
1450
58740ed4 1451/**
fb829de6 1452 * rt_prune_loop - prune routing tables
58740ed4 1453 *
6eda3f13
OZ
1454 * The prune loop scans routing tables and removes routes belonging to flushing
1455 * protocols, discarded routes and also stale network entries. Returns 1 when
1456 * all such routes are pruned. It is a part of the protocol flushing loop.
9135c1f0 1457 *
6eda3f13
OZ
1458 * The prune loop runs in two steps. In the first step it prunes just the routes
1459 * with flushing senders (in explicitly marked tables) so the route removal is
1460 * propagated as usual. In the second step, all remaining relevant routes are
1461 * removed. Ideally, there shouldn't be any, but it happens when pipe filters
1462 * are changed.
58740ed4 1463 */
fb829de6
OZ
1464int
1465rt_prune_loop(void)
0e02abfd 1466{
9135c1f0 1467 static int step = 0;
0c791f87 1468 int limit = 512;
9135c1f0 1469 rtable *t;
0e02abfd 1470
9135c1f0 1471 again:
0e02abfd 1472 WALK_LIST(t, routing_tables)
0c791f87 1473 if (! rt_prune_step(t, step, &limit))
fb829de6
OZ
1474 return 0;
1475
9135c1f0
OZ
1476 if (step == 0)
1477 {
1478 /* Prepare for the second step */
1479 WALK_LIST(t, routing_tables)
0c791f87 1480 t->prune_state = RPS_SCHEDULED;
9135c1f0
OZ
1481
1482 step = 1;
1483 goto again;
1484 }
1485
1486 /* Done */
1487 step = 0;
fb829de6 1488 return 1;
0e02abfd
MM
1489}
1490
cfe34a31
OZ
1491void
1492rt_preconfig(struct config *c)
1493{
1494 struct symbol *s = cf_find_symbol("master");
1495
1496 init_list(&c->tables);
1497 c->master_rtc = rt_new_table(s);
1498}
1499
1500
1501/*
1502 * Some functions for handing internal next hop updates
1503 * triggered by rt_schedule_nhu().
1504 */
1505
cfe34a31
OZ
1506static inline int
1507rta_next_hop_outdated(rta *a)
1508{
1509 struct hostentry *he = a->hostentry;
7e95c05d
OZ
1510
1511 if (!he)
1512 return 0;
1513
1514 if (!he->src)
1515 return a->dest != RTD_UNREACHABLE;
1516
1517 return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
1518 (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1519 !mpnh_same(a->nexthops, he->src->nexthops);
cfe34a31
OZ
1520}
1521
1522static inline void
1523rta_apply_hostentry(rta *a, struct hostentry *he)
1524{
1525 a->hostentry = he;
7e95c05d 1526 a->iface = he->src ? he->src->iface : NULL;
cfe34a31
OZ
1527 a->gw = he->gw;
1528 a->dest = he->dest;
d1e146f2 1529 a->igp_metric = he->igp_metric;
7e95c05d 1530 a->nexthops = he->src ? he->src->nexthops : NULL;
cfe34a31
OZ
1531}
1532
1533static inline rte *
1534rt_next_hop_update_rte(rtable *tab, rte *old)
1535{
1536 rta a;
1537 memcpy(&a, old->attrs, sizeof(rta));
1538 rta_apply_hostentry(&a, old->attrs->hostentry);
1539 a.aflags = 0;
1540
1541 rte *e = sl_alloc(rte_slab);
1542 memcpy(e, old, sizeof(rte));
1543 e->attrs = rta_lookup(&a);
1544
1545 return e;
1546}
1547
1548static inline int
1549rt_next_hop_update_net(rtable *tab, net *n)
1550{
1551 rte **k, *e, *new, *old_best, **new_best;
1552 int count = 0;
1553 int free_old_best = 0;
1554
1555 old_best = n->routes;
1556 if (!old_best)
1557 return 0;
1558
cfe34a31 1559 for (k = &n->routes; e = *k; k = &e->next)
be4cd99a
OZ
1560 if (rta_next_hop_outdated(e->attrs))
1561 {
1562 new = rt_next_hop_update_rte(tab, e);
1563 *k = new;
cfe34a31 1564
be4cd99a 1565 rte_announce_i(tab, RA_ANY, n, new, e);
c0adf7e9 1566 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
cfe34a31 1567
be4cd99a
OZ
1568 /* Call a pre-comparison hook */
1569 /* Not really an efficient way to compute this */
094d2bdb
OZ
1570 if (e->attrs->src->proto->rte_recalculate)
1571 e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
cfe34a31 1572
be4cd99a
OZ
1573 if (e != old_best)
1574 rte_free_quick(e);
1575 else /* Freeing of the old best rte is postponed */
1576 free_old_best = 1;
cfe34a31 1577
be4cd99a
OZ
1578 e = new;
1579 count++;
1580 }
1581
1582 if (!count)
1583 return 0;
1584
1585 /* Find the new best route */
1586 new_best = NULL;
1587 for (k = &n->routes; e = *k; k = &e->next)
1588 {
cfe34a31
OZ
1589 if (!new_best || rte_better(e, *new_best))
1590 new_best = k;
1591 }
1592
1593 /* Relink the new best route to the first position */
1594 new = *new_best;
1595 if (new != n->routes)
1596 {
1597 *new_best = new->next;
1598 new->next = n->routes;
1599 n->routes = new;
1600 }
1601
1602 /* Announce the new best route */
1603 if (new != old_best)
1604 {
1605 rte_announce_i(tab, RA_OPTIMAL, n, new, old_best);
c0adf7e9 1606 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
cfe34a31
OZ
1607 }
1608
1609 if (free_old_best)
1610 rte_free_quick(old_best);
1611
1612 return count;
1613}
1614
1615static void
1616rt_next_hop_update(rtable *tab)
1617{
1618 struct fib_iterator *fit = &tab->nhu_fit;
1619 int max_feed = 32;
1620
1621 if (tab->nhu_state == 0)
1622 return;
1623
1624 if (tab->nhu_state == 1)
1625 {
1626 FIB_ITERATE_INIT(fit, &tab->fib);
1627 tab->nhu_state = 2;
1628 }
1629
1630 FIB_ITERATE_START(&tab->fib, fit, fn)
1631 {
1632 if (max_feed <= 0)
1633 {
1634 FIB_ITERATE_PUT(fit, fn);
1635 ev_schedule(tab->rt_event);
1636 return;
1637 }
1638 max_feed -= rt_next_hop_update_net(tab, (net *) fn);
1639 }
1640 FIB_ITERATE_END(fn);
1641
1642 /* state change 2->0, 3->1 */
1643 tab->nhu_state &= 1;
1644
1645 if (tab->nhu_state > 0)
1646 ev_schedule(tab->rt_event);
1647}
1648
1649
b9626ec6
MM
1650struct rtable_config *
1651rt_new_table(struct symbol *s)
1652{
36415e4b
OZ
1653 /* Hack that allows to 'redefine' the master table */
1654 if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc))
1655 return s->def;
1656
b9626ec6
MM
1657 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
1658
1659 cf_define_symbol(s, SYM_TABLE, c);
1660 c->name = s->name;
1661 add_tail(&new_config->tables, &c->n);
2eca3b3a 1662 c->gc_max_ops = 1000;
b9626ec6
MM
1663 c->gc_min_time = 5;
1664 return c;
1665}
1666
58740ed4
MM
1667/**
1668 * rt_lock_table - lock a routing table
1669 * @r: routing table to be locked
1670 *
1671 * Lock a routing table, because it's in use by a protocol,
1672 * preventing it from being freed when it gets undefined in a new
1673 * configuration.
1674 */
0e02abfd 1675void
50fe90ed 1676rt_lock_table(rtable *r)
0e02abfd 1677{
50fe90ed
MM
1678 r->use_count++;
1679}
1680
58740ed4
MM
1681/**
1682 * rt_unlock_table - unlock a routing table
1683 * @r: routing table to be unlocked
1684 *
1685 * Unlock a routing table formerly locked by rt_lock_table(),
1686 * that is decrease its use count and delete it if it's scheduled
1687 * for deletion by configuration changes.
1688 */
50fe90ed
MM
1689void
1690rt_unlock_table(rtable *r)
1691{
1692 if (!--r->use_count && r->deleted)
1693 {
1694 struct config *conf = r->deleted;
1695 DBG("Deleting routing table %s\n", r->name);
cfe34a31
OZ
1696 if (r->hostcache)
1697 rt_free_hostcache(r);
50fe90ed
MM
1698 rem_node(&r->n);
1699 fib_free(&r->fib);
cfe34a31 1700 rfree(r->rt_event);
50fe90ed
MM
1701 mb_free(r);
1702 config_del_obstacle(conf);
1703 }
1704}
1705
58740ed4
MM
1706/**
1707 * rt_commit - commit new routing table configuration
1708 * @new: new configuration
1709 * @old: original configuration or %NULL if it's boot time config
1710 *
1711 * Scan differences between @old and @new configuration and modify
1712 * the routing tables according to these changes. If @new defines a
1713 * previously unknown table, create it, if it omits a table existing
1714 * in @old, schedule it for deletion (it gets deleted when all protocols
1715 * disconnect from it by calling rt_unlock_table()), if it exists
1716 * in both configurations, leave it unchanged.
1717 */
50fe90ed
MM
1718void
1719rt_commit(struct config *new, struct config *old)
1720{
1721 struct rtable_config *o, *r;
0e02abfd 1722
50fe90ed
MM
1723 DBG("rt_commit:\n");
1724 if (old)
0e02abfd 1725 {
50fe90ed
MM
1726 WALK_LIST(o, old->tables)
1727 {
1728 rtable *ot = o->table;
1729 if (!ot->deleted)
1730 {
1731 struct symbol *sym = cf_find_symbol(o->name);
bf8558bc 1732 if (sym && sym->class == SYM_TABLE && !new->shutdown)
50fe90ed
MM
1733 {
1734 DBG("\t%s: same\n", o->name);
1735 r = sym->def;
1736 r->table = ot;
1737 ot->name = r->name;
b9626ec6 1738 ot->config = r;
26822d8f
OZ
1739 if (o->sorted != r->sorted)
1740 log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
50fe90ed
MM
1741 }
1742 else
1743 {
bf8558bc 1744 DBG("\t%s: deleted\n", o->name);
50fe90ed
MM
1745 ot->deleted = old;
1746 config_add_obstacle(old);
1747 rt_lock_table(ot);
1748 rt_unlock_table(ot);
1749 }
1750 }
1751 }
0e02abfd 1752 }
50fe90ed
MM
1753
1754 WALK_LIST(r, new->tables)
1755 if (!r->table)
1756 {
1757 rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
1758 DBG("\t%s: created\n", r->name);
b9626ec6 1759 rt_setup(rt_table_pool, t, r->name, r);
50fe90ed
MM
1760 add_tail(&routing_tables, &t->n);
1761 r->table = t;
1762 }
1763 DBG("\tdone\n");
0e02abfd 1764}
730f2e2c 1765
23ac9e9a
OZ
1766static inline void
1767do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
1768{
23ac9e9a
OZ
1769 ea_list *tmpa;
1770
1771 rte_update_lock();
094d2bdb 1772 tmpa = make_tmp_attrs(e, rte_update_pool);
00a09f3c
OZ
1773 if (type == RA_ACCEPTED)
1774 rt_notify_accepted(h, n, e, NULL, NULL, tmpa, p->refeeding ? 2 : 1);
1775 else
1776 rt_notify_basic(h, n, e, p->refeeding ? e : NULL, tmpa, p->refeeding);
23ac9e9a
OZ
1777 rte_update_unlock();
1778}
1779
58740ed4
MM
1780/**
1781 * rt_feed_baby - advertise routes to a new protocol
1782 * @p: protocol to be fed
1783 *
1784 * This function performs one pass of advertisement of routes to a newly
1785 * initialized protocol. It's called by the protocol code as long as it
1786 * has something to do. (We avoid transferring all the routes in single
1787 * pass in order not to monopolize CPU time.)
1788 */
ac5d8012
MM
1789int
1790rt_feed_baby(struct proto *p)
1791{
1792 struct announce_hook *h;
1793 struct fib_iterator *fit;
76dfda9e 1794 int max_feed = 256;
ac5d8012
MM
1795
1796 if (!p->feed_ahook) /* Need to initialize first */
1797 {
1798 if (!p->ahooks)
1799 return 1;
1800 DBG("Announcing routes to new protocol %s\n", p->name);
1801 p->feed_ahook = p->ahooks;
1802 fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator));
1803 goto next_hook;
1804 }
1805 fit = p->feed_iterator;
1806
1807again:
1808 h = p->feed_ahook;
1809 FIB_ITERATE_START(&h->table->fib, fit, fn)
1810 {
1811 net *n = (net *) fn;
258d0ad4 1812 rte *e = n->routes;
76dfda9e
MM
1813 if (max_feed <= 0)
1814 {
1815 FIB_ITERATE_PUT(fit, fn);
1816 return 0;
1817 }
23ac9e9a 1818
cf98be7b
OZ
1819 /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
1820
b7f3df79
OZ
1821 if ((p->accept_ra_types == RA_OPTIMAL) ||
1822 (p->accept_ra_types == RA_ACCEPTED))
cf98be7b 1823 if (rte_is_valid(e))
23ac9e9a 1824 {
0c791f87 1825 if (p->export_state != ES_FEEDING)
23ac9e9a 1826 return 1; /* In the meantime, the protocol fell down. */
b7f3df79 1827 do_feed_baby(p, p->accept_ra_types, h, n, e);
23ac9e9a
OZ
1828 max_feed--;
1829 }
1830
1831 if (p->accept_ra_types == RA_ANY)
cf98be7b 1832 for(e = n->routes; rte_is_valid(e); e = e->next)
23ac9e9a 1833 {
0c791f87 1834 if (p->export_state != ES_FEEDING)
23ac9e9a
OZ
1835 return 1; /* In the meantime, the protocol fell down. */
1836 do_feed_baby(p, RA_ANY, h, n, e);
1837 max_feed--;
1838 }
ac5d8012
MM
1839 }
1840 FIB_ITERATE_END(fn);
1841 p->feed_ahook = h->next;
1842 if (!p->feed_ahook)
1843 {
1844 mb_free(p->feed_iterator);
1845 p->feed_iterator = NULL;
1846 return 1;
1847 }
1848
1849next_hook:
1850 h = p->feed_ahook;
1851 FIB_ITERATE_INIT(fit, &h->table->fib);
1852 goto again;
1853}
1854
58740ed4
MM
1855/**
1856 * rt_feed_baby_abort - abort protocol feeding
1857 * @p: protocol
1858 *
1859 * This function is called by the protocol code when the protocol
1860 * stops or ceases to exist before the last iteration of rt_feed_baby()
1861 * has finished.
1862 */
ac5d8012
MM
1863void
1864rt_feed_baby_abort(struct proto *p)
1865{
1866 if (p->feed_ahook)
1867 {
1868 /* Unlink the iterator and exit */
1869 fit_get(&p->feed_ahook->table->fib, p->feed_iterator);
1870 p->feed_ahook = NULL;
1871 }
1872}
1873
f2b76f2c
OZ
1874
1875static inline unsigned
1876ptr_hash(void *ptr)
1877{
1878 uintptr_t p = (uintptr_t) ptr;
1879 return p ^ (p << 8) ^ (p >> 16);
1880}
1881
1882static inline unsigned
1883hc_hash(ip_addr a, rtable *dep)
1884{
1885 return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff;
1886}
1887
1888static inline void
1889hc_insert(struct hostcache *hc, struct hostentry *he)
1890{
1891 unsigned int k = he->hash_key >> hc->hash_shift;
1892 he->next = hc->hash_table[k];
1893 hc->hash_table[k] = he;
1894}
1895
1896static inline void
1897hc_remove(struct hostcache *hc, struct hostentry *he)
1898{
1899 struct hostentry **hep;
1900 unsigned int k = he->hash_key >> hc->hash_shift;
1901
1902 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
1903 *hep = he->next;
1904}
1905
1906#define HC_DEF_ORDER 10
1907#define HC_HI_MARK *4
1908#define HC_HI_STEP 2
1909#define HC_HI_ORDER 16 /* Must be at most 16 */
1910#define HC_LO_MARK /5
1911#define HC_LO_STEP 2
1912#define HC_LO_ORDER 10
1913
1914static void
1915hc_alloc_table(struct hostcache *hc, unsigned order)
1916{
1917 unsigned hsize = 1 << order;
1918 hc->hash_order = order;
1919 hc->hash_shift = 16 - order;
1920 hc->hash_max = (order >= HC_HI_ORDER) ? ~0 : (hsize HC_HI_MARK);
1921 hc->hash_min = (order <= HC_LO_ORDER) ? 0 : (hsize HC_LO_MARK);
1922
1923 hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
1924}
1925
cfe34a31 1926static void
f2b76f2c 1927hc_resize(struct hostcache *hc, unsigned new_order)
cfe34a31 1928{
f2b76f2c
OZ
1929 unsigned old_size = 1 << hc->hash_order;
1930 struct hostentry **old_table = hc->hash_table;
1931 struct hostentry *he, *hen;
1932 int i;
1933
1934 hc_alloc_table(hc, new_order);
1935 for (i = 0; i < old_size; i++)
1936 for (he = old_table[i]; he != NULL; he=hen)
1937 {
1938 hen = he->next;
1939 hc_insert(hc, he);
1940 }
1941 mb_free(old_table);
1942}
1943
1944static struct hostentry *
1b180121 1945hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
f2b76f2c
OZ
1946{
1947 struct hostentry *he = sl_alloc(hc->slab);
1948
1949 he->addr = a;
1b180121 1950 he->link = ll;
f2b76f2c
OZ
1951 he->tab = dep;
1952 he->hash_key = k;
1953 he->uc = 0;
7e95c05d 1954 he->src = NULL;
f2b76f2c
OZ
1955
1956 add_tail(&hc->hostentries, &he->ln);
1957 hc_insert(hc, he);
1958
1959 hc->hash_items++;
1960 if (hc->hash_items > hc->hash_max)
1961 hc_resize(hc, hc->hash_order + HC_HI_STEP);
1962
1963 return he;
1964}
1965
1966static void
1967hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
1968{
7e95c05d
OZ
1969 rta_free(he->src);
1970
f2b76f2c
OZ
1971 rem_node(&he->ln);
1972 hc_remove(hc, he);
1973 sl_free(hc->slab, he);
1974
1975 hc->hash_items--;
1976 if (hc->hash_items < hc->hash_min)
1977 hc_resize(hc, hc->hash_order - HC_LO_STEP);
cfe34a31
OZ
1978}
1979
1980static void
1981rt_init_hostcache(rtable *tab)
1982{
1983 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
1984 init_list(&hc->hostentries);
f2b76f2c
OZ
1985
1986 hc->hash_items = 0;
1987 hc_alloc_table(hc, HC_DEF_ORDER);
1988 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
1989
c477f489 1990 hc->lp = lp_new(rt_table_pool, 1008);
51762a45 1991 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
c477f489 1992
cfe34a31
OZ
1993 tab->hostcache = hc;
1994}
1995
1996static void
1997rt_free_hostcache(rtable *tab)
1998{
1999 struct hostcache *hc = tab->hostcache;
2000
2001 node *n;
2002 WALK_LIST(n, hc->hostentries)
2003 {
2004 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
7e95c05d
OZ
2005 rta_free(he->src);
2006
cfe34a31
OZ
2007 if (he->uc)
2008 log(L_ERR "Hostcache is not empty in table %s", tab->name);
2009 }
2010
f2b76f2c 2011 rfree(hc->slab);
c477f489 2012 rfree(hc->lp);
f2b76f2c 2013 mb_free(hc->hash_table);
cfe34a31
OZ
2014 mb_free(hc);
2015}
2016
2017static void
2018rt_notify_hostcache(rtable *tab, net *net)
2019{
2020 struct hostcache *hc = tab->hostcache;
2021
2022 if (tab->hcu_scheduled)
2023 return;
2024
c477f489
OZ
2025 if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen))
2026 rt_schedule_hcu(tab);
cfe34a31
OZ
2027}
2028
2029static int
2030if_local_addr(ip_addr a, struct iface *i)
2031{
2032 struct ifa *b;
2033
2034 WALK_LIST(b, i->addrs)
2035 if (ipa_equal(a, b->ip))
2036 return 1;
2037
2038 return 0;
2039}
2040
d1e146f2
OZ
2041static u32
2042rt_get_igp_metric(rte *rt)
2043{
ba5e5940
OZ
2044 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2045
2046 if (ea)
2047 return ea->u.data;
2048
d1e146f2 2049 rta *a = rt->attrs;
b7c48981
OF
2050
2051#ifdef CONFIG_OSPF
d1e146f2
OZ
2052 if ((a->source == RTS_OSPF) ||
2053 (a->source == RTS_OSPF_IA) ||
2054 (a->source == RTS_OSPF_EXT1))
2055 return rt->u.ospf.metric1;
b7c48981 2056#endif
d1e146f2 2057
b7c48981 2058#ifdef CONFIG_RIP
d1e146f2
OZ
2059 if (a->source == RTS_RIP)
2060 return rt->u.rip.metric;
b7c48981 2061#endif
d1e146f2
OZ
2062
2063 /* Device routes */
7e95c05d 2064 if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
d1e146f2
OZ
2065 return 0;
2066
2067 return IGP_METRIC_UNKNOWN;
2068}
2069
cfe34a31
OZ
2070static int
2071rt_update_hostentry(rtable *tab, struct hostentry *he)
2072{
7e95c05d 2073 rta *old_src = he->src;
c477f489 2074 int pxlen = 0;
cfe34a31 2075
7e95c05d
OZ
2076 /* Reset the hostentry */
2077 he->src = NULL;
2078 he->gw = IPA_NONE;
2079 he->dest = RTD_UNREACHABLE;
2080 he->igp_metric = 0;
2081
d1e146f2
OZ
2082 net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
2083 if (n)
cfe34a31 2084 {
cf98be7b
OZ
2085 rte *e = n->routes;
2086 rta *a = e->attrs;
c477f489 2087 pxlen = n->n.pxlen;
cfe34a31 2088
2c9033af
OZ
2089 if (a->hostentry)
2090 {
2091 /* Recursive route should not depend on another recursive route */
2092 log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d",
7e95c05d
OZ
2093 he->addr, n->n.prefix, pxlen);
2094 goto done;
2c9033af 2095 }
7e95c05d
OZ
2096
2097 if (a->dest == RTD_DEVICE)
cfe34a31 2098 {
f2b76f2c 2099 if (if_local_addr(he->addr, a->iface))
cfe34a31
OZ
2100 {
2101 /* The host address is a local address, this is not valid */
2102 log(L_WARN "Next hop address %I is a local address of iface %s",
f2b76f2c 2103 he->addr, a->iface->name);
7e95c05d 2104 goto done;
cfe34a31 2105 }
7e95c05d
OZ
2106
2107 /* The host is directly reachable, use link as a gateway */
2108 he->gw = he->link;
2109 he->dest = RTD_ROUTER;
cfe34a31
OZ
2110 }
2111 else
2112 {
2113 /* The host is reachable through some route entry */
cfe34a31
OZ
2114 he->gw = a->gw;
2115 he->dest = a->dest;
2116 }
d1e146f2 2117
7e95c05d 2118 he->src = rta_clone(a);
cf98be7b 2119 he->igp_metric = rt_get_igp_metric(e);
cfe34a31
OZ
2120 }
2121
7e95c05d 2122 done:
c477f489
OZ
2123 /* Add a prefix range to the trie */
2124 trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);
2125
7e95c05d
OZ
2126 rta_free(old_src);
2127 return old_src != he->src;
cfe34a31
OZ
2128}
2129
2130static void
2131rt_update_hostcache(rtable *tab)
2132{
2133 struct hostcache *hc = tab->hostcache;
2134 struct hostentry *he;
2135 node *n, *x;
2136
c477f489
OZ
2137 /* Reset the trie */
2138 lp_flush(hc->lp);
51762a45 2139 hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
c477f489 2140
cfe34a31
OZ
2141 WALK_LIST_DELSAFE(n, x, hc->hostentries)
2142 {
2143 he = SKIP_BACK(struct hostentry, ln, n);
2144 if (!he->uc)
2145 {
f2b76f2c 2146 hc_delete_hostentry(hc, he);
cfe34a31
OZ
2147 continue;
2148 }
2149
2150 if (rt_update_hostentry(tab, he))
2151 rt_schedule_nhu(he->tab);
2152 }
2153
2154 tab->hcu_scheduled = 0;
2155}
2156
2157static struct hostentry *
094d2bdb 2158rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
cfe34a31
OZ
2159{
2160 struct hostentry *he;
2161
2162 if (!tab->hostcache)
2163 rt_init_hostcache(tab);
2164
f2b76f2c
OZ
2165 unsigned int k = hc_hash(a, dep);
2166 struct hostcache *hc = tab->hostcache;
2167 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2168 if (ipa_equal(he->addr, a) && (he->tab == dep))
2169 return he;
cfe34a31 2170
1b180121 2171 he = hc_new_hostentry(hc, a, ll, dep, k);
f2b76f2c 2172 rt_update_hostentry(tab, he);
cfe34a31
OZ
2173 return he;
2174}
2175
2176void
1b180121 2177rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll)
cfe34a31 2178{
094d2bdb 2179 rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep));
cfe34a31
OZ
2180}
2181
094d2bdb 2182
730f2e2c
MM
2183/*
2184 * CLI commands
2185 */
2186
2187static void
cfd46ee4 2188rt_format_via(rte *e, byte *via)
730f2e2c 2189{
730f2e2c
MM
2190 rta *a = e->attrs;
2191
2192 switch (a->dest)
2193 {
2194 case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
2195 case RTD_DEVICE: bsprintf(via, "dev %s", a->iface->name); break;
2196 case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break;
2197 case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break;
2198 case RTD_PROHIBIT: bsprintf(via, "prohibited"); break;
7e95c05d 2199 case RTD_MULTIPATH: bsprintf(via, "multipath"); break;
730f2e2c
MM
2200 default: bsprintf(via, "???");
2201 }
cfd46ee4
MM
2202}
2203
2204static void
ce1da96e 2205rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa)
cfd46ee4 2206{
4ca93a50 2207 byte via[STD_ADDRESS_P_LENGTH+32], from[STD_ADDRESS_P_LENGTH+8];
c37e7851 2208 byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
cfd46ee4 2209 rta *a = e->attrs;
5a56f27c 2210 int primary = (e->net->routes == e);
32f95476 2211 int sync_error = (e->net->n.flags & KRF_SYNC_ERROR);
094d2bdb 2212 void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs);
7e95c05d 2213 struct mpnh *nh;
cfd46ee4
MM
2214
2215 rt_format_via(e, via);
c37e7851 2216 tm_format_datetime(tm, &config->tf_route, e->lastmod);
730f2e2c
MM
2217 if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw))
2218 bsprintf(from, " from %I", a->from);
2219 else
2220 from[0] = 0;
094d2bdb
OZ
2221
2222 get_route_info = a->src->proto->proto->get_route_info;
2223 if (get_route_info || d->verbose)
ce1da96e
MM
2224 {
2225 /* Need to normalize the extended attributes */
2226 ea_list *t = tmpa;
2227 t = ea_append(t, a->eattrs);
2228 tmpa = alloca(ea_scan(t));
2229 ea_merge(t, tmpa);
2f711231 2230 ea_sort(tmpa);
ce1da96e 2231 }
094d2bdb
OZ
2232 if (get_route_info)
2233 get_route_info(e, info, tmpa);
730f2e2c
MM
2234 else
2235 bsprintf(info, " (%d)", e->pref);
094d2bdb 2236 cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name,
32f95476 2237 tm, from, primary ? (sync_error ? " !" : " *") : "", info);
7e95c05d
OZ
2238 for (nh = a->nexthops; nh; nh = nh->next)
2239 cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
730f2e2c 2240 if (d->verbose)
ce1da96e 2241 rta_show(c, a, tmpa);
730f2e2c
MM
2242}
2243
2244static void
2245rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
2246{
2247 rte *e, *ee;
2248 byte ia[STD_ADDRESS_P_LENGTH+8];
c865cae3
OZ
2249 struct ea_list *tmpa;
2250 struct announce_hook *a = NULL;
2251 int first = 1;
2252 int pass = 0;
730f2e2c
MM
2253
2254 bsprintf(ia, "%I/%d", n->n.prefix, n->n.pxlen);
cf98be7b 2255
c865cae3
OZ
2256 if (d->export_mode)
2257 {
7aa80901
OZ
2258 if (! d->export_protocol->rt_notify)
2259 return;
2260
c865cae3
OZ
2261 a = proto_find_announce_hook(d->export_protocol, d->table);
2262 if (!a)
2263 return;
2264 }
2265
2266 for (e = n->routes; e; e = e->next)
730f2e2c 2267 {
15550957 2268 if (rte_is_filtered(e) != d->filtered)
cf98be7b
OZ
2269 continue;
2270
23693958 2271 d->rt_counter++;
c865cae3
OZ
2272 d->net_counter += first;
2273 first = 0;
2274
2275 if (pass)
2276 continue;
2277
730f2e2c
MM
2278 ee = e;
2279 rte_update_lock(); /* We use the update buffer for filtering */
094d2bdb 2280 tmpa = make_tmp_attrs(e, rte_update_pool);
c865cae3
OZ
2281
2282 if (d->export_mode)
ce1da96e 2283 {
c865cae3
OZ
2284 struct proto *ep = d->export_protocol;
2285 int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
2286
2287 if (ep->accept_ra_types == RA_OPTIMAL)
2288 pass = 1;
2289
2290 if (ic < 0)
2291 goto skip;
2292
7aa80901 2293 if (d->export_mode > RSEM_PREEXPORT)
ce1da96e 2294 {
c865cae3
OZ
2295 /*
2296 * FIXME - This shows what should be exported according to current
2297 * filters, but not what was really exported. 'configure soft'
2298 * command may change the export filter and do not update routes.
2299 */
7aa80901
OZ
2300 int do_export = (ic > 0) ||
2301 (f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT);
c865cae3 2302
7aa80901 2303 if (do_export != (d->export_mode == RSEM_EXPORT))
c865cae3
OZ
2304 goto skip;
2305
7aa80901 2306 if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED))
c865cae3 2307 pass = 1;
ce1da96e
MM
2308 }
2309 }
c865cae3
OZ
2310
2311 if (d->show_protocol && (d->show_protocol != e->attrs->src->proto))
2312 goto skip;
2313
2314 if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)
2315 goto skip;
2316
2317 d->show_counter++;
2318 if (d->stats < 2)
2319 rt_show_rte(c, ia, e, d, tmpa);
2320 ia[0] = 0;
2321
2322 skip:
730f2e2c 2323 if (e != ee)
5e9bdac2
OZ
2324 {
2325 rte_free(e);
2326 e = ee;
2327 }
730f2e2c 2328 rte_update_unlock();
c865cae3 2329
0117d004 2330 if (d->primary_only)
ce1da96e 2331 break;
730f2e2c
MM
2332 }
2333}
2334
2335static void
2336rt_show_cont(struct cli *c)
2337{
2338 struct rt_show_data *d = c->rover;
f098e072
MM
2339#ifdef DEBUGGING
2340 unsigned max = 4;
2341#else
2342 unsigned max = 64;
2343#endif
730f2e2c
MM
2344 struct fib *fib = &d->table->fib;
2345 struct fib_iterator *it = &d->fit;
2346
2347 FIB_ITERATE_START(fib, it, f)
2348 {
2349 net *n = (net *) f;
ce1da96e
MM
2350 if (d->running_on_config && d->running_on_config != config)
2351 {
2352 cli_printf(c, 8004, "Stopped due to reconfiguration");
2353 goto done;
2354 }
0c791f87 2355 if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
ce1da96e
MM
2356 {
2357 cli_printf(c, 8005, "Protocol is down");
2358 goto done;
2359 }
730f2e2c
MM
2360 if (!max--)
2361 {
2362 FIB_ITERATE_PUT(it, f);
2363 return;
2364 }
2365 rt_show_net(c, n, d);
2366 }
2367 FIB_ITERATE_END(f);
23693958
MM
2368 if (d->stats)
2369 cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter);
2370 else
2371 cli_printf(c, 0, "");
ce1da96e 2372done:
730f2e2c
MM
2373 c->cont = c->cleanup = NULL;
2374}
2375
2376static void
2377rt_show_cleanup(struct cli *c)
2378{
2379 struct rt_show_data *d = c->rover;
2380
2381 /* Unlink the iterator */
2382 fit_get(&d->table->fib, &d->fit);
2383}
2384
2385void
2386rt_show(struct rt_show_data *d)
2387{
730f2e2c
MM
2388 net *n;
2389
e667622a 2390 /* Default is either a master table or a table related to a respective protocol */
c865cae3
OZ
2391 if (!d->table && d->export_protocol) d->table = d->export_protocol->table;
2392 if (!d->table && d->show_protocol) d->table = d->show_protocol->table;
e667622a
OF
2393 if (!d->table) d->table = config->master_rtc->table;
2394
c865cae3
OZ
2395 /* Filtered routes are neither exported nor have sensible ordering */
2396 if (d->filtered && (d->export_mode || d->primary_only))
2397 cli_msg(0, "");
2398
730f2e2c
MM
2399 if (d->pxlen == 256)
2400 {
2401 FIB_ITERATE_INIT(&d->fit, &d->table->fib);
2402 this_cli->cont = rt_show_cont;
2403 this_cli->cleanup = rt_show_cleanup;
2404 this_cli->rover = d;
2405 }
2406 else
2407 {
9449c91a 2408 if (d->show_for)
d1e146f2 2409 n = net_route(d->table, d->prefix, d->pxlen);
9449c91a 2410 else
d1e146f2 2411 n = net_find(d->table, d->prefix, d->pxlen);
0da562a7 2412
730f2e2c 2413 if (n)
0da562a7
OZ
2414 rt_show_net(this_cli, n, d);
2415
2416 if (d->rt_counter)
2417 cli_msg(0, "");
730f2e2c
MM
2418 else
2419 cli_msg(8001, "Network not in table");
2420 }
2421}
3ce8c610
MM
2422
2423/*
2424 * Documentation for functions declared inline in route.h
2425 */
2426#if 0
2427
2428/**
2429 * net_find - find a network entry
2430 * @tab: a routing table
2431 * @addr: address of the network
2432 * @len: length of the network prefix
2433 *
2434 * net_find() looks up the given network in routing table @tab and
2435 * returns a pointer to its &net entry or %NULL if no such network
2436 * exists.
2437 */
2438static inline net *net_find(rtable *tab, ip_addr addr, unsigned len)
2439{ DUMMY; }
2440
2441/**
2442 * net_get - obtain a network entry
2443 * @tab: a routing table
2444 * @addr: address of the network
2445 * @len: length of the network prefix
2446 *
2447 * net_get() looks up the given network in routing table @tab and
2448 * returns a pointer to its &net entry. If no such entry exists, it's
2449 * created.
2450 */
2451static inline net *net_get(rtable *tab, ip_addr addr, unsigned len)
2452{ DUMMY; }
2453
2454/**
2455 * rte_cow - copy a route for writing
2456 * @r: a route entry to be copied
2457 *
2458 * rte_cow() takes a &rte and prepares it for modification. The exact action
2459 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2460 * just returned unchanged, else a new temporary entry with the same contents
2461 * is created.
2462 *
2463 * The primary use of this function is inside the filter machinery -- when
2464 * a filter wants to modify &rte contents (to change the preference or to
2465 * attach another set of attributes), it must ensure that the &rte is not
2466 * shared with anyone else (and especially that it isn't stored in any routing
2467 * table).
2468 *
2e9b2421 2469 * Result: a pointer to the new writable &rte.
3ce8c610
MM
2470 */
2471static inline rte * rte_cow(rte *r)
2472{ DUMMY; }
2473
2474#endif