/*
 * BIRD -- Routing Tables
 *
 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
 *
 * Can be freely distributed and used under the terms of the GNU GPL.
 */

/**
 * DOC: Routing tables
 *
 * Routing tables are probably the most important structures BIRD uses. They
 * hold all the information about known networks, the associated routes and
 * their attributes.
 *
 * There are multiple routing tables (a primary one together with any
 * number of secondary ones if requested by the configuration). Each table
 * is basically a FIB containing entries describing the individual
 * destination networks. For each network (represented by structure &net),
 * there is a one-way linked list of route entries (&rte), the first entry
 * on the list being the best one (i.e., the one we currently use
 * for routing); the order of the other ones is undetermined.
 *
 * The &rte contains information specific to the route (preference, protocol
 * metrics, time of last modification etc.) and a pointer to a &rta structure
 * (see the route attribute module for a precise explanation) holding the
 * remaining route attributes which are expected to be shared by multiple
 * routes in order to conserve memory.
 */
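/*
 * A minimal sketch of walking the structures described above (the variables
 * `tab' and `n0' are hypothetical; error handling omitted):
 *
 *   net *n = net_find(tab, n0);                 // look up a network in the FIB
 *   if (n)
 *     for (rte *e = n->routes; e; e = e->next)  // the best route comes first
 *       log(L_INFO "route %N pref %d", n->n.addr, e->pref);
 */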

#undef LOCAL_DEBUG

#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "lib/resource.h"
#include "lib/event.h"
#include "lib/string.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "filter/data.h"
#include "lib/hash.h"
#include "lib/alloca.h"

#ifdef CONFIG_BGP
#include "proto/bgp/bgp.h"
#endif

pool *rt_table_pool;

static slab *rte_slab;
static linpool *rte_update_pool;

list routing_tables;

static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);


/* Like fib_route(), but skips empty net entries */
static inline void *
net_route_ip4(rtable *t, net_addr_ip4 *n)
{
  net *r;

  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  {
    n->pxlen--;
    ip4_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

static inline void *
net_route_ip6(rtable *t, net_addr_ip6 *n)
{
  net *r;

  while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  {
    n->pxlen--;
    ip6_clrbit(&n->prefix, n->pxlen);
  }

  return r;
}

static inline void *
net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
{
  struct fib_node *fn;

  while (1)
  {
    net *best = NULL;
    int best_pxlen = 0;

    /* We need to do dst-first matching. Since sadr addresses are hashed on dst
       prefix only, find the hash table chain and go through it to find the
       match with the longest matching src prefix. */
    for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next)
    {
      net_addr_ip6_sadr *a = (void *) fn->addr;

      if (net_equal_dst_ip6_sadr(n, a) &&
          net_in_net_src_ip6_sadr(n, a) &&
          (a->src_pxlen >= best_pxlen))
      {
        best = fib_node_to_user(&t->fib, fn);
        best_pxlen = a->src_pxlen;
      }
    }

    if (best)
      return best;

    if (!n->dst_pxlen)
      break;

    n->dst_pxlen--;
    ip6_clrbit(&n->dst_prefix, n->dst_pxlen);
  }

  return NULL;
}

void *
net_route(rtable *tab, const net_addr *n)
{
  ASSERT(tab->addr_type == n->type);

  net_addr *n0 = alloca(n->length);
  net_copy(n0, n);

  switch (n->type)
  {
  case NET_IP4:
  case NET_VPN4:
  case NET_ROA4:
    return net_route_ip4(tab, (net_addr_ip4 *) n0);

  case NET_IP6:
  case NET_VPN6:
  case NET_ROA6:
    return net_route_ip6(tab, (net_addr_ip6 *) n0);

  case NET_IP6_SADR:
    return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0);

  default:
    return NULL;
  }
}

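/*
 * A small usage sketch for net_route() (hypothetical variables `tab' and
 * `addr'): it returns the most specific non-empty net covering the given
 * prefix, falling back to shorter prefixes, which is exactly what recursive
 * next hop resolution needs.
 *
 *   net *n = net_route(tab, addr);
 *   if (n && rte_is_valid(n->routes))
 *     ...                            // n->routes is the best covering route
 */
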
static int
net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

static int
net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

/**
 * roa_check - check validity of route origination in a ROA table
 * @tab: ROA table
 * @n: network prefix to check
 * @asn: AS number of network prefix
 *
 * Implements RFC 6483 route validation for the given network prefix. The
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
 * a candidate ROA with matching ASN and maxlen field greater than or equal to
 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
 * the caller cannot determine the origin AS, 0 could be used (in that case
 * ROA_VALID cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6,
 * network @n must have type NET_IP4 or NET_IP6, respectively.
 */
int
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
{
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
    return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
    return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
  else
    return ROA_UNKNOWN;  /* Should not happen */
}

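/*
 * A small sketch of how a consumer (e.g. a filter implementation) would use
 * the result; `roa_tab', `prefix' and `origin_asn' are hypothetical:
 *
 *   switch (net_roa_check(roa_tab, prefix, origin_asn))
 *   {
 *   case ROA_VALID:   ...; break;  // a covering ROA matches ASN and maxlen
 *   case ROA_INVALID: ...; break;  // covered, but no ROA matches
 *   case ROA_UNKNOWN: ...; break;  // no covering ROA at all
 *   }
 */
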
/**
 * rte_find - find a route
 * @net: network node
 * @src: route source
 *
 * The rte_find() function returns a route for destination @net
 * which is from route source @src.
 */
rte *
rte_find(net *net, struct rte_src *src)
{
  rte *e = net->routes;

  while (e && e->attrs->src != src)
    e = e->next;
  return e;
}

/**
 * rte_get_temp - get a temporary &rte
 * @a: attributes to assign to the new route (a &rta; in case it's
 * un-cached, rte_update() will create a cached copy automatically)
 *
 * Create a temporary &rte and bind it with the attributes @a.
 * The route preference is cleared here; rte_update() later fills in
 * the channel's default preference unless the caller sets one.
 */
rte *
rte_get_temp(rta *a)
{
  rte *e = sl_alloc(rte_slab);

  e->attrs = a;
  e->id = 0;
  e->flags = 0;
  e->pref = 0;
  return e;
}

rte *
rte_do_cow(rte *r)
{
  rte *e = sl_alloc(rte_slab);

  memcpy(e, r, sizeof(rte));
  e->attrs = rta_clone(r->attrs);
  e->flags = 0;
  return e;
}

/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in that case they are returned unchanged. Second, &rte is a
 * private copy, but &rta is cached, in that case &rta is duplicated using
 * rta_do_cow(). Third, &rte is shared and &rta is cached, in that case
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, the cached &rta loses one reference, while the
 * private copy created by rta_do_cow() is a shallow copy sharing indirect data
 * (eattrs, nexthops, ...) with it. To work properly, the original shared &rta
 * should have another reference during the life of the created private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  if (!rta_is_cached(r->attrs))
    return r;

  r = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(r->attrs);
  r->attrs = a;
  return r;
}


/**
 * rte_init_tmp_attrs - initialize temporary ea_list for route
 * @r: route entry to be modified
 * @lp: linpool from which to allocate attributes
 * @max: maximum number of added temporary attributes
 *
 * This function is supposed to be called from make_tmp_attrs() and
 * store_tmp_attrs() hooks before rte_make_tmp_attr() / rte_store_tmp_attr()
 * functions. It allocates &ea_list with length for @max items for temporary
 * attributes and puts it on top of eattrs stack.
 */
void
rte_init_tmp_attrs(rte *r, linpool *lp, uint max)
{
  struct ea_list *e = lp_alloc(lp, sizeof(struct ea_list) + max * sizeof(eattr));

  e->next = r->attrs->eattrs;
  e->flags = EALF_SORTED | EALF_TEMP;
  e->count = 0;

  r->attrs->eattrs = e;
}

/**
 * rte_make_tmp_attr - make temporary eattr from private route fields
 * @r: route entry to be modified
 * @id: attribute ID
 * @type: attribute type
 * @val: attribute value (u32 or adata ptr)
 *
 * This function is supposed to be called from make_tmp_attrs() hook for
 * each temporary attribute, after temporary &ea_list was initialized by
 * rte_init_tmp_attrs(). It checks whether temporary attribute is supposed to
 * be defined (based on route pflags) and if so then it fills &eattr field in
 * preallocated temporary &ea_list on top of route @r eattrs stack.
 *
 * Note that it may require a free &eattr slot in the temporary &ea_list, so it
 * must not be called more times than the @max argument of rte_init_tmp_attrs().
 */
void
rte_make_tmp_attr(rte *r, uint id, uint type, uintptr_t val)
{
  if (r->pflags & EA_ID_FLAG(id))
  {
    ea_list *e = r->attrs->eattrs;
    eattr *a = &e->attrs[e->count++];
    a->id = id;
    a->type = type;
    a->flags = 0;

    if (type & EAF_EMBEDDED)
      a->u.data = (u32) val;
    else
      a->u.ptr = (struct adata *) val;
  }
}

/**
 * rte_store_tmp_attr - store temporary eattr to private route fields
 * @r: route entry to be modified
 * @id: attribute ID
 *
 * This function is supposed to be called from store_tmp_attrs() hook for
 * each temporary attribute, after temporary &ea_list was initialized by
 * rte_init_tmp_attrs(). It checks whether temporary attribute is defined in
 * route @r eattrs stack, updates route pflags accordingly, undefines it by
 * filling &eattr field in preallocated temporary &ea_list on top of the eattrs
 * stack, and returns the value. Caller is supposed to store it in the
 * appropriate private field.
 *
 * Note that it may require a free &eattr slot in the temporary &ea_list, so it
 * must not be called more times than the @max argument of rte_init_tmp_attrs().
 */
uintptr_t
rte_store_tmp_attr(rte *r, uint id)
{
  ea_list *e = r->attrs->eattrs;
  eattr *a = ea_find(e->next, id);

  if (a)
  {
    e->attrs[e->count++] = (struct eattr) { .id = id, .type = EAF_TYPE_UNDEF };
    r->pflags |= EA_ID_FLAG(id);
    return (a->type & EAF_EMBEDDED) ? a->u.data : (uintptr_t) a->u.ptr;
  }
  else
  {
    r->pflags &= ~EA_ID_FLAG(id);
    return 0;
  }
}

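/*
 * A minimal sketch of protocol hooks built from the helpers above. The
 * protocol `foo', its private field `metric' and the attribute ID
 * EA_FOO_METRIC are hypothetical; a real protocol would use its own rte
 * fields and attribute IDs:
 *
 *   static void
 *   foo_make_tmp_attrs(rte *rt, linpool *lp)
 *   {
 *     rte_init_tmp_attrs(rt, lp, 1);
 *     rte_make_tmp_attr(rt, EA_FOO_METRIC, EAF_TYPE_INT, rt->u.foo.metric);
 *   }
 *
 *   static void
 *   foo_store_tmp_attrs(rte *rt, linpool *lp)
 *   {
 *     rte_init_tmp_attrs(rt, lp, 1);
 *     rt->u.foo.metric = rte_store_tmp_attr(rt, EA_FOO_METRIC);
 *   }
 */
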
/**
 * rte_make_tmp_attrs - prepare route by adding all relevant temporary route attributes
 * @r: route entry to be modified (may be replaced if COW)
 * @lp: linpool from which to allocate attributes
 * @old_attrs: temporary ref to old &rta (may be NULL)
 *
 * This function expands privately stored protocol-dependent route attributes
 * to a uniform &eattr / &ea_list representation. It is essentially a wrapper
 * around protocol make_tmp_attrs() hook, which does some additional work like
 * ensuring that route @r is writable.
 *
 * The route @r may be read-only (with %REF_COW flag), in that case a writable
 * copy is obtained by rte_cow() and @r is replaced. If @r is originally
 * writable, it may be directly modified (and it is never copied).
 *
 * If the @old_attrs ptr is supplied, the function obtains another reference of
 * the old cached &rta, which is necessary in some cases (see rte_cow_rta() for
 * details). It is freed by rte_store_tmp_attrs(), or manually by rta_free().
 *
 * Generally, if the caller ensures that @r is read-only (e.g. in route export)
 * then it may ignore @old_attrs (and set it to NULL), but must handle replacement
 * of @r. If the caller ensures that @r is writable (e.g. in route import) then
 * it may ignore replacement of @r, but it must handle @old_attrs.
 */
void
rte_make_tmp_attrs(rte **r, linpool *lp, rta **old_attrs)
{
  void (*make_tmp_attrs)(rte *r, linpool *lp);
  make_tmp_attrs = (*r)->attrs->src->proto->make_tmp_attrs;

  if (!make_tmp_attrs)
    return;

  /* We may need to keep ref to old attributes, will be freed in rte_store_tmp_attrs() */
  if (old_attrs)
    *old_attrs = rta_is_cached((*r)->attrs) ? rta_clone((*r)->attrs) : NULL;

  *r = rte_cow_rta(*r, lp);
  make_tmp_attrs(*r, lp);
}

/**
 * rte_store_tmp_attrs - store temporary route attributes back to private route fields
 * @r: route entry to be modified
 * @lp: linpool from which to allocate attributes
 * @old_attrs: temporary ref to old &rta
 *
 * This function stores temporary route attributes that were expanded by
 * rte_make_tmp_attrs() back to private route fields and also undefines them.
 * It is essentially a wrapper around protocol store_tmp_attrs() hook, which
 * does some additional work like taking a shortcut if there is no change and
 * cleaning up the @old_attrs reference obtained by rte_make_tmp_attrs().
 */
static void
rte_store_tmp_attrs(rte *r, linpool *lp, rta *old_attrs)
{
  void (*store_tmp_attrs)(rte *rt, linpool *lp);
  store_tmp_attrs = r->attrs->src->proto->store_tmp_attrs;

  if (!store_tmp_attrs)
    return;

  ASSERT(!rta_is_cached(r->attrs));

  /* If there is no new ea_list, we just skip the temporary ea_list */
  ea_list *ea = r->attrs->eattrs;
  if (ea && (ea->flags & EALF_TEMP))
    r->attrs->eattrs = ea->next;
  else
    store_tmp_attrs(r, lp);

  /* Free ref we got in rte_make_tmp_attrs(), have to do rta_lookup() first */
  r->attrs = rta_lookup(r->attrs);
  rta_free(old_attrs);
}


static int  /* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
  int (*better)(rte *, rte *);

  if (!rte_is_valid(old))
    return 1;
  if (!rte_is_valid(new))
    return 0;

  if (new->pref > old->pref)
    return 1;
  if (new->pref < old->pref)
    return 0;
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
  {
    /*
     * If the user has configured protocol preferences, so that two different protocols
     * have the same preference, try to break the tie by comparing addresses. Not too
     * useful, but keeps the ordering of routes unambiguous.
     */
    return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
  }
  if (better = new->attrs->src->proto->rte_better)
    return better(new, old);
  return 0;
}

static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->pref != sec->pref)
    return 0;

  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
    return 0;

  if (mergable = pri->attrs->src->proto->rte_mergable)
    return mergable(pri, sec);

  return 0;
}

static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
}

static inline void
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '>', msg);
}

static inline void
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
{
  if (p->debug & flag)
    rte_trace(p, e, '<', msg);
}

static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  const struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  v = p->preexport ? p->preexport(p, &rt, pool) : 0;
  if (v < 0)
  {
    if (silent)
      goto reject;

    stats->exp_updates_rejected++;
    if (v == RIC_REJECT)
      rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
    goto reject;
  }
  if (v > 0)
  {
    if (!silent)
      rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
    goto accept;
  }

  rte_make_tmp_attrs(&rt, pool, NULL);

  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, pool,
                        (silent ? FF_SILENT : 0)) > F_ACCEPT));
  if (v)
  {
    if (silent)
      goto reject;

    stats->exp_updates_filtered++;
    rte_trace_out(D_FILTERS, p, rt, "filtered out");
    goto reject;
  }

accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}

static inline rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
{
  return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
}

static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;

  if (refeed && new)
    c->refeed_count++;

  /* Apply export limit */
  struct channel_limit *l = &c->out_limit;
  if (l->action && !old && new)
  {
    if (stats->exp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

    if (l->state == PLS_BLOCKED)
    {
      stats->exp_updates_rejected++;
      rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
      return;
    }
  }

  /* Apply export table */
  if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed))
    return;

  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  if (old)
  {
    bmap_clear(&c->export_map, old->id);
    stats->exp_routes--;
  }

  if (new)
  {
    bmap_set(&c->export_map, new->id);
    stats->exp_routes++;
  }

  if (p->debug & D_ROUTES)
  {
    if (new && old)
      rte_trace_out(D_ROUTES, p, new, "replaced");
    else if (new)
      rte_trace_out(D_ROUTES, p, new, "added");
    else if (old)
      rte_trace_out(D_ROUTES, p, old, "removed");
  }

  p->rt_notify(p, c, net, new, old);
}

static void
rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  if (new)
    new = export_filter(c, new, &new_free, 0);

  if (old && !bmap_test(&c->export_map, old->id))
    old = NULL;

  if (!new && !old)
    return;

  do_rt_notify(c, net, new, old, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}

static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  int new_first = 0;

  /*
   * We assume that there are no changes in net route order except (added)
   * new_changed and (removed) old_changed. Therefore, the function is not
   * compatible with deterministic_med (where nontrivial reordering can happen
   * as a result of a route change) and with recomputation of recursive routes
   * due to next hop update (where many routes can be changed in one step).
   *
   * Note that we need this assumption just for optimizations, we could just
   * run full new_best recomputation otherwise.
   *
   * There are three cases:
   * feed or old_best is old_changed -> we need to recompute new_best
   * old_best is before new_changed -> new_best is old_best, ignore
   * old_best is after new_changed -> try new_changed, otherwise old_best
   */

  if (net->routes)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Find old_best - either old_changed, or route for net->routes */
  if (old_changed && bmap_test(&c->export_map, old_changed->id))
    old_best = old_changed;
  else
  {
    for (rte *r = net->routes; rte_is_valid(r); r = r->next)
    {
      if (bmap_test(&c->export_map, r->id))
      {
        old_best = r;
        break;
      }

      /* Note if new_changed found before old_best */
      if (r == new_changed)
        new_first = 1;
    }
  }

  /* Find new_best */
  if ((new_changed == old_changed) || (old_best == old_changed))
  {
    /* Feed or old_best changed -> find first accepted by filters */
    for (rte *r = net->routes; rte_is_valid(r); r = r->next)
      if (new_best = export_filter(c, r, &new_free, 0))
        break;
  }
  else
  {
    /* Other cases -> either new_changed, or old_best (and nothing changed) */
    if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0)))
      new_best = new_changed;
    else
      return;
  }

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}


static struct nexthop *
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
{
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
}

rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct nexthop *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, pool, silent);

  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter_(c, rt0, &tmp, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      best = rte_cow_rta(best, pool);
      nexthop_link(best->attrs, nhs);
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}


static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte *old_best, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) &&
      (new_changed != old_changed) &&
      !rte_mergable(new_best, new_changed) &&
      !rte_mergable(old_best, old_changed))
    return;

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0);

  /* Check old merged route */
  if (old_best && !bmap_test(&c->export_map, old_best->id))
    old_best = NULL;

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}


/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_UNDEF or RA_ANY)
 * @net: network in question
 * @new: the new or changed route
 * @old: the previous route replaced by the new one
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 *
 * This function gets a routing table update and announces it to all protocols
 * that are connected to the same table by their channels.
 *
 * There are two ways of how routing table changes are announced. First, there
 * is a change of just one route in @net (which may cause a change of the best
 * route of the network). In this case @new and @old describe the changed route
 * and @new_best and @old_best describe the best routes. Other routes are not
 * affected, but in a sorted table the order of other routes might change.
 *
 * Second, there is a bulk change of multiple routes in @net, with shared best
 * route selection. In such a case separate route changes are described using
 * @type of %RA_ANY, with @new and @old specifying the changed route, while
 * @new_best and @old_best are NULL. After that, another notification is done
 * where @new_best and @old_best are filled (may be the same), but @new and @old
 * are NULL.
 *
 * The function announces the change to all associated channels. For each
 * channel, an appropriate preprocessing is done according to channel &ra_mode.
 * For example, %RA_OPTIMAL channels receive just changes of best routes.
 *
 * In general, we first call preexport() hook of a protocol, which performs
 * basic checks on the route (each protocol has a right to veto or force accept
 * of the route before any filter is asked). Then we consult an export filter
 * of the channel and verify the old route in an export map of the channel.
 * Finally, the rt_notify() hook of the protocol gets called.
 *
 * Note that there are also calls of rt_notify() hooks due to feed, but that is
 * done outside of scope of rte_announce().
 */
static void
rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old,
             rte *new_best, rte *old_best)
{
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!new && !old && !new_best && !old_best)
    return;

  if (new_best != old_best)
  {
    if (new_best)
      new_best->sender->stats.pref_routes++;
    if (old_best)
      old_best->sender->stats.pref_routes--;

    if (tab->hostcache)
      rt_notify_hostcache(tab, net);
  }

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
  {
    if (c->export_state == ES_DOWN)
      continue;

    if (type && (type != c->ra_mode))
      continue;

    switch (c->ra_mode)
    {
    case RA_OPTIMAL:
      if (new_best != old_best)
        rt_notify_basic(c, net, new_best, old_best, 0);
      break;

    case RA_ANY:
      if (new != old)
        rt_notify_basic(c, net, new, old, 0);
      break;

    case RA_ACCEPTED:
      rt_notify_accepted(c, net, new, old, 0);
      break;

    case RA_MERGED:
      rt_notify_merged(c, net, new, old, new_best, old_best, 0);
      break;
    }
  }
}

static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  /* FIXME: better handling different nettypes */
  c = !net_is_flow(n->n.addr) ?
    net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
  {
    log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
        n->n.addr, e->attrs->dest, e->sender->proto->name);
    return 0;
  }

  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
  {
    log(L_WARN "Ignoring unsorted multipath route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}

/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
void
rte_free(rte *e)
{
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static inline void
rte_free_quick(rte *e)
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static int
rte_same(rte *x, rte *y)
{
  /* rte.flags are not checked, as they are mostly internal to rtable */
  return
    x->attrs == y->attrs &&
    x->pflags == y->pflags &&
    x->pref == y->pref &&
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y)) &&
    rte_is_filtered(x) == rte_is_filtered(y);
}

static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }

static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes;  /* Find and remove original route from the same protocol */
  while (old = *k)
  {
    if (old->attrs->src == src)
    {
      /* If there is the same route in the routing table but from
       * a different sender, then there are two paths from the
       * source protocol to this routing table through transparent
       * pipes, which is not allowed.
       *
       * We log that and ignore the route. If it is a withdraw, we
       * ignore it completely (there might be 'spurious withdraws',
       * see FIXME in do_rte_announce())
       */
      if (old->sender->proto != p)
      {
        if (new)
        {
          log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
                 net->n.addr, table->name);
          rte_free_quick(new);
        }
        return;
      }

      if (new && rte_same(old, new))
      {
        /* No changes, ignore the new route and refresh the old one */

        old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);

        if (!rte_is_filtered(new))
        {
          stats->imp_updates_ignored++;
          rte_trace_in(D_ROUTES, p, new, "ignored");
        }

        rte_free_quick(new);
        return;
      }
      *k = old->next;
      table->rt_count--;
      break;
    }
    k = &old->next;
    before_old = old;
  }

  if (!old)
    before_old = NULL;

  if (!old && !new)
  {
    stats->imp_withdraws_ignored++;
    return;
  }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new && !c->in_table)
  {
    u32 all_routes = stats->imp_routes + stats->filt_routes;

    if (all_routes >= l->limit)
      channel_notify_limit(c, l, PLD_RX, all_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* In receive limit the situation is simple, old is NULL so
         we just free new and exit like nothing happened */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
      rte_free_quick(new);
      return;
    }
  }

  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
  {
    if (stats->imp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* In import limit the situation is more complicated. We
         shouldn't just drop the route, we should handle it like
         it was filtered. We also have to continue the route
         processing if old or new is non-NULL, but we should exit
         if both are NULL as this case is probably assumed to be
         already handled. */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

      if (c->in_keep_filtered)
        new->flags |= REF_FILTERED;
      else
        { rte_free_quick(new); new = NULL; }

      /* Note that old && !new could be possible when
         c->in_keep_filtered changed in the recent past. */

      if (!old && !new)
        return;

      new_ok = 0;
      goto skip_stats1;
    }
  }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
  {
    /* If routes are sorted, just insert new route to appropriate position */
    if (new)
    {
      if (before_old && !rte_better(new, before_old))
        k = &before_old->next;
      else
        k = &net->routes;

      for (; *k; k=&(*k)->next)
        if (rte_better(new, *k))
          break;

      new->next = *k;
      *k = new;
      table->rt_count++;
    }
  }
  else
  {
    /* If routes are not sorted, find the best route and move it on
       the first position. There are several optimized cases. */

    if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
      goto do_recalculate;

    if (new && rte_better(new, old_best))
    {
      /* The first case - the new route is clearly optimal,
         we link it at the first position */

      new->next = net->routes;
      net->routes = new;
      table->rt_count++;
    }
    else if (old == old_best)
    {
      /* The second case - the old best route disappeared, we add the
         new route (if we have any) to the list (we don't care about
         position) and then we elect the new optimal route and relink
         that route at the first position and announce it. New optimal
         route might be NULL if there are no more routes */

    do_recalculate:
      /* Add the new route to the list */
      if (new)
      {
        new->next = net->routes;
        net->routes = new;
        table->rt_count++;
      }

      /* Find a new optimal route (if there is any) */
      if (net->routes)
      {
        rte **bp = &net->routes;
        for (k=&(*bp)->next; *k; k=&(*k)->next)
          if (rte_better(*k, *bp))
            bp = k;

        /* And relink it */
        rte *best = *bp;
        *bp = best->next;
        best->next = net->routes;
        net->routes = best;
      }
    }
    else if (new)
    {
      /* The third case - the new route is not better than the old
         best route (therefore old_best != NULL) and the old best
         route was not removed (therefore old_best == net->routes).
         We just link the new route after the old best route. */

      ASSERT(net->routes != NULL);
      new->next = net->routes->next;
      net->routes->next = new;
      table->rt_count++;
    }
    /* The fourth (empty) case - suboptimal route was removed, nothing to do */
  }

  if (new)
  {
    new->lastmod = current_time();

    if (!old)
    {
      new->id = hmap_first_zero(&table->id_map);
      hmap_set(&table->id_map, new->id);
    }
    else
      new->id = old->id;
  }

  /* Log the route change */
  if (p->debug & D_ROUTES)
  {
    if (new_ok)
      rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
    else if (old_ok)
    {
      if (old != old_best)
        rte_trace(p, old, '>', "removed");
      else if (rte_is_ok(net->routes))
        rte_trace(p, old, '>', "removed [replaced]");
      else
        rte_trace(p, old, '>', "removed [sole]");
    }
  }

  /* Propagate the route change */
  rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best);

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= current_time()))
    rt_schedule_prune(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
  {
    if (!new)
      hmap_clear(&table->id_map, old->id);

    rte_free_quick(old);
  }
}

static int rte_update_nest_cnt;  /* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}

static inline void
rte_hide_dummy_routes(net *net, rte **dummy)
{
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
  {
    *dummy = net->routes;
    net->routes = (*dummy)->next;
  }
}

static inline void
rte_unhide_dummy_routes(net *net, rte **dummy)
{
  if (*dummy)
  {
    (*dummy)->next = net->routes;
    net->routes = *dummy;
  }
}

/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
 * @c: channel doing the update
 * @net: network node
 * @p: protocol submitting the update
 * @src: protocol originating the update
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
 * same value as @new->attrs->proto. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores @p in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * and stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @n, replace it by the new one (or remove it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */

void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  const struct filter *filter = c->in_filter;
  rte *dummy = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
  {
    /* Create a temporary table node */
    nn = alloca(sizeof(net) + n->length);
    memset(nn, 0, sizeof(net) + n->length);
    net_copy(nn->n.addr, n);

    new->net = nn;
    new->sender = c;

    if (!new->pref)
      new->pref = c->preference;

    stats->imp_updates_received++;
    if (!rte_validate(new))
    {
      rte_trace_in(D_FILTERS, p, new, "invalid");
      stats->imp_updates_invalid++;
      goto drop;
    }

    if (filter == FILTER_REJECT)
    {
      stats->imp_updates_filtered++;
      rte_trace_in(D_FILTERS, p, new, "filtered out");

      if (! c->in_keep_filtered)
        goto drop;

      /* new is a private copy, we can modify it */
      new->flags |= REF_FILTERED;
    }
    else if (filter)
    {
      rta *old_attrs = NULL;
      rte_make_tmp_attrs(&new, rte_update_pool, &old_attrs);

      int fr = f_run(filter, &new, rte_update_pool, 0);
      if (fr > F_ACCEPT)
      {
        stats->imp_updates_filtered++;
        rte_trace_in(D_FILTERS, p, new, "filtered out");

        if (! c->in_keep_filtered)
        {
          rta_free(old_attrs);
          goto drop;
        }

        new->flags |= REF_FILTERED;
      }

      rte_store_tmp_attrs(new, rte_update_pool, old_attrs);
    }
    if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
      new->attrs = rta_lookup(new->attrs);
    new->flags |= REF_COW;

    /* Use the actual struct network, not the dummy one */
    nn = net_get(c->table, n);
    new->net = nn;
  }
  else
  {
    stats->imp_withdraws_received++;

    if (!(nn = net_find(c->table, n)) || !src)
    {
      stats->imp_withdraws_ignored++;
      rte_update_unlock();
      return;
    }
  }

 recalc:
  /* And recalculate the best route */
  rte_hide_dummy_routes(nn, &dummy);
  rte_recalculate(c, nn, new, src);
  rte_unhide_dummy_routes(nn, &dummy);

  rte_update_unlock();
  return;

 drop:
  rte_free(new);
  new = NULL;
  if (nn = net_find(c->table, n))
    goto recalc;

  rte_update_unlock();
}

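/*
 * A minimal sketch of the announcement sequence described above, as a
 * hypothetical protocol would perform it (the names `p', `c', `addr' and the
 * attribute values are assumptions, not taken from any particular protocol):
 *
 *   rta a0 = {
 *     .src = p->main_source,
 *     .source = RTS_STATIC,        // or the protocol's own source value
 *     .scope = SCOPE_UNIVERSE,
 *     .dest = RTD_UNREACHABLE,
 *   };
 *   rte *e = rte_get_temp(&a0);    // un-cached rta; rte_update2() caches it
 *   e->pflags = 0;
 *   rte_update2(c, addr, e, p->main_source);
 *
 * A withdrawal passes NULL instead of the route:
 *
 *   rte_update2(c, addr, NULL, p->main_source);
 */
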
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best);
  rte_update_unlock();
}

static inline void
rte_discard(rte *old)  /* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}

/* Modify existing route by protocol hook, used for long-lived graceful restart */
static inline void
rte_modify(rte *old)
{
  rte_update_lock();

  rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
  if (new != old)
  {
    if (new)
    {
      if (!rta_is_cached(new->attrs))
        new->attrs = rta_lookup(new->attrs);
      new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
    }

    rte_recalculate(old->sender, old->net, new, old->attrs->src);
  }

  rte_update_unlock();
}

/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter)
{
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is stripped down export_filter() */
  int v = p->preexport ? p->preexport(p, &rt, rte_update_pool) : 0;
  if (v == RIC_PROCESS)
  {
    rte_make_tmp_attrs(&rt, rte_update_pool, NULL);
    v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);
  }

  /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}


/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for given routing table and announce
 * hook. The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @c as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
 * flag in rt_refresh_end() and then removing such routes in the prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if (e->sender == c)
        e->flags |= REF_STALE;
  }
  FIB_WALK_END;
}

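/*
 * A sketch of a refresh cycle as a protocol would drive it, e.g. after a
 * BGP route refresh (the channel `c' and the re-announced routes are
 * hypothetical):
 *
 *   rt_refresh_begin(c->table, c);  // mark our routes REF_STALE
 *   ...                             // rte_update2() all current routes;
 *                                   // an unchanged route has its REF_STALE
 *                                   // flag cleared in rte_recalculate()
 *   rt_refresh_end(c->table, c);    // remaining stale routes get
 *                                   // REF_DISCARD and are pruned
 */
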
/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for given routing table and announce
 * hook. See rt_refresh_begin() for description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if ((e->sender == c) && (e->flags & REF_STALE))
      {
        e->flags |= REF_DISCARD;
        prune = 1;
      }
  }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}

void
rt_modify_stale(rtable *t, struct channel *c)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
      {
        e->flags |= REF_MODIFY;
        prune = 1;
      }
  }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}

/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This function dumps contents of a &rte to debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  debug("PF=%02x pref=%d ", e->pflags, e->pref);
  rta_dump(e->attrs);
  if (e->attrs->src->proto->proto->dump_attrs)
    e->attrs->src->proto->proto->dump_attrs(e);
  debug("\n");
}

/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for(e=n->routes; e; e=e->next)
      rte_dump(e);
  }
  FIB_WALK_END;
  debug("\n");
}

/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps contents of all routing tables to debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;

  WALK_LIST(t, routing_tables)
    rt_dump(t);
}

static inline void
rt_schedule_hcu(rtable *tab)
{
  if (tab->hcu_scheduled)
    return;

  tab->hcu_scheduled = 1;
  ev_schedule(tab->rt_event);
}

static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == NHU_CLEAN)
    ev_schedule(tab->rt_event);

  /* state change:
   * NHU_CLEAN -> NHU_SCHEDULED
   * NHU_RUNNING -> NHU_DIRTY
   */
  tab->nhu_state |= NHU_SCHEDULED;
}

void
rt_schedule_prune(rtable *tab)
{
  if (tab->prune_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->prune_state |= 1;
}


static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}

void
rt_setup(pool *p, rtable *t, struct rtable_config *cf)
{
  bzero(t, sizeof(*t));
  t->name = cf->name;
  t->config = cf;
  t->addr_type = cf->addr_type;
  fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
  init_list(&t->channels);

  hmap_init(&t->id_map, p, 1024);
  hmap_set(&t->id_map, 0);

  t->rt_event = ev_new_init(p, rt_event, t);
  t->gc_time = current_time();
}

/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  rte_update_pool = lp_new_default(rt_table_pool);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}


/**
 * rt_prune_table - prune a routing table
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration does not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is also used for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 512;

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
        c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
  {
    rte *e;

  rescan:
    for (e=n->routes; e; e=e->next)
    {
      if (e->sender->flush_active || (e->flags & REF_DISCARD))
      {
        if (limit <= 0)
        {
          FIB_ITERATE_PUT(fit);
          ev_schedule(tab->rt_event);
          return;
        }

        rte_discard(e);
        limit--;

        goto rescan;
      }

      if (e->flags & REF_MODIFY)
      {
        if (limit <= 0)
        {
          FIB_ITERATE_PUT(fit);
          ev_schedule(tab->rt_event);
          return;
        }

        rte_modify(e);
        limit--;

        goto rescan;
      }
    }

    if (!n->routes)  /* Orphaned FIB entry */
    {
      FIB_ITERATE_PUT(fit);
      fib_delete(&tab->fib, n);
      goto again;
    }
  }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  tab->gc_counter = 0;
  tab->gc_time = current_time();

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
    {
      c->flush_active = 0;
      channel_set_state(c, CS_DOWN);
    }

  return;
}

void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol("master4"), NET_IP4);
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
}


/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */

static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  if (!he->src)
    return a->dest != RTD_UNREACHABLE;

  return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
    (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
}

1968 void
1969 rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
1970 {
1971 a->hostentry = he;
1972 a->dest = he->dest;
1973 a->igp_metric = he->igp_metric;
1974
1975 if (a->dest != RTD_UNICAST)
1976 {
1977 /* No nexthop */
1978 no_nexthop:
1979 a->nh = (struct nexthop) {};
1980 if (mls)
1981 { /* Store the label stack for later changes */
1982 a->nh.labels_orig = a->nh.labels = mls->len;
1983 memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
1984 }
1985 return;
1986 }
1987
1988 if (((!mls) || (!mls->len)) && he->nexthop_linkable)
1989 { /* Just link the nexthop chain, no label append happens. */
1990 memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
1991 return;
1992 }
1993
1994 struct nexthop *nhp = NULL, *nhr = NULL;
1995 int skip_nexthop = 0;
1996
1997 for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
1998 {
1999 if (skip_nexthop)
2000 skip_nexthop--;
2001 else
2002 {
2003 nhr = nhp;
2004 nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
2005 }
2006
2007 memset(nhp, 0, NEXTHOP_MAX_SIZE);
2008 nhp->iface = nh->iface;
2009 nhp->weight = nh->weight;
2010
2011 if (mls)
2012 {
2013 nhp->labels = nh->labels + mls->len;
2014 nhp->labels_orig = mls->len;
2015 if (nhp->labels <= MPLS_MAX_LABEL_STACK)
2016 {
2017 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
2018 memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
2019 }
2020 else
2021 {
2022 log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
2023 nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
2024 skip_nexthop++;
2025 continue;
2026 }
2027 }
2028 else if (nh->labels)
2029 {
2030 nhp->labels = nh->labels;
2031 nhp->labels_orig = 0;
2032 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32));
2033 }
2034
2035 if (ipa_nonzero(nh->gw))
2036 {
2037 nhp->gw = nh->gw; /* Router nexthop */
2038 nhp->flags |= (nh->flags & RNF_ONLINK);
2039 }
2040 else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK))
2041 nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */
2042 else if (ipa_nonzero(he->link))
2043 nhp->gw = he->link; /* Device nexthop with link-local address known */
2044 else
2045 nhp->gw = he->addr; /* Device nexthop with link-local address unknown */
2046 }
2047
2048 if (skip_nexthop)
2049 if (nhr)
2050 nhr->next = NULL;
2051 else
2052 {
2053 a->dest = RTD_UNREACHABLE;
2054 log(L_WARN "No valid nexthop remaining, setting route unreachable");
2055 goto no_nexthop;
2056 }
2057 }
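/*
* A worked example of the label merging above (numbers are hypothetical,
* for illustration only): if the hostentry's next hop carries the label
* stack [20, 30] and the route supplies mls->stack = [40], then:
*
*   nhp->labels      = 2 + 1 = 3        (total stack depth)
*   nhp->label[]     = { 20, 30, 40 }   (hostentry labels first, then mls)
*   nhp->labels_orig = 1                (so rt_next_hop_update_rte() can
*                                        split the mls part off again later)
*
* Had the sum exceeded MPLS_MAX_LABEL_STACK, the whole next hop would have
* been dropped via skip_nexthop instead.
*/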
2058
2059 static inline rte *
2060 rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
2061 {
2062 rta *a = alloca(RTA_MAX_SIZE);
2063 memcpy(a, old->attrs, rta_size(old->attrs));
2064
2065 mpls_label_stack mls = { .len = a->nh.labels_orig };
2066 memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
2067
2068 rta_apply_hostentry(a, old->attrs->hostentry, &mls);
2069 a->aflags = 0;
2070
2071 rte *e = sl_alloc(rte_slab);
2072 memcpy(e, old, sizeof(rte));
2073 e->attrs = rta_lookup(a);
2074
2075 return e;
2076 }
2077
2078 static inline int
2079 rt_next_hop_update_net(rtable *tab, net *n)
2080 {
2081 rte **k, *e, *new, *old_best, **new_best;
2082 int count = 0;
2083 int free_old_best = 0;
2084
2085 old_best = n->routes;
2086 if (!old_best)
2087 return 0;
2088
2089 for (k = &n->routes; e = *k; k = &e->next)
2090 if (rta_next_hop_outdated(e->attrs))
2091 {
2092 new = rt_next_hop_update_rte(tab, e);
2093 *k = new;
2094
2095 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
2096 rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
2097
2098 /* Call a pre-comparison hook */
2099 /* Not really an efficient way to compute this */
2100 if (e->attrs->src->proto->rte_recalculate)
2101 e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
2102
2103 if (e != old_best)
2104 rte_free_quick(e);
2105 else /* Freeing of the old best rte is postponed */
2106 free_old_best = 1;
2107
2108 e = new;
2109 count++;
2110 }
2111
2112 if (!count)
2113 return 0;
2114
2115 /* Find the new best route */
2116 new_best = NULL;
2117 for (k = &n->routes; e = *k; k = &e->next)
2118 {
2119 if (!new_best || rte_better(e, *new_best))
2120 new_best = k;
2121 }
2122
2123 /* Relink the new best route to the first position */
2124 new = *new_best;
2125 if (new != n->routes)
2126 {
2127 *new_best = new->next;
2128 new->next = n->routes;
2129 n->routes = new;
2130 }
2131
2132 /* Announce the new best route */
2133 if (new != old_best)
2134 rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
2135
2136 /* Propagate changes */
2137 rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best);
2138
2139 if (free_old_best)
2140 rte_free_quick(old_best);
2141
2142 return count;
2143 }
2144
2145 static void
2146 rt_next_hop_update(rtable *tab)
2147 {
2148 struct fib_iterator *fit = &tab->nhu_fit;
2149 int max_feed = 32;
2150
2151 if (tab->nhu_state == NHU_CLEAN)
2152 return;
2153
2154 if (tab->nhu_state == NHU_SCHEDULED)
2155 {
2156 FIB_ITERATE_INIT(fit, &tab->fib);
2157 tab->nhu_state = NHU_RUNNING;
2158 }
2159
2160 FIB_ITERATE_START(&tab->fib, fit, net, n)
2161 {
2162 if (max_feed <= 0)
2163 {
2164 FIB_ITERATE_PUT(fit);
2165 ev_schedule(tab->rt_event);
2166 return;
2167 }
2168 max_feed -= rt_next_hop_update_net(tab, n);
2169 }
2170 FIB_ITERATE_END;
2171
2172 /* State change:
2173 * NHU_DIRTY -> NHU_SCHEDULED
2174 * NHU_RUNNING -> NHU_CLEAN
2175 */
2176 tab->nhu_state &= 1;
2177
2178 if (tab->nhu_state != NHU_CLEAN)
2179 ev_schedule(tab->rt_event);
2180 }
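/*
* The nhu_state values (see nest/route.h) form a small bit-encoded state
* machine, which is why the transition above is a single '&= 1':
*
*   NHU_CLEAN     = 0   nothing to do
*   NHU_SCHEDULED = 1   update requested, not started yet
*   NHU_RUNNING   = 2   update in progress
*   NHU_DIRTY     = 3   update in progress and another one requested
*
* Masking with 1 keeps the "requested" bit and drops the "running" bit,
* turning NHU_DIRTY into NHU_SCHEDULED and NHU_RUNNING into NHU_CLEAN.
*/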
2181
2182
2183 struct rtable_config *
2184 rt_new_table(struct symbol *s, uint addr_type)
2185 {
2186 /* Hack that allows one to 'redefine' the master table */
2187 if ((s->class == SYM_TABLE) &&
2188 (s->table == new_config->def_tables[addr_type]) &&
2189 ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
2190 return s->table;
2191
2192 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
2193
2194 cf_define_symbol(s, SYM_TABLE, table, c);
2195 c->name = s->name;
2196 c->addr_type = addr_type;
2197 c->gc_max_ops = 1000;
2198 c->gc_min_time = 5;
2199
2200 add_tail(&new_config->tables, &c->n);
2201
2202 /* First table of each type is kept as the default */
2203 if (! new_config->def_tables[addr_type])
2204 new_config->def_tables[addr_type] = c;
2205
2206 return c;
2207 }
2208
2209 /**
2210 * rt_lock_table - lock a routing table
2211 * @r: routing table to be locked
2212 *
2213 * Lock a routing table because it's in use by a protocol,
2214 * preventing it from being freed when it gets undefined in a new
2215 * configuration.
2216 */
2217 void
2218 rt_lock_table(rtable *r)
2219 {
2220 r->use_count++;
2221 }
2222
2223 /**
2224 * rt_unlock_table - unlock a routing table
2225 * @r: routing table to be unlocked
2226 *
2227 * Unlock a routing table formerly locked by rt_lock_table(),
2228 * that is, decrease its use count and delete the table if it's
2229 * scheduled for deletion by a configuration change.
2230 */
2231 void
2232 rt_unlock_table(rtable *r)
2233 {
2234 if (!--r->use_count && r->deleted)
2235 {
2236 struct config *conf = r->deleted;
2237 DBG("Deleting routing table %s\n", r->name);
2238 r->config->table = NULL;
2239 if (r->hostcache)
2240 rt_free_hostcache(r);
2241 rem_node(&r->n);
2242 fib_free(&r->fib);
2243 hmap_free(&r->id_map);
2244 rfree(r->rt_event);
2245 mb_free(r);
2246 config_del_obstacle(conf);
2247 }
2248 }
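/*
* A minimal usage sketch (hypothetical caller, not part of this file):
* a protocol locks every table it uses for as long as it is connected,
* typically around channel setup and teardown:
*
*   rt_lock_table(c->table);    // table cannot be freed while in use
*   ... use the table ...
*   rt_unlock_table(c->table);  // may delete the table if a reconfiguration
*                               // has meanwhile scheduled it for removal
*/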
2249
2250 static struct rtable_config *
2251 rt_find_table_config(struct config *cf, char *name)
2252 {
2253 struct symbol *sym = cf_find_symbol(cf, name);
2254 return (sym && (sym->class == SYM_TABLE)) ? sym->table : NULL;
2255 }
2256
2257 /**
2258 * rt_commit - commit new routing table configuration
2259 * @new: new configuration
2260 * @old: original configuration or %NULL if it's boot time config
2261 *
2262 * Scan the differences between @old and @new configuration and modify
2263 * the routing tables according to these changes. If @new defines a
2264 * previously unknown table, create it; if it omits a table existing
2265 * in @old, schedule that table for deletion (it gets deleted when all
2266 * protocols disconnect from it by calling rt_unlock_table()); if a table
2267 * exists in both configurations, leave it unchanged.
2268 */
2269 void
2270 rt_commit(struct config *new, struct config *old)
2271 {
2272 struct rtable_config *o, *r;
2273
2274 DBG("rt_commit:\n");
2275 if (old)
2276 {
2277 WALK_LIST(o, old->tables)
2278 {
2279 rtable *ot = o->table;
2280 if (!ot->deleted)
2281 {
2282 r = rt_find_table_config(new, o->name);
2283 if (r && (r->addr_type == o->addr_type) && !new->shutdown)
2284 {
2285 DBG("\t%s: same\n", o->name);
2286 r->table = ot;
2287 ot->name = r->name;
2288 ot->config = r;
2289 if (o->sorted != r->sorted)
2290 log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
2291 }
2292 else
2293 {
2294 DBG("\t%s: deleted\n", o->name);
2295 ot->deleted = old;
2296 config_add_obstacle(old);
2297 rt_lock_table(ot);
2298 rt_unlock_table(ot);
2299 }
2300 }
2301 }
2302 }
2303
2304 WALK_LIST(r, new->tables)
2305 if (!r->table)
2306 {
2307 rtable *t = mb_allocz(rt_table_pool, sizeof(struct rtable));
2308 DBG("\t%s: created\n", r->name);
2309 rt_setup(rt_table_pool, t, r);
2310 add_tail(&routing_tables, &t->n);
2311 r->table = t;
2312 }
2313 DBG("\tdone\n");
2314 }
2315
2316 static inline void
2317 do_feed_channel(struct channel *c, net *n, rte *e)
2318 {
2319 rte_update_lock();
2320 if (c->ra_mode == RA_ACCEPTED)
2321 rt_notify_accepted(c, n, NULL, NULL, c->refeeding);
2322 else if (c->ra_mode == RA_MERGED)
2323 rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding);
2324 else /* RA_BASIC */
2325 rt_notify_basic(c, n, e, e, c->refeeding);
2326 rte_update_unlock();
2327 }
2328
2329 /**
2330 * rt_feed_channel - advertise all routes to a channel
2331 * @c: channel to be fed
2332 *
2333 * This function performs one pass of advertisement of routes to a channel that
2334 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2335 * has something to do. (We avoid transferring all the routes in a single
2336 * pass in order not to monopolize CPU time.)
2337 */
2338 int
2339 rt_feed_channel(struct channel *c)
2340 {
2341 struct fib_iterator *fit = &c->feed_fit;
2342 int max_feed = 256;
2343
2344 ASSERT(c->export_state == ES_FEEDING);
2345
2346 if (!c->feed_active)
2347 {
2348 FIB_ITERATE_INIT(fit, &c->table->fib);
2349 c->feed_active = 1;
2350 }
2351
2352 FIB_ITERATE_START(&c->table->fib, fit, net, n)
2353 {
2354 rte *e = n->routes;
2355 if (max_feed <= 0)
2356 {
2357 FIB_ITERATE_PUT(fit);
2358 return 0;
2359 }
2360
2361 if ((c->ra_mode == RA_OPTIMAL) ||
2362 (c->ra_mode == RA_ACCEPTED) ||
2363 (c->ra_mode == RA_MERGED))
2364 if (rte_is_valid(e))
2365 {
2366 /* In the meantime, the protocol may have gone down */
2367 if (c->export_state != ES_FEEDING)
2368 goto done;
2369
2370 do_feed_channel(c, n, e);
2371 max_feed--;
2372 }
2373
2374 if (c->ra_mode == RA_ANY)
2375 for(e = n->routes; e; e = e->next)
2376 {
2377 /* In the meantime, the protocol may have gone down */
2378 if (c->export_state != ES_FEEDING)
2379 goto done;
2380
2381 if (!rte_is_valid(e))
2382 continue;
2383
2384 do_feed_channel(c, n, e);
2385 max_feed--;
2386 }
2387 }
2388 FIB_ITERATE_END;
2389
2390 done:
2391 c->feed_active = 0;
2392 return 1;
2393 }
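/*
* A sketch of the expected calling pattern (hypothetical driver loop; the
* real one lives in the channel state machine): rt_feed_channel() returns
* 0 while more passes are needed and 1 once the feed is complete, so it
* is invoked repeatedly, yielding between passes:
*
*   while (c->export_state == ES_FEEDING)
*     if (rt_feed_channel(c))
*       break;  // feeding done, the caller moves the channel to ES_READY
*/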
2394
2395 /**
2396 * rt_feed_channel_abort - abort protocol feeding
2397 * @c: channel
2398 *
2399 * This function is called by the protocol code when the protocol stops or
2400 * ceases to exist while it is being fed.
2401 */
2402 void
2403 rt_feed_channel_abort(struct channel *c)
2404 {
2405 if (c->feed_active)
2406 {
2407 /* Unlink the iterator */
2408 fit_get(&c->table->fib, &c->feed_fit);
2409 c->feed_active = 0;
2410 }
2411 }
2412
2413
2414 /*
2415 * Import table
2416 */
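/*
* The import table (c->in_table) keeps a copy of the routes as received
* from the neighbor, before import filters are applied. This lets a channel
* be reloaded locally (rt_reload_channel() below re-announces the stored
* routes through the filters) without asking the peer for a route refresh.
*/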
2417
2418 int
2419 rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
2420 {
2421 struct rtable *tab = c->in_table;
2422 rte *old, **pos;
2423 net *net;
2424
2425 if (new)
2426 {
2427 net = net_get(tab, n);
2428
2429 if (!new->pref)
2430 new->pref = c->preference;
2431
2432 if (!rta_is_cached(new->attrs))
2433 new->attrs = rta_lookup(new->attrs);
2434 }
2435 else
2436 {
2437 net = net_find(tab, n);
2438
2439 if (!net)
2440 goto drop_withdraw;
2441 }
2442
2443 /* Find the old rte */
2444 for (pos = &net->routes; old = *pos; pos = &old->next)
2445 if (old->attrs->src == src)
2446 {
2447 if (new && rte_same(old, new))
2448 {
2449 /* Refresh the old rte, continue with update to main rtable */
2450 if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
2451 {
2452 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
2453 return 1;
2454 }
2455
2456 goto drop_update;
2457 }
2458
2459 /* Move iterator if needed */
2460 if (old == c->reload_next_rte)
2461 c->reload_next_rte = old->next;
2462
2463 /* Remove the old rte */
2464 *pos = old->next;
2465 rte_free_quick(old);
2466 tab->rt_count--;
2467
2468 break;
2469 }
2470
2471 if (!new)
2472 {
2473 if (!old)
2474 goto drop_withdraw;
2475
2476 return 1;
2477 }
2478
2479 struct channel_limit *l = &c->rx_limit;
2480 if (l->action && !old)
2481 {
2482 if (tab->rt_count >= l->limit)
2483 channel_notify_limit(c, l, PLD_RX, tab->rt_count);
2484
2485 if (l->state == PLS_BLOCKED)
2486 {
2487 rte_trace_in(D_FILTERS, c->proto, new, "ignored [limit]");
2488 goto drop_update;
2489 }
2490 }
2491
2492 /* Insert the new rte */
2493 rte *e = rte_do_cow(new);
2494 e->flags |= REF_COW;
2495 e->net = net;
2496 e->sender = c;
2497 e->lastmod = current_time();
2498 e->next = *pos;
2499 *pos = e;
2500 tab->rt_count++;
2501 return 1;
2502
2503 drop_update:
2504 c->stats.imp_updates_received++;
2505 c->stats.imp_updates_ignored++;
2506 rte_free(new);
2507 return 0;
2508
2509 drop_withdraw:
2510 c->stats.imp_withdraws_received++;
2511 c->stats.imp_withdraws_ignored++;
2512 return 0;
2513 }
2514
2515 int
2516 rt_reload_channel(struct channel *c)
2517 {
2518 struct rtable *tab = c->in_table;
2519 struct fib_iterator *fit = &c->reload_fit;
2520 int max_feed = 64;
2521
2522 ASSERT(c->channel_state == CS_UP);
2523
2524 if (!c->reload_active)
2525 {
2526 FIB_ITERATE_INIT(fit, &tab->fib);
2527 c->reload_active = 1;
2528 }
2529
2530 do {
2531 for (rte *e = c->reload_next_rte; e; e = e->next)
2532 {
2533 if (max_feed-- <= 0)
2534 {
2535 c->reload_next_rte = e;
2536 debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed);
2537 return 0;
2538 }
2539
2540 rte_update2(c, e->net->n.addr, rte_do_cow(e), e->attrs->src);
2541 }
2542
2543 c->reload_next_rte = NULL;
2544
2545 FIB_ITERATE_START(&tab->fib, fit, net, n)
2546 {
2547 if (c->reload_next_rte = n->routes)
2548 {
2549 FIB_ITERATE_PUT_NEXT(fit, &tab->fib);
2550 break;
2551 }
2552 }
2553 FIB_ITERATE_END;
2554 }
2555 while (c->reload_next_rte);
2556
2557 c->reload_active = 0;
2558 return 1;
2559 }
2560
2561 void
2562 rt_reload_channel_abort(struct channel *c)
2563 {
2564 if (c->reload_active)
2565 {
2566 /* Unlink the iterator */
2567 fit_get(&c->in_table->fib, &c->reload_fit);
2568 c->reload_next_rte = NULL;
2569 c->reload_active = 0;
2570 }
2571 }
2572
2573 void
2574 rt_prune_sync(rtable *t, int all)
2575 {
2576 FIB_WALK(&t->fib, net, n)
2577 {
2578 rte *e, **ee = &n->routes;
2579 while (e = *ee)
2580 {
2581 if (all || (e->flags & (REF_STALE | REF_DISCARD)))
2582 {
2583 *ee = e->next;
2584 rte_free_quick(e);
2585 t->rt_count--;
2586 }
2587 else
2588 ee = &e->next;
2589 }
2590 }
2591 FIB_WALK_END;
2592 }
2593
2594
2595 /*
2596 * Export table
2597 */
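/*
* The export table (c->out_table) mirrors the routes as actually announced
* to the neighbor, after export processing, so the advertised state can be
* inspected without re-running the export path.
*/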
2598
2599 int
2600 rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed)
2601 {
2602 struct rtable *tab = c->out_table;
2603 struct rte_src *src;
2604 rte *old, **pos;
2605 net *net;
2606
2607 if (new)
2608 {
2609 net = net_get(tab, n);
2610 src = new->attrs->src;
2611
2612 rte_store_tmp_attrs(new, rte_update_pool, NULL);
2613
2614 if (!rta_is_cached(new->attrs))
2615 new->attrs = rta_lookup(new->attrs);
2616 }
2617 else
2618 {
2619 net = net_find(tab, n);
2620 src = old0->attrs->src;
2621
2622 if (!net)
2623 goto drop_withdraw;
2624 }
2625
2626 /* Find the old rte */
2627 for (pos = &net->routes; old = *pos; pos = &old->next)
2628 if ((c->ra_mode != RA_ANY) || (old->attrs->src == src))
2629 {
2630 if (new && rte_same(old, new))
2631 {
2632 /* REF_STALE / REF_DISCARD not used in export table */
2633 /*
2634 if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
2635 {
2636 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
2637 return 1;
2638 }
2639 */
2640
2641 goto drop_update;
2642 }
2643
2644 /* Remove the old rte */
2645 *pos = old->next;
2646 rte_free_quick(old);
2647 tab->rt_count--;
2648
2649 break;
2650 }
2651
2652 if (!new)
2653 {
2654 if (!old)
2655 goto drop_withdraw;
2656
2657 return 1;
2658 }
2659
2660 /* Insert the new rte */
2661 rte *e = rte_do_cow(new);
2662 e->flags |= REF_COW;
2663 e->net = net;
2664 e->sender = c;
2665 e->lastmod = current_time();
2666 e->next = *pos;
2667 *pos = e;
2668 tab->rt_count++;
2669 return 1;
2670
2671 drop_update:
2672 return refeed;
2673
2674 drop_withdraw:
2675 return 0;
2676 }
2677
2678
2679 /*
2680 * Hostcache
2681 */
2682
2683 static inline u32
2684 hc_hash(ip_addr a, rtable *dep)
2685 {
2686 return ipa_hash(a) ^ ptr_hash(dep);
2687 }
2688
2689 static inline void
2690 hc_insert(struct hostcache *hc, struct hostentry *he)
2691 {
2692 uint k = he->hash_key >> hc->hash_shift;
2693 he->next = hc->hash_table[k];
2694 hc->hash_table[k] = he;
2695 }
2696
2697 static inline void
2698 hc_remove(struct hostcache *hc, struct hostentry *he)
2699 {
2700 struct hostentry **hep;
2701 uint k = he->hash_key >> hc->hash_shift;
2702
2703 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2704 *hep = he->next;
2705 }
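/*
* Buckets are indexed by the top hash_order bits of the 32-bit key
* (hash_shift = 32 - hash_order, set in hc_alloc_table() below), so a
* resize only changes the shift and entries are rehashed by re-insertion.
*/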
2706
2707 #define HC_DEF_ORDER 10
2708 #define HC_HI_MARK *4
2709 #define HC_HI_STEP 2
2710 #define HC_HI_ORDER 16 /* Must be at most 16 */
2711 #define HC_LO_MARK /5
2712 #define HC_LO_STEP 2
2713 #define HC_LO_ORDER 10
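/*
* The HC_HI_MARK / HC_LO_MARK macros expand to an operator plus operand and
* are spliced directly into the expressions below. For example, with
* order = 12 (hsize = 4096):
*
*   hsize HC_HI_MARK  =>  4096 *4  = 16384   (grow when items exceed this)
*   hsize HC_LO_MARK  =>  4096 /5  = 819     (shrink when items drop below)
*/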
2714
2715 static void
2716 hc_alloc_table(struct hostcache *hc, unsigned order)
2717 {
2718 uint hsize = 1 << order;
2719 hc->hash_order = order;
2720 hc->hash_shift = 32 - order;
2721 hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
2722 hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);
2723
2724 hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2725 }
2726
2727 static void
2728 hc_resize(struct hostcache *hc, unsigned new_order)
2729 {
2730 struct hostentry **old_table = hc->hash_table;
2731 struct hostentry *he, *hen;
2732 uint old_size = 1 << hc->hash_order;
2733 uint i;
2734
2735 hc_alloc_table(hc, new_order);
2736 for (i = 0; i < old_size; i++)
2737 for (he = old_table[i]; he != NULL; he=hen)
2738 {
2739 hen = he->next;
2740 hc_insert(hc, he);
2741 }
2742 mb_free(old_table);
2743 }
2744
2745 static struct hostentry *
2746 hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2747 {
2748 struct hostentry *he = sl_alloc(hc->slab);
2749
2750 *he = (struct hostentry) {
2751 .addr = a,
2752 .link = ll,
2753 .tab = dep,
2754 .hash_key = k,
2755 };
2756
2757 add_tail(&hc->hostentries, &he->ln);
2758 hc_insert(hc, he);
2759
2760 hc->hash_items++;
2761 if (hc->hash_items > hc->hash_max)
2762 hc_resize(hc, hc->hash_order + HC_HI_STEP);
2763
2764 return he;
2765 }
2766
2767 static void
2768 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2769 {
2770 rta_free(he->src);
2771
2772 rem_node(&he->ln);
2773 hc_remove(hc, he);
2774 sl_free(hc->slab, he);
2775
2776 hc->hash_items--;
2777 if (hc->hash_items < hc->hash_min)
2778 hc_resize(hc, hc->hash_order - HC_LO_STEP);
2779 }
2780
2781 static void
2782 rt_init_hostcache(rtable *tab)
2783 {
2784 struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2785 init_list(&hc->hostentries);
2786
2787 hc->hash_items = 0;
2788 hc_alloc_table(hc, HC_DEF_ORDER);
2789 hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2790
2791 hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024));
2792 hc->trie = f_new_trie(hc->lp, 0);
2793
2794 tab->hostcache = hc;
2795 }
2796
2797 static void
2798 rt_free_hostcache(rtable *tab)
2799 {
2800 struct hostcache *hc = tab->hostcache;
2801
2802 node *n;
2803 WALK_LIST(n, hc->hostentries)
2804 {
2805 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2806 rta_free(he->src);
2807
2808 if (he->uc)
2809 log(L_ERR "Hostcache is not empty in table %s", tab->name);
2810 }
2811
2812 rfree(hc->slab);
2813 rfree(hc->lp);
2814 mb_free(hc->hash_table);
2815 mb_free(hc);
2816 }
2817
2818 static void
2819 rt_notify_hostcache(rtable *tab, net *net)
2820 {
2821 if (tab->hcu_scheduled)
2822 return;
2823
2824 if (trie_match_net(tab->hostcache->trie, net->n.addr))
2825 rt_schedule_hcu(tab);
2826 }
2827
2828 static int
2829 if_local_addr(ip_addr a, struct iface *i)
2830 {
2831 struct ifa *b;
2832
2833 WALK_LIST(b, i->addrs)
2834 if (ipa_equal(a, b->ip))
2835 return 1;
2836
2837 return 0;
2838 }
2839
2840 u32
2841 rt_get_igp_metric(rte *rt)
2842 {
2843 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2844
2845 if (ea)
2846 return ea->u.data;
2847
2848 rta *a = rt->attrs;
2849
2850 #ifdef CONFIG_OSPF
2851 if ((a->source == RTS_OSPF) ||
2852 (a->source == RTS_OSPF_IA) ||
2853 (a->source == RTS_OSPF_EXT1))
2854 return rt->u.ospf.metric1;
2855 #endif
2856
2857 #ifdef CONFIG_RIP
2858 if (a->source == RTS_RIP)
2859 return rt->u.rip.metric;
2860 #endif
2861
2862 #ifdef CONFIG_BGP
2863 if (a->source == RTS_BGP)
2864 {
2865 u64 metric = bgp_total_aigp_metric(rt);
2866 return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
2867 }
2868 #endif
2869
2870 if (a->source == RTS_DEVICE)
2871 return 0;
2872
2873 return IGP_METRIC_UNKNOWN;
2874 }
2875
2876 static int
2877 rt_update_hostentry(rtable *tab, struct hostentry *he)
2878 {
2879 rta *old_src = he->src;
2880 int direct = 0;
2881 int pxlen = 0;
2882
2883 /* Reset the hostentry */
2884 he->src = NULL;
2885 he->dest = RTD_UNREACHABLE;
2886 he->nexthop_linkable = 0;
2887 he->igp_metric = 0;
2888
2889 net_addr he_addr;
2890 net_fill_ip_host(&he_addr, he->addr);
2891 net *n = net_route(tab, &he_addr);
2892 if (n)
2893 {
2894 rte *e = n->routes;
2895 rta *a = e->attrs;
2896 pxlen = n->n.addr->pxlen;
2897
2898 if (a->hostentry)
2899 {
2900 /* Recursive route should not depend on another recursive route */
2901 log(L_WARN "Next hop address %I resolvable through recursive route for %N",
2902 he->addr, n->n.addr);
2903 goto done;
2904 }
2905
2906 if (a->dest == RTD_UNICAST)
2907 {
2908 for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
2909 if (ipa_zero(nh->gw))
2910 {
2911 if (if_local_addr(he->addr, nh->iface))
2912 {
2913 /* The host address is a local address, which is not valid */
2914 log(L_WARN "Next hop address %I is a local address of iface %s",
2915 he->addr, nh->iface->name);
2916 goto done;
2917 }
2918
2919 direct++;
2920 }
2921 }
2922
2923 he->src = rta_clone(a);
2924 he->dest = a->dest;
2925 he->nexthop_linkable = !direct;
2926 he->igp_metric = rt_get_igp_metric(e);
2927 }
2928
2929 done:
2930 /* Add a prefix range to the trie */
2931 trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
2932
2933 rta_free(old_src);
2934 return old_src != he->src;
2935 }
2936
2937 static void
2938 rt_update_hostcache(rtable *tab)
2939 {
2940 struct hostcache *hc = tab->hostcache;
2941 struct hostentry *he;
2942 node *n, *x;
2943
2944 /* Reset the trie */
2945 lp_flush(hc->lp);
2946 hc->trie = f_new_trie(hc->lp, 0);
2947
2948 WALK_LIST_DELSAFE(n, x, hc->hostentries)
2949 {
2950 he = SKIP_BACK(struct hostentry, ln, n);
2951 if (!he->uc)
2952 {
2953 hc_delete_hostentry(hc, he);
2954 continue;
2955 }
2956
2957 if (rt_update_hostentry(tab, he))
2958 rt_schedule_nhu(he->tab);
2959 }
2960
2961 tab->hcu_scheduled = 0;
2962 }
2963
2964 struct hostentry *
2965 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2966 {
2967 struct hostentry *he;
2968
2969 if (!tab->hostcache)
2970 rt_init_hostcache(tab);
2971
2972 u32 k = hc_hash(a, dep);
2973 struct hostcache *hc = tab->hostcache;
2974 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2975 if (ipa_equal(he->addr, a) && (he->tab == dep))
2976 return he;
2977
2978 he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
2979 rt_update_hostentry(tab, he);
2980 return he;
2981 }
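/*
* A sketch of how a protocol resolves a recursive next hop (roughly what
* rta_set_recursive_next_hop() in nest/route.h does; illustrative only):
*
*   struct hostentry *he = rt_get_hostentry(igp_table, gw, ll, dep_table);
*   rta_apply_hostentry(a, he, mls);
*
* The hostentry is cached and shared; later IGP changes reach the dependent
* table via rt_update_hostentry() and rt_schedule_nhu() above.
*/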
2982
2983
2984 /*
2985 * Documentation for functions declared inline in route.h
2986 */
2987 #if 0
2988
2989 /**
2990 * net_find - find a network entry
2991 * @tab: a routing table
2992 * @addr: address of the network
2993 *
2994 * net_find() looks up the given network in routing table @tab and
2995 * returns a pointer to its &net entry or %NULL if no such network
2996 * exists.
2997 */
2998 static inline net *net_find(rtable *tab, net_addr *addr)
2999 { DUMMY; }
3000
3001 /**
3002 * net_get - obtain a network entry
3003 * @tab: a routing table
3004 * @addr: address of the network
3005 *
3006 * net_get() looks up the given network in routing table @tab and
3007 * returns a pointer to its &net entry. If no such entry exists, it's
3008 * created.
3009 */
3010 static inline net *net_get(rtable *tab, net_addr *addr)
3011 { DUMMY; }
3012
3013 /**
3014 * rte_cow - copy a route for writing
3015 * @r: a route entry to be copied
3016 *
3017 * rte_cow() takes a &rte and prepares it for modification. The exact action
3018 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
3019 * just returned unchanged, else a new temporary entry with the same contents
3020 * is created.
3021 *
3022 * The primary use of this function is inside the filter machinery -- when
3023 * a filter wants to modify &rte contents (to change the preference or to
3024 * attach another set of attributes), it must ensure that the &rte is not
3025 * shared with anyone else (and especially that it isn't stored in any routing
3026 * table).
3027 *
3028 * Result: a pointer to the new writable &rte.
3029 */
3030 static inline rte * rte_cow(rte *r)
3031 { DUMMY; }
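/*
* A typical (hypothetical) use inside the filter code: make the route
* writable before modifying it, then continue with the private copy:
*
*   rte *w = rte_cow(r);
*   w->pref = 120;
*/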
3032
3033 #endif