/*
 * BIRD -- Routing Tables
 *
 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
 *
 * Can be freely distributed and used under the terms of the GNU GPL.
 */

/**
 * DOC: Routing tables
 *
 * Routing tables are probably the most important structures BIRD uses. They
 * hold all the information about known networks, the associated routes and
 * their attributes.
 *
 * There are multiple routing tables (a primary one together with any
 * number of secondary ones if requested by the configuration). Each table
 * is basically a FIB containing entries describing the individual
 * destination networks. For each network (represented by structure &net),
 * there is a one-way linked list of route entries (&rte), the first entry
 * on the list being the best one (i.e., the one we currently use
 * for routing); the order of the other ones is undetermined.
 *
 * The &rte contains information about the route. There are net and src, which
 * together form a key identifying the route in a routing table. There is a
 * pointer to a &rta structure (see the route attribute module for a precise
 * explanation) holding the route attributes, which are primary data about the
 * route. There are several technical fields used by routing table code (route
 * id, REF_* flags). There is also the pflags field, holding protocol-specific
 * flags. They are not used by routing table code, but by protocol-specific
 * hooks. In contrast to route attributes, they are not primary data and their
 * validity is also limited to the routing table.
 *
 * There are several mechanisms that allow automatic update of routes in one
 * routing table (dst) as a result of changes in another routing table (src).
 * They handle issues of recursive next hop resolving, flowspec validation and
 * RPKI validation.
 *
 * The first such mechanism is handling of recursive next hops. A route in the
 * dst table has an indirect next hop address, which is resolved through a route
 * in the src table (which may also be the same table) to get an immediate next
 * hop. This is implemented using structure &hostcache attached to the src
 * table, which contains &hostentry structures for each tracked next hop
 * address. These structures are linked from recursive routes in dst tables,
 * possibly multiple routes sharing one hostentry (as many routes may have the
 * same indirect next hop). There is also a trie in the hostcache, which matches
 * all prefixes that may influence resolving of tracked next hops.
 *
 * When a best route changes in the src table, the hostcache is notified using
 * rt_notify_hostcache(), which immediately checks using the trie whether the
 * change is relevant and if it is, then it schedules asynchronous hostcache
 * recomputation. The recomputation is done by rt_update_hostcache() (called
 * from rt_event() of the src table), which walks through all hostentries and
 * resolves them (by rt_update_hostentry()). It also updates the trie. If a
 * change in hostentry resolution was found, then it schedules asynchronous
 * nexthop recomputation of the associated dst table. That is done by
 * rt_next_hop_update() (called from rt_event() of the dst table), which
 * iterates over all routes in the dst table and re-examines their hostentries
 * for changes. Note that in contrast to hostcache update, next hop update can
 * be interrupted by the main loop. These two full-table walks (over hostcache
 * and dst table) are necessary due to absence of direct lookups
 * (route -> affected nexthop, nexthop -> its route).
 *
 * The second mechanism is for flowspec validation, where validity of flowspec
 * routes depends on resolving their network prefixes in IP routing tables. This
 * is similar to the recursive next hop mechanism, but simpler as there are no
 * intermediate hostcache and hostentries (because flows are less likely to
 * share a common net prefix than routes sharing a common next hop). In the src
 * table, there is a list of dst tables (list flowspec_links); this list is
 * updated by flowspec channels (by rt_flowspec_link() and rt_flowspec_unlink()
 * during channel start/stop). Each dst table has its own trie of prefixes that
 * may influence validation of flowspec routes in it (flowspec_trie).
 *
 * When a best route changes in the src table, rt_flowspec_notify() immediately
 * checks all dst tables from the list using their tries to see whether the
 * change is relevant for them. If it is, then an asynchronous re-validation of
 * flowspec routes in the dst table is scheduled. That is also done by function
 * rt_next_hop_update(), like the nexthop recomputation above. It iterates over
 * all flowspec routes and re-validates them. It also recalculates the trie.
 *
 * Note that in contrast to the hostcache update, here the trie is recalculated
 * during the rt_next_hop_update(), which may be interleaved with IP route
 * updates. The trie is flushed at the beginning of recalculation, which means
 * that such updates may use a partial trie to see if they are relevant. But it
 * works anyway! Either the affected flowspec was already re-validated and added
 * to the trie, then the IP route change would match the trie and trigger a next
 * round of re-validation, or it was not yet re-validated and added to the trie,
 * but will be re-validated later in this round anyway.
 *
 * The third mechanism is used for RPKI re-validation of IP routes and it is the
 * simplest. It is just a list of subscribers in the src table, which are
 * notified whenever any change happens, but only after a settle time. Also, in
 * the RPKI case the dst is not a table, but a channel, which refeeds routes
 * through a filter.
 */
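/*
 * Illustrative sketch (editor's addition, not upstream code): walking the
 * per-network route list described above. The first entry of net->routes is
 * the best route; the remaining entries follow in undetermined order.
 *
 *   uint cnt = 0;
 *   for (rte *e = n->routes; e; e = e->next)
 *     if (rte_is_valid(e))
 *       cnt++;                      // count valid routes for network n
 */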

#undef LOCAL_DEBUG

#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/mpls.h"
#include "lib/resource.h"
#include "lib/event.h"
#include "lib/timer.h"
#include "lib/string.h"
#include "conf/conf.h"
#include "filter/filter.h"
#include "filter/data.h"
#include "lib/hash.h"
#include "lib/alloca.h"
#include "lib/flowspec.h"

#ifdef CONFIG_BGP
#include "proto/bgp/bgp.h"
#endif

pool *rt_table_pool;

static slab *rte_slab;
linpool *rte_update_pool;

list routing_tables;

static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);
static inline void rt_schedule_notify(rtable *tab);
static void rt_flowspec_notify(rtable *tab, net *net);
static void rt_kick_prune_timer(rtable *tab);


static void
net_init_with_trie(struct fib *f, void *N)
{
  rtable *tab = SKIP_BACK(rtable, fib, f);
  net *n = N;

  if (tab->trie)
    trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);

  if (tab->trie_new)
    trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
}

static inline net *
net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0)
{
  TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n)
  {
    net *r;
    if (r = net_find_valid(t, (net_addr *) &n))
      return r;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}

static inline net *
net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0)
{
  TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px)
  {
    net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd);

    net *r;
    if (r = net_find_valid(t, (net_addr *) &n))
      return r;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}

static inline net *
net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0)
{
  TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n)
  {
    net *r;
    if (r = net_find_valid(t, (net_addr *) &n))
      return r;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}

static inline net *
net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0)
{
  TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
  {
    net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd);

    net *r;
    if (r = net_find_valid(t, (net_addr *) &n))
      return r;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}

static inline void *
net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0)
{
  TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px)
  {
    net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen);
    net *best = NULL;
    int best_pxlen = 0;

    /* We need to do dst-first matching. Since sadr addresses are hashed on dst
       prefix only, find the hash table chain and go through it to find the
       match with the longest matching src prefix. */
    for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_ip6_sadr *a = (void *) fn->addr;

      if (net_equal_dst_ip6_sadr(&n, a) &&
          net_in_net_src_ip6_sadr(&n, a) &&
          (a->src_pxlen >= best_pxlen))
      {
        best = fib_node_to_user(&t->fib, fn);
        best_pxlen = a->src_pxlen;
      }
    }

    if (best)
      return best;
  }
  TRIE_WALK_TO_ROOT_END;

  return NULL;
}
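/*
 * Editor's note on the dst-first rule above: among entries whose dst prefix
 * equals the queried dst, the entry whose src prefix covers the queried src
 * with the greatest src_pxlen wins; shorter dst prefixes are tried only after
 * the current trie level yields no match at all.
 */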

static inline net *
net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0)
{
  net_addr_ip4 n;
  net_copy_ip4(&n, n0);

  net *r;
  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
  {
    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return r;
}

static inline net *
net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0)
{
  net_addr_vpn4 n;
  net_copy_vpn4(&n, n0);

  net *r;
  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
  {
    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return r;
}

static inline net *
net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0)
{
  net_addr_ip6 n;
  net_copy_ip6(&n, n0);

  net *r;
  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
  {
    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return r;
}

static inline net *
net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0)
{
  net_addr_vpn6 n;
  net_copy_vpn6(&n, n0);

  net *r;
  while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0))
  {
    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return r;
}

static inline void *
net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0)
{
  net_addr_ip6_sadr n;
  net_copy_ip6_sadr(&n, n0);

  while (1)
  {
    net *best = NULL;
    int best_pxlen = 0;

    /* We need to do dst-first matching. Since sadr addresses are hashed on dst
       prefix only, find the hash table chain and go through it to find the
       match with the longest matching src prefix. */
    for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_ip6_sadr *a = (void *) fn->addr;

      if (net_equal_dst_ip6_sadr(&n, a) &&
          net_in_net_src_ip6_sadr(&n, a) &&
          (a->src_pxlen >= best_pxlen))
      {
        best = fib_node_to_user(&t->fib, fn);
        best_pxlen = a->src_pxlen;
      }
    }

    if (best)
      return best;

    if (!n.dst_pxlen)
      break;

    n.dst_pxlen--;
    ip6_clrbit(&n.dst_prefix, n.dst_pxlen);
  }

  return NULL;
}

net *
net_route(rtable *tab, const net_addr *n)
{
  ASSERT(tab->addr_type == n->type);

  switch (n->type)
  {
  case NET_IP4:
    if (tab->trie)
      return net_route_ip4_trie(tab, (net_addr_ip4 *) n);
    else
      return net_route_ip4_fib (tab, (net_addr_ip4 *) n);

  case NET_VPN4:
    if (tab->trie)
      return net_route_vpn4_trie(tab, (net_addr_vpn4 *) n);
    else
      return net_route_vpn4_fib (tab, (net_addr_vpn4 *) n);

  case NET_IP6:
    if (tab->trie)
      return net_route_ip6_trie(tab, (net_addr_ip6 *) n);
    else
      return net_route_ip6_fib (tab, (net_addr_ip6 *) n);

  case NET_VPN6:
    if (tab->trie)
      return net_route_vpn6_trie(tab, (net_addr_vpn6 *) n);
    else
      return net_route_vpn6_fib (tab, (net_addr_vpn6 *) n);

  case NET_IP6_SADR:
    if (tab->trie)
      return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n);
    else
      return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n);

  default:
    return NULL;
  }
}
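/*
 * Usage sketch (editor's illustration): net_route() performs a longest-prefix
 * match. In a table holding 0.0.0.0/0 and 10.0.0.0/8, a query for 10.1.2.0/24
 * returns the &net for 10.0.0.0/8 -- via the trie walk when tab->trie is set,
 * or by clearing host bits one at a time (see the _fib variants) otherwise.
 */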


static int
net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  int anything = 0;

  TRIE_WALK_TO_ROOT_IP4(tab->trie, px, px0)
  {
    net_addr_roa4 roa0 = NET_ADDR_ROA4(px0.prefix, px0.pxlen, 0, 0);

    struct fib_node *fn;
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &roa0) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }
  }
  TRIE_WALK_TO_ROOT_END;

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

static int
net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn)
{
  struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa4 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip4_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

static int
net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  int anything = 0;

  TRIE_WALK_TO_ROOT_IP6(tab->trie, px, px0)
  {
    net_addr_roa6 roa0 = NET_ADDR_ROA6(px0.prefix, px0.pxlen, 0, 0);

    struct fib_node *fn;
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &roa0) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }
  }
  TRIE_WALK_TO_ROOT_END;

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

static int
net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn)
{
  struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
  struct fib_node *fn;
  int anything = 0;

  while (1)
  {
    for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
    {
      net_addr_roa6 *roa = (void *) fn->addr;
      net *r = fib_node_to_user(&tab->fib, fn);

      if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
      {
        anything = 1;
        if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
          return ROA_VALID;
      }
    }

    if (n.pxlen == 0)
      break;

    n.pxlen--;
    ip6_clrbit(&n.prefix, n.pxlen);
  }

  return anything ? ROA_INVALID : ROA_UNKNOWN;
}

/**
 * roa_check - check validity of route origination in a ROA table
 * @tab: ROA table
 * @n: network prefix to check
 * @asn: AS number of network prefix
 *
 * Implements RFC 6483 route validation for the given network prefix. The
 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
 * a candidate ROA with matching ASN and maxlen field greater than or equal to
 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
 * the caller cannot determine the origin AS, 0 may be used (in that case
 * ROA_VALID cannot be returned). Table @tab must have type NET_ROA4 or
 * NET_ROA6, network @n must have type NET_IP4 or NET_IP6, respectively.
 */
int
net_roa_check(rtable *tab, const net_addr *n, u32 asn)
{
  if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
  {
    if (tab->trie)
      return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn);
    else
      return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn);
  }
  else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
  {
    if (tab->trie)
      return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn);
    else
      return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn);
  }
  else
    return ROA_UNKNOWN; /* Should not happen */
}
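/*
 * Usage sketch (editor's illustration; variable names are hypothetical):
 *
 *   switch (net_roa_check(roa_tab, n->n.addr, origin_asn))
 *   {
 *   case ROA_VALID:   break;                   // origin covered by a ROA
 *   case ROA_INVALID: reject_route();  break;  // covered, but ASN/maxlen mismatch
 *   case ROA_UNKNOWN: apply_policy();  break;  // no covering ROA at all
 *   }
 */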

/**
 * rte_find - find a route
 * @net: network node
 * @src: route source
 *
 * The rte_find() function returns a route for destination @net
 * which is from route source @src.
 */
rte *
rte_find(net *net, struct rte_src *src)
{
  rte *e = net->routes;

  while (e && e->src != src)
    e = e->next;
  return e;
}

/**
 * rte_get_temp - get a temporary &rte
 * @a: attributes to assign to the new route (a &rta; in case it's
 * un-cached, rte_update() will create a cached copy automatically)
 * @src: route source
 *
 * Create a temporary &rte and bind it with the attributes @a.
 */
rte *
rte_get_temp(rta *a, struct rte_src *src)
{
  rte *e = sl_alloc(rte_slab);

  e->attrs = a;
  e->id = 0;
  e->flags = 0;
  e->pflags = 0;
  rt_lock_source(e->src = src);
  return e;
}

rte *
rte_do_cow(rte *r)
{
  rte *e = sl_alloc(rte_slab);

  memcpy(e, r, sizeof(rte));

  rt_lock_source(e->src);
  e->attrs = rta_clone(r->attrs);
  e->flags = 0;
  return e;
}

/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and the associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in which case they are returned unchanged. Second, &rte is a
 * private copy, but &rta is cached, in which case &rta is duplicated using
 * rta_do_cow(). Third, &rte is shared and &rta is cached, in which case
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, the cached &rta loses one reference, while the
 * private copy created by rta_do_cow() is a shallow copy sharing indirect data
 * (eattrs, nexthops, ...) with it. To work properly, the original shared &rta
 * should have another reference during the life of the created private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  if (!rta_is_cached(r->attrs))
    return r;

  r = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(r->attrs);
  r->attrs = a;
  return r;
}
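/*
 * Usage sketch (editor's illustration): before modifying attributes of a
 * route that may be shared, obtain private copies first, as rt_export_merged()
 * below does when linking merged next hops:
 *
 *   e = rte_cow_rta(e, pool);   // now both e and e->attrs are private
 *   ... modify e->attrs ...     // safe; the cached original stays intact
 */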

static int /* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
  int (*better)(rte *, rte *);

  if (!rte_is_valid(old))
    return 1;
  if (!rte_is_valid(new))
    return 0;

  if (new->attrs->pref > old->attrs->pref)
    return 1;
  if (new->attrs->pref < old->attrs->pref)
    return 0;
  if (new->src->proto->proto != old->src->proto->proto)
  {
    /*
     * If the user has configured protocol preferences so that two different
     * protocols have the same preference, try to break the tie by comparing
     * addresses. Not too useful, but it keeps the ordering of routes
     * unambiguous.
     */
    return new->src->proto->proto > old->src->proto->proto;
  }
  if (better = new->src->proto->rte_better)
    return better(new, old);
  return 0;
}

static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->attrs->pref != sec->attrs->pref)
    return 0;

  if (pri->src->proto->proto != sec->src->proto->proto)
    return 0;

  if (mergable = pri->src->proto->rte_mergable)
    return mergable(pri, sec);

  return 0;
}

static void
rte_trace(struct channel *c, rte *e, int dir, char *msg)
{
  log(L_TRACE "%s.%s %c %s %N %luL %uG %s",
      c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr, e->src->private_id, e->src->global_id,
      rta_dest_name(e->attrs->dest));
}

static inline void
rte_trace_in(uint flag, struct channel *c, rte *e, char *msg)
{
  if ((c->debug & flag) || (c->proto->debug & flag))
    rte_trace(c, e, '>', msg);
}

static inline void
rte_trace_out(uint flag, struct channel *c, rte *e, char *msg)
{
  if ((c->debug & flag) || (c->proto->debug & flag))
    rte_trace(c, e, '<', msg);
}

static rte *
export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
{
  struct proto *p = c->proto;
  const struct filter *filter = c->out_filter;
  struct proto_stats *stats = &c->stats;
  rte *rt;
  int v;

  rt = rt0;
  *rt_free = NULL;

  v = p->preexport ? p->preexport(c, rt) : 0;
  if (v < 0)
  {
    if (silent)
      goto reject;

    stats->exp_updates_rejected++;
    if (v == RIC_REJECT)
      rte_trace_out(D_FILTERS, c, rt, "rejected by protocol");
    goto reject;
  }
  if (v > 0)
  {
    if (!silent)
      rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol");
    goto accept;
  }

  v = filter && ((filter == FILTER_REJECT) ||
                 (f_run(filter, &rt, pool,
                        (silent ? FF_SILENT : 0)) > F_ACCEPT));
  if (v)
  {
    if (silent)
      goto reject;

    stats->exp_updates_filtered++;
    rte_trace_out(D_FILTERS, c, rt, "filtered out");
    goto reject;
  }

 accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}

static inline rte *
export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
{
  return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
}

static void
do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;

  if (refeed && new)
    c->refeed_count++;

  /* Apply export limit */
  struct channel_limit *l = &c->out_limit;
  if (l->action && !old && new)
  {
    if (stats->exp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);

    if (l->state == PLS_BLOCKED)
    {
      stats->exp_updates_rejected++;
      rte_trace_out(D_FILTERS, c, new, "rejected [limit]");
      return;
    }
  }

  /* Apply export table */
  if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed))
    return;

  if (new)
    stats->exp_updates_accepted++;
  else
    stats->exp_withdraws_accepted++;

  if (old)
  {
    bmap_clear(&c->export_map, old->id);
    stats->exp_routes--;
  }

  if (new)
  {
    bmap_set(&c->export_map, new->id);
    stats->exp_routes++;
  }

  if (p->debug & D_ROUTES)
  {
    if (new && old)
      rte_trace_out(D_ROUTES, c, new, "replaced");
    else if (new)
      rte_trace_out(D_ROUTES, c, new, "added");
    else if (old)
      rte_trace_out(D_ROUTES, c, old, "removed");
  }

  p->rt_notify(p, c, net, new, old);
}

static void
rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  if (new)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  if (new)
    new = export_filter(c, new, &new_free, 0);

  if (old && !bmap_test(&c->export_map, old->id))
    old = NULL;

  if (!new && !old)
    return;

  do_rt_notify(c, net, new, old, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}

static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  int new_first = 0;

  /*
   * We assume that there are no changes in net route order except (added)
   * new_changed and (removed) old_changed. Therefore, the function is not
   * compatible with deterministic_med (where nontrivial reordering can happen
   * as a result of a route change) and with recomputation of recursive routes
   * due to next hop update (where many routes can be changed in one step).
   *
   * Note that we need this assumption just for optimizations, we could just
   * run full new_best recomputation otherwise.
   *
   * There are three cases:
   * feed or old_best is old_changed -> we need to recompute new_best
   * old_best is before new_changed -> new_best is old_best, ignore
   * old_best is after new_changed -> try new_changed, otherwise old_best
   */

  if (net->routes)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Find old_best - either old_changed, or route for net->routes */
  if (old_changed && bmap_test(&c->export_map, old_changed->id))
    old_best = old_changed;
  else
  {
    for (rte *r = net->routes; rte_is_valid(r); r = r->next)
    {
      if (bmap_test(&c->export_map, r->id))
      {
        old_best = r;
        break;
      }

      /* Note if new_changed found before old_best */
      if (r == new_changed)
        new_first = 1;
    }
  }

  /* Find new_best */
  if ((new_changed == old_changed) || (old_best == old_changed))
  {
    /* Feed or old_best changed -> find first accepted by filters */
    for (rte *r = net->routes; rte_is_valid(r); r = r->next)
      if (new_best = export_filter(c, r, &new_free, 0))
        break;
  }
  else
  {
    /* Other cases -> either new_changed, or old_best (and nothing changed) */
    if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0)))
      new_best = new_changed;
    else
      return;
  }

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}


static struct nexthop *
nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
{
  return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
}

rte *
rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
{
  // struct proto *p = c->proto;
  struct nexthop *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter_(c, best0, rt_free, pool, silent);

  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter_(c, rt0, &tmp, pool, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);

    if (nhs->next)
    {
      best = rte_cow_rta(best, pool);
      nexthop_link(best->attrs, nhs);
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}

static void
rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
                 rte *new_best, rte *old_best, int refeed)
{
  // struct proto *p = c->proto;
  rte *new_free = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) &&
      (new_changed != old_changed) &&
      !rte_mergable(new_best, new_changed) &&
      !rte_mergable(old_best, old_changed))
    return;

  if (new_best)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0);

  /* Check old merged route */
  if (old_best && !bmap_test(&c->export_map, old_best->id))
    old_best = NULL;

  if (!new_best && !old_best)
    return;

  do_rt_notify(c, net, new_best, old_best, refeed);

  /* Discard temporary rte */
  if (new_free)
    rte_free(new_free);
}


/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
 * @type: type of route announcement (RA_UNDEF or RA_ANY)
 * @net: network in question
 * @new: the new or changed route
 * @old: the previous route replaced by the new one
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 *
 * This function gets a routing table update and announces it to all protocols
 * that are connected to the same table by their channels.
 *
 * There are two ways of how routing table changes are announced. First, there
 * is a change of just one route in @net (which may cause a change of the best
 * route of the network). In this case @new and @old describe the changed route
 * and @new_best and @old_best describe the best routes. Other routes are not
 * affected, but in a sorted table the order of other routes might change.
 *
 * Second, there is a bulk change of multiple routes in @net, with shared best
 * route selection. In such a case separate route changes are described using
 * @type of %RA_ANY, with @new and @old specifying the changed route, while
 * @new_best and @old_best are NULL. After that, another notification is done
 * where @new_best and @old_best are filled (may be the same), but @new and @old
 * are NULL.
 *
 * The function announces the change to all associated channels. For each
 * channel, an appropriate preprocessing is done according to channel &ra_mode.
 * For example, %RA_OPTIMAL channels receive just changes of best routes.
 *
 * In general, we first call the preexport() hook of a protocol, which performs
 * basic checks on the route (each protocol has a right to veto or force accept
 * of the route before any filter is asked). Then we consult an export filter
 * of the channel and verify the old route in an export map of the channel.
 * Finally, the rt_notify() hook of the protocol gets called.
 *
 * Note that there are also calls of rt_notify() hooks due to feed, but that is
 * done outside of the scope of rte_announce().
 */
static void
rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old,
             rte *new_best, rte *old_best)
{
  if (!rte_is_valid(new))
    new = NULL;

  if (!rte_is_valid(old))
    old = NULL;

  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;

  if (!new && !old && !new_best && !old_best)
    return;

  if (new_best != old_best)
  {
    if (new_best)
      new_best->sender->stats.pref_routes++;
    if (old_best)
      old_best->sender->stats.pref_routes--;

    if (tab->hostcache)
      rt_notify_hostcache(tab, net);

    if (!EMPTY_LIST(tab->flowspec_links))
      rt_flowspec_notify(tab, net);
  }

  rt_schedule_notify(tab);

  struct channel *c; node *n;
  WALK_LIST2(c, n, tab->channels, table_node)
  {
    if (c->export_state == ES_DOWN)
      continue;

    if (type && (type != c->ra_mode))
      continue;

    switch (c->ra_mode)
    {
    case RA_OPTIMAL:
      if (new_best != old_best)
        rt_notify_basic(c, net, new_best, old_best, 0);
      break;

    case RA_ANY:
      if (new != old)
        rt_notify_basic(c, net, new, old, 0);
      break;

    case RA_ACCEPTED:
      /*
       * The (new != old) condition is problematic here, as it would break
       * the second usage pattern (announcement after bulk change, used in
       * rt_next_hop_update_net(), which sends both new and old as NULL).
       *
       * But recursive next hops do not work with sorted tables anyway,
       * such a configuration is forbidden in BGP and not supported in
       * rt_notify_accepted().
       *
       * The condition is needed to eliminate spurious announcements where
       * both old and new routes are not valid (so they are NULL).
       */
      if (new != old)
        rt_notify_accepted(c, net, new, old, 0);
      break;

    case RA_MERGED:
      rt_notify_merged(c, net, new, old, new_best, old_best, 0);
      break;
    }
  }
}
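/*
 * The two announcement patterns from the DOC above, schematically (editor's
 * illustration):
 *
 *   single route change:
 *     rte_announce(tab, RA_UNDEF, net, new, old, new_best, old_best);
 *
 *   bulk change (e.g. from rt_next_hop_update_net()):
 *     rte_announce(tab, RA_ANY, net, new, old, NULL, NULL);    // per route
 *     rte_announce(tab, RA_UNDEF, net, NULL, NULL, new_best, old_best);
 */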

static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

  if (!net_validate(n->n.addr))
  {
    log(L_WARN "Ignoring bogus prefix %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  /* FIXME: better handling of different nettypes */
  c = !net_is_flow(n->n.addr) ?
    net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  {
    log(L_WARN "Ignoring bogus route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
  {
    /* Exception for flowspec that failed validation */
    if (net_is_flow(n->n.addr) && (e->attrs->dest == RTD_UNREACHABLE))
      return 1;

    log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
        n->n.addr, e->attrs->dest, e->sender->proto->name);
    return 0;
  }

  if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
  {
    log(L_WARN "Ignoring unsorted multipath route %N received via %s",
        n->n.addr, e->sender->proto->name);
    return 0;
  }

  return 1;
}

/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
void
rte_free(rte *e)
{
  rt_unlock_source(e->src);
  if (rta_is_cached(e->attrs))
    rta_free(e->attrs);
  sl_free(e);
}

static inline void
rte_free_quick(rte *e)
{
  rt_unlock_source(e->src);
  rta_free(e->attrs);
  sl_free(e);
}

int
rte_same(rte *x, rte *y)
{
  /* rte.flags / rte.pflags are not checked, as they are internal to rtable */
  return
    x->attrs == y->attrs &&
    x->src == y->src &&
    rte_is_filtered(x) == rte_is_filtered(y);
}

static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }

static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;
  rte *old_best = net->routes;
  rte *old = NULL;
  rte **k;

  k = &net->routes; /* Find and remove original route from the same protocol */
  while (old = *k)
  {
    if (old->src == src)
    {
      /* If there is the same route in the routing table but from
       * a different sender, then there are two paths from the
       * source protocol to this routing table through transparent
       * pipes, which is not allowed.
       *
       * We log that and ignore the route. If it is a withdraw, we
       * ignore it completely (there might be 'spurious withdraws',
       * see FIXME in do_rte_announce())
       */
      if (old->sender->proto != p)
      {
        if (new)
        {
          log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
                 net->n.addr, table->name);
          rte_free_quick(new);
        }
        return;
      }

      if (new && rte_same(old, new))
      {
        /* No changes, ignore the new route and refresh the old one */

        old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);

        if (!rte_is_filtered(new))
        {
          stats->imp_updates_ignored++;
          rte_trace_in(D_ROUTES, c, new, "ignored");
        }

        rte_free_quick(new);
        return;
      }
      *k = old->next;
      table->rt_count--;
      break;
    }
    k = &old->next;
    before_old = old;
  }

  /* Save the last accessed position */
  rte **pos = k;

  if (!old)
    before_old = NULL;

  if (!old && !new)
  {
    stats->imp_withdraws_ignored++;
    return;
  }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new && !c->in_table)
  {
    u32 all_routes = stats->imp_routes + stats->filt_routes;

    if (all_routes >= l->limit)
      channel_notify_limit(c, l, PLD_RX, all_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* In the receive limit case the situation is simple: old is NULL, so
         we just free new and exit as if nothing happened */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
      rte_free_quick(new);
      return;
    }
  }

  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
  {
    if (stats->imp_routes >= l->limit)
      channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

    if (l->state == PLS_BLOCKED)
    {
      /* In the import limit case the situation is more complicated. We
         shouldn't just drop the route, we should handle it like
         it was filtered. We also have to continue the route
         processing if old or new is non-NULL, but we should exit
         if both are NULL as this case is probably assumed to be
         already handled. */

      stats->imp_updates_ignored++;
      rte_trace_in(D_FILTERS, c, new, "ignored [limit]");

      if (c->in_keep_filtered)
        new->flags |= REF_FILTERED;
      else
        { rte_free_quick(new); new = NULL; }

      /* Note that old && !new could be possible when
         c->in_keep_filtered changed in the recent past. */

      if (!old && !new)
        return;

      new_ok = 0;
      goto skip_stats1;
    }
  }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

  if (old_ok || new_ok)
    table->last_rt_change = current_time();

 skip_stats1:

  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
  {
    /* If routes are sorted, just insert the new route at the appropriate position */
    if (new)
    {
      if (before_old && !rte_better(new, before_old))
        k = &before_old->next;
      else
        k = &net->routes;

      for (; *k; k=&(*k)->next)
        if (rte_better(new, *k))
          break;

      new->next = *k;
      *k = new;

      table->rt_count++;
    }
  }
  else
  {
    /* If routes are not sorted, find the best route and move it to
       the first position. There are several optimized cases. */

    if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
      goto do_recalculate;

    if (new && rte_better(new, old_best))
    {
      /* The first case - the new route is clearly optimal,
         we link it at the first position */

      new->next = net->routes;
      net->routes = new;

      table->rt_count++;
    }
    else if (old == old_best)
    {
      /* The second case - the old best route disappeared; we add the
         new route (if we have any) to the list (we don't care about
         its position) and then we elect the new optimal route, relink
         that route at the first position and announce it. The new
         optimal route might be NULL if there are no routes left */

    do_recalculate:
      /* Add the new route to the list */
      if (new)
      {
        new->next = *pos;
        *pos = new;

        table->rt_count++;
      }

      /* Find a new optimal route (if there is any) */
      if (net->routes)
      {
        rte **bp = &net->routes;
        for (k=&(*bp)->next; *k; k=&(*k)->next)
          if (rte_better(*k, *bp))
            bp = k;

        /* And relink it */
        rte *best = *bp;
        *bp = best->next;
        best->next = net->routes;
        net->routes = best;
      }
    }
    else if (new)
    {
      /* The third case - the new route is not better than the old
         best route (therefore old_best != NULL) and the old best
         route was not removed (therefore old_best == net->routes).
         We just link the new route to the old/last position. */

      new->next = *pos;
      *pos = new;

      table->rt_count++;
    }
    /* The fourth (empty) case - a suboptimal route was removed, nothing to do */
  }

  if (new)
  {
    new->lastmod = current_time();

    if (!old)
    {
      new->id = hmap_first_zero(&table->id_map);
      hmap_set(&table->id_map, new->id);
    }
    else
      new->id = old->id;
  }

  /* Log the route change */
  if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES))
  {
    if (new_ok)
      rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added");
    else if (old_ok)
    {
      if (old != old_best)
        rte_trace(c, old, '>', "removed");
      else if (rte_is_ok(net->routes))
        rte_trace(c, old, '>', "removed [replaced]");
      else
        rte_trace(c, old, '>', "removed [sole]");
    }
  }

  /* Propagate the route change */
  rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best);

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_threshold))
    rt_kick_prune_timer(table);

  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
  {
    if (!new)
      hmap_clear(&table->id_map, old->id);

    rte_free_quick(old);
  }
}

static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}
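/*
 * Editor's note: the lock is a plain nesting counter, so updates may recurse
 * (e.g. a pipe propagating rte_update() into another table). All nested
 * levels share rte_update_pool; it is flushed only when the outermost
 * rte_update_unlock() brings the counter back to zero.
 */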

/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
 * @c: channel doing the update
 * @net: network node
 * @p: protocol submitting the update
 * @src: protocol originating the update
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
 * same value as @new->attrs->proto. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores @p in @new->sender.
 *
 * When rte_update() gets any route, it automatically validates it (checks
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route into the table), converts all protocol-dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * and stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the protocol @src
 * for network @n, replace it by the new one (or remove it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
 */

void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  const struct filter *filter = c->in_filter;
  struct mpls_fec *fec = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
  {
    /* Create a temporary table node */
    nn = alloca(sizeof(net) + n->length);
    memset(nn, 0, sizeof(net) + n->length);
    net_copy(nn->n.addr, n);

    new->net = nn;
    new->sender = c;

    stats->imp_updates_received++;
    if (!rte_validate(new))
    {
      rte_trace_in(D_FILTERS, c, new, "invalid");
      stats->imp_updates_invalid++;
      goto drop;
    }

    if (filter == FILTER_REJECT)
    {
      stats->imp_updates_filtered++;
      rte_trace_in(D_FILTERS, c, new, "filtered out");

      if (! c->in_keep_filtered)
        goto drop;

      /* new is a private copy, we can modify it */
      new->flags |= REF_FILTERED;
    }
    else if (filter)
    {
      int fr = f_run(filter, &new, rte_update_pool, 0);
      if (fr > F_ACCEPT)
      {
        stats->imp_updates_filtered++;
        rte_trace_in(D_FILTERS, c, new, "filtered out");

        if (! c->in_keep_filtered)
          goto drop;

        new->flags |= REF_FILTERED;
      }
    }

    if (p->mpls_map)
    {
      if (mpls_handle_rte(p->mpls_map, n, new, rte_update_pool, &fec) < 0)
      {
        rte_trace_in(D_FILTERS, c, new, "invalid");
        stats->imp_updates_invalid++;
        goto drop;
      }
    }

    if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
      new->attrs = rta_lookup(new->attrs);
    new->flags |= REF_COW;

    /* Use the actual struct network, not the dummy one */
    nn = net_get(c->table, n);
    new->net = nn;
  }
  else
  {
    stats->imp_withdraws_received++;

    if (!(nn = net_find(c->table, n)) || !src)
    {
      stats->imp_withdraws_ignored++;
      rte_update_unlock();
      return;
    }
  }

 recalc:
  /* And recalculate the best route */
  rte_recalculate(c, nn, new, src);

  if (p->mpls_map)
    mpls_handle_rte_cleanup(p->mpls_map, &fec);

  rte_update_unlock();
  return;

 drop:
  rte_free(new);
  new = NULL;
  if (nn = net_find(c->table, n))
    goto recalc;

  rte_update_unlock();
}
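/*
 * Sketch of the sequence described in the DOC above (editor's illustration;
 * attribute setup abbreviated, field values are placeholders):
 *
 *   rta a0 = { .source = RTS_STATIC, .scope = SCOPE_UNIVERSE, ... };
 *   rte *e = rte_get_temp(&a0, src);  // un-cached rta gets cached inside
 *   rte_update2(c, n, e, src);        // insert/replace the route
 *   rte_update2(c, n, NULL, src);     // ... and NULL withdraws it again
 */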

/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL */
static inline void
rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best);
  rte_update_unlock();
}

static inline void
rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->src);
  rte_update_unlock();
}

/* Modify existing route by protocol hook, used for long-lived graceful restart */
static inline void
rte_modify(rte *old)
{
  rte_update_lock();

  rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
  if (new != old)
  {
    if (new)
    {
      if (!rta_is_cached(new->attrs))
        new->attrs = rta_lookup(new->attrs);
      new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
    }

    rte_recalculate(old->sender, old->net, new, old->src);
  }

  rte_update_unlock();
}

/* Check rtable for the best route to a given net, whether it would be exported to p */
int
rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter)
{
  struct proto *p = c->proto;
  net *n = net_find(t, a);
  rte *rt = n ? n->routes : NULL;

  if (!rte_is_valid(rt))
    return 0;

  rte_update_lock();

  /* Rest is a stripped-down export_filter() */
  int v = p->preexport ? p->preexport(c, rt) : 0;
  if (v == RIC_PROCESS)
    v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);

  /* Discard temporary rte */
  if (rt != n->routes)
    rte_free(rt);

  rte_update_unlock();

  return v > 0;
}


/**
 * rt_refresh_begin - start a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function starts a refresh cycle for the given routing table and channel.
 * The refresh cycle is a sequence where the protocol sends all its valid
 * routes to the routing table (by rte_update()). After that, all protocol
 * routes (more precisely routes with @c as @sender) not sent during the
 * refresh cycle but still in the table from the past are pruned. This is
 * implemented by marking all related routes as stale by the REF_STALE flag in
 * rt_refresh_begin(), then marking all related stale routes with the
 * REF_DISCARD flag in rt_refresh_end() and then removing such routes in the
 * prune loop.
 */
void
rt_refresh_begin(rtable *t, struct channel *c)
{
  if (c->debug & D_EVENTS)
    log(L_TRACE "%s.%s: Route refresh begin", c->proto->name, c->name);

  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if (e->sender == c)
        e->flags |= REF_STALE;
  }
  FIB_WALK_END;
}

/**
 * rt_refresh_end - end a refresh cycle
 * @t: related routing table
 * @c: related channel
 *
 * This function ends a refresh cycle for the given routing table and channel.
 * See rt_refresh_begin() for a description of refresh cycles.
 */
void
rt_refresh_end(rtable *t, struct channel *c)
{
  if (c->debug & D_EVENTS)
    log(L_TRACE "%s.%s: Route refresh end", c->proto->name, c->name);

  int prune = 0;

  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if ((e->sender == c) && (e->flags & REF_STALE))
      {
        e->flags |= REF_DISCARD;
        prune = 1;
      }
  }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}
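/*
 * The refresh cycle described above, from a protocol's point of view
 * (editor's illustration):
 *
 *   rt_refresh_begin(tab, c);      // all of c's routes get REF_STALE
 *   ... rte_update2() for every still-valid route ...
 *                                  // re-sent routes lose REF_STALE again
 *   rt_refresh_end(tab, c);        // leftovers get REF_DISCARD, prune runs
 */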

void
rt_modify_stale(rtable *t, struct channel *c)
{
  int prune = 0;

  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
      {
        e->flags |= REF_MODIFY;
        prune = 1;
      }
  }
  FIB_WALK_END;

  if (prune)
    rt_schedule_prune(t);
}

/**
 * rte_dump - dump a route
 * @e: &rte to be dumped
 *
 * This function dumps the contents of a &rte to debug output.
 */
void
rte_dump(rte *e)
{
  net *n = e->net;
  debug("%-1N ", n->n.addr);
  debug("PF=%02x ", e->pflags);
  rta_dump(e->attrs);
  debug("\n");
}

/**
 * rt_dump - dump a routing table
 * @t: routing table to be dumped
 *
 * This function dumps the contents of a given routing table to debug output.
 */
void
rt_dump(rtable *t)
{
  debug("Dump of routing table <%s>\n", t->name);
#ifdef DEBUGGING
  fib_check(&t->fib);
#endif
  FIB_WALK(&t->fib, net, n)
  {
    rte *e;
    for (e = n->routes; e; e = e->next)
      rte_dump(e);
  }
  FIB_WALK_END;
  debug("\n");
}

/**
 * rt_dump_all - dump all routing tables
 *
 * This function dumps the contents of all routing tables to debug output.
 */
void
rt_dump_all(void)
{
  rtable *t;
  node *n;

  WALK_LIST2(t, n, routing_tables, n)
    rt_dump(t);
}

static inline void
rt_schedule_hcu(rtable *tab)
{
  if (tab->hcu_scheduled)
    return;

  tab->hcu_scheduled = 1;
  ev_schedule(tab->rt_event);
}

static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == NHU_CLEAN)
    ev_schedule(tab->rt_event);

  /* state change:
   * NHU_CLEAN -> NHU_SCHEDULED
   * NHU_RUNNING -> NHU_DIRTY
   */
  tab->nhu_state |= NHU_SCHEDULED;
}

void
rt_schedule_prune(rtable *tab)
{
  if (tab->prune_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->prune_state |= 1;
}


static void
rt_event(void *ptr)
{
  rtable *tab = ptr;

  rt_lock_table(tab);

  if (tab->hcu_scheduled)
    rt_update_hostcache(tab);

  if (tab->nhu_state)
    rt_next_hop_update(tab);

  if (tab->prune_state)
    rt_prune_table(tab);

  rt_unlock_table(tab);
}


static void
rt_prune_timer(timer *t)
{
  rtable *tab = t->data;

  if (tab->gc_counter >= tab->config->gc_threshold)
    rt_schedule_prune(tab);
}

static void
rt_kick_prune_timer(rtable *tab)
{
  /* Return if prune is already scheduled */
  if (tm_active(tab->prune_timer) || (tab->prune_state & 1))
    return;

  /* Randomize GC period to +/- 50% */
  btime gc_period = tab->config->gc_period;
  gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period);
  tm_start(tab->prune_timer, gc_period);
}
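/*
 * Editor's note on the randomization above: with a gc_period of 10 s, the
 * timer fires after (10/2) + (0..10) s, i.e. uniformly in [5 s, 15 s),
 * which spreads the prune work of multiple tables over time.
 */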
1946
1947
1948 static inline btime
1949 rt_settled_time(rtable *tab)
1950 {
1951 ASSUME(tab->base_settle_time != 0);
1952
1953 return MIN(tab->last_rt_change + tab->config->min_settle_time,
1954 tab->base_settle_time + tab->config->max_settle_time);
1955 }
1956
1957 static void
1958 rt_settle_timer(timer *t)
1959 {
1960 rtable *tab = t->data;
1961
1962 if (!tab->base_settle_time)
1963 return;
1964
1965 btime settled_time = rt_settled_time(tab);
1966 if (current_time() < settled_time)
1967 {
1968 tm_set(tab->settle_timer, settled_time);
1969 return;
1970 }
1971
1972 /* Settled */
1973 tab->base_settle_time = 0;
1974
1975 struct rt_subscription *s;
1976 WALK_LIST(s, tab->subscribers)
1977 s->hook(s);
1978 }
1979
1980 static void
1981 rt_kick_settle_timer(rtable *tab)
1982 {
1983 tab->base_settle_time = current_time();
1984
1985 if (!tab->settle_timer)
1986 tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0);
1987
1988 if (!tm_active(tab->settle_timer))
1989 tm_set(tab->settle_timer, rt_settled_time(tab));
1990 }
1991
1992 static inline void
1993 rt_schedule_notify(rtable *tab)
1994 {
1995 if (EMPTY_LIST(tab->subscribers))
1996 return;
1997
1998 if (tab->base_settle_time)
1999 return;
2000
2001 rt_kick_settle_timer(tab);
2002 }
2003
2004 void
2005 rt_subscribe(rtable *tab, struct rt_subscription *s)
2006 {
2007 s->tab = tab;
2008 rt_lock_table(tab);
2009 add_tail(&tab->subscribers, &s->n);
2010 }
2011
2012 void
2013 rt_unsubscribe(struct rt_subscription *s)
2014 {
2015 rem_node(&s->n);
2016 rt_unlock_table(s->tab);
2017 }
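
/*
 * A minimal subscriber sketch, assuming the struct rt_subscription layout
 * from nest/route.h (with .hook and .data fields); my_settled() and the
 * pool p are hypothetical:
 *
 *   static void my_settled(struct rt_subscription *s)
 *   {
 *     log(L_INFO "Table %s settled", s->tab->name);
 *   }
 *
 *   struct rt_subscription *s = mb_allocz(p, sizeof(struct rt_subscription));
 *   s->hook = my_settled;
 *   rt_subscribe(tab, s);
 *   ...
 *   rt_unsubscribe(s);
 */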
2018
2019 static struct rt_flowspec_link *
2020 rt_flowspec_find_link(rtable *src, rtable *dst)
2021 {
2022 struct rt_flowspec_link *ln;
2023 WALK_LIST(ln, src->flowspec_links)
2024 if ((ln->src == src) && (ln->dst == dst))
2025 return ln;
2026
2027 return NULL;
2028 }
2029
2030 void
2031 rt_flowspec_link(rtable *src, rtable *dst)
2032 {
2033 ASSERT(rt_is_ip(src));
2034 ASSERT(rt_is_flow(dst));
2035
2036 struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst);
2037
2038 if (!ln)
2039 {
2040 rt_lock_table(src);
2041 rt_lock_table(dst);
2042
2043 ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link));
2044 ln->src = src;
2045 ln->dst = dst;
2046 add_tail(&src->flowspec_links, &ln->n);
2047 }
2048
2049 ln->uc++;
2050 }
2051
2052 void
2053 rt_flowspec_unlink(rtable *src, rtable *dst)
2054 {
2055 struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst);
2056
2057 ASSERT(ln && (ln->uc > 0));
2058
2059 ln->uc--;
2060
2061 if (!ln->uc)
2062 {
2063 rem_node(&ln->n);
2064 mb_free(ln);
2065
2066 rt_unlock_table(src);
2067 rt_unlock_table(dst);
2068 }
2069 }
2070
2071 static void
2072 rt_flowspec_notify(rtable *src, net *net)
2073 {
2074 /* Only IP tables may be sources of flowspec links */
2075 ASSERT(rt_is_ip(src));
2076
2077 struct rt_flowspec_link *ln;
2078 WALK_LIST(ln, src->flowspec_links)
2079 {
2080 rtable *dst = ln->dst;
2081 ASSERT(rt_is_flow(dst));
2082
2083 /* No need to inspect it further if recalculation is already active */
2084 if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY))
2085 continue;
2086
2087 if (trie_match_net(dst->flowspec_trie, net->n.addr))
2088 rt_schedule_nhu(dst);
2089 }
2090 }
2091
2092 static void
2093 rt_flowspec_reset_trie(rtable *tab)
2094 {
2095 linpool *lp = tab->flowspec_trie->lp;
2096 int ipv4 = tab->flowspec_trie->ipv4;
2097
2098 lp_flush(lp);
2099 tab->flowspec_trie = f_new_trie(lp, 0);
2100 tab->flowspec_trie->ipv4 = ipv4;
2101 }
2102
2103 static void
2104 rt_free(resource *_r)
2105 {
2106 rtable *r = (rtable *) _r;
2107
2108 DBG("Deleting routing table %s\n", r->name);
2109 ASSERT_DIE(r->use_count == 0);
2110
2111 if (r->internal)
2112 return;
2113
2114 r->config->table = NULL;
2115 rem_node(&r->n);
2116
2117 if (r->hostcache)
2118 rt_free_hostcache(r);
2119
2120 /* Freed automagically by the resource pool
2121 fib_free(&r->fib);
2122 hmap_free(&r->id_map);
2123 rfree(r->rt_event);
2124 rfree(r->settle_timer);
2125 mb_free(r);
2126 */
2127 }
2128
2129 static void
2130 rt_res_dump(resource *_r)
2131 {
2132 rtable *r = (rtable *) _r;
2133 debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n",
2134 r->name, net_label[r->addr_type], r->rt_count, r->use_count);
2135 }
2136
2137 static struct resclass rt_class = {
2138 .name = "Routing table",
2139 .size = sizeof(struct rtable),
2140 .free = rt_free,
2141 .dump = rt_res_dump,
2142 .lookup = NULL,
2143 .memsize = NULL,
2144 };
2145
2146 rtable *
2147 rt_setup(pool *pp, struct rtable_config *cf)
2148 {
2149 pool *p = rp_newf(pp, "Routing table %s", cf->name);
2150
2151 rtable *t = ralloc(p, &rt_class);
2152 t->rp = p;
2153
2154 t->name = cf->name;
2155 t->config = cf;
2156 t->addr_type = cf->addr_type;
2157
2158 fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
2159
2160 if (cf->trie_used)
2161 {
2162 t->trie = f_new_trie(lp_new_default(p), 0);
2163 t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4);
2164
2165 t->fib.init = net_init_with_trie;
2166 }
2167
2168 init_list(&t->channels);
2169 init_list(&t->flowspec_links);
2170 init_list(&t->subscribers);
2171
2172 hmap_init(&t->id_map, p, 1024);
2173 hmap_set(&t->id_map, 0);
2174
2175 if (!(t->internal = cf->internal))
2176 {
2177 t->rt_event = ev_new_init(p, rt_event, t);
2178 t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0);
2179 t->last_rt_change = t->gc_time = current_time();
2180
2181 if (rt_is_flow(t))
2182 {
2183 t->flowspec_trie = f_new_trie(lp_new_default(p), 0);
2184 t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4);
2185 }
2186 }
2187
2188 return t;
2189 }
2190
2191 /**
2192 * rt_init - initialize routing tables
2193 *
2194 * This function is called during BIRD startup. It initializes the
2195 * routing table module.
2196 */
2197 void
2198 rt_init(void)
2199 {
2200 rta_init();
2201 rt_table_pool = rp_new(&root_pool, "Routing tables");
2202 rte_update_pool = lp_new_default(rt_table_pool);
2203 rte_slab = sl_new(rt_table_pool, sizeof(rte));
2204 init_list(&routing_tables);
2205 }
2206
2207
2208 /**
2209 * rt_prune_table - prune a routing table
2210 *
2211 * The prune loop scans routing tables and removes routes belonging to flushing
2212 * protocols, discarded routes and also stale network entries. It is called from
2213 * rt_event(). The event is rescheduled if the current iteration do not finish
2214 * the table. The pruning is directed by the prune state (@prune_state),
2215 * specifying whether the prune cycle is scheduled or running, and there
2216 * is also a persistent pruning iterator (@prune_fit).
2217 *
2218 * The prune loop is used also for channel flushing. For this purpose, the
2219 * channels to flush are marked before the iteration and notified after the
2220 * iteration.
2221 */
2222 static void
2223 rt_prune_table(rtable *tab)
2224 {
2225 struct fib_iterator *fit = &tab->prune_fit;
2226 int limit = 2000;
2227
2228 struct channel *c;
2229 node *n, *x;
2230
2231 DBG("Pruning route table %s\n", tab->name);
2232 #ifdef DEBUGGING
2233 fib_check(&tab->fib);
2234 #endif
2235
2236 if (tab->prune_state == 0)
2237 return;
2238
2239 if (tab->prune_state == 1)
2240 {
2241 /* Mark channels to flush */
2242 WALK_LIST2(c, n, tab->channels, table_node)
2243 if (c->channel_state == CS_FLUSHING)
2244 c->flush_active = 1;
2245
2246 FIB_ITERATE_INIT(fit, &tab->fib);
2247 tab->prune_state = 2;
2248
2249 tab->gc_counter = 0;
2250 tab->gc_time = current_time();
2251
2252 if (tab->prune_trie)
2253 {
2254 /* Init prefix trie pruning */
2255 tab->trie_new = f_new_trie(lp_new_default(tab->rp), 0);
2256 tab->trie_new->ipv4 = tab->trie->ipv4;
2257 }
2258 }
2259
2260 again:
2261 FIB_ITERATE_START(&tab->fib, fit, net, n)
2262 {
2263 rte *e;
2264
2265 rescan:
2266 if (limit <= 0)
2267 {
2268 FIB_ITERATE_PUT(fit);
2269 ev_schedule(tab->rt_event);
2270 return;
2271 }
2272
2273 for (e=n->routes; e; e=e->next)
2274 {
2275 if (e->sender->flush_active || (e->flags & REF_DISCARD))
2276 {
2277 rte_discard(e);
2278 limit--;
2279
2280 goto rescan;
2281 }
2282
2283 if (e->flags & REF_MODIFY)
2284 {
2285 rte_modify(e);
2286 limit--;
2287
2288 goto rescan;
2289 }
2290 }
2291
2292 if (!n->routes) /* Orphaned FIB entry */
2293 {
2294 FIB_ITERATE_PUT(fit);
2295 fib_delete(&tab->fib, n);
2296 goto again;
2297 }
2298
2299 if (tab->trie_new)
2300 {
2301 trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen);
2302 limit--;
2303 }
2304 }
2305 FIB_ITERATE_END;
2306
2307 #ifdef DEBUGGING
2308 fib_check(&tab->fib);
2309 #endif
2310
2311 /* state change 2->0, 3->1 */
2312 tab->prune_state &= 1;
2313
2314 if (tab->trie_new)
2315 {
2316 /* Finish prefix trie pruning */
2317
2318 if (!tab->trie_lock_count)
2319 {
2320 rfree(tab->trie->lp);
2321 }
2322 else
2323 {
2324 ASSERT(!tab->trie_old);
2325 tab->trie_old = tab->trie;
2326 tab->trie_old_lock_count = tab->trie_lock_count;
2327 tab->trie_lock_count = 0;
2328 }
2329
2330 tab->trie = tab->trie_new;
2331 tab->trie_new = NULL;
2332 tab->prune_trie = 0;
2333 }
2334 else
2335 {
2336 /* Schedule prefix trie pruning */
2337 if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries)))
2338 {
2339 /* state change 0->1, 2->3 */
2340 tab->prune_state |= 1;
2341 tab->prune_trie = 1;
2342 }
2343 }
2344
2345 if (tab->prune_state > 0)
2346 ev_schedule(tab->rt_event);
2347
2348 /* FIXME: This should be handled in a better way */
2349 rt_prune_sources();
2350
2351 /* Close flushed channels */
2352 WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
2353 if (c->flush_active)
2354 {
2355 c->flush_active = 0;
2356 channel_set_state(c, CS_DOWN);
2357 }
2358
2359 return;
2360 }
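
/*
 * Like nhu_state above, prune_state is a small two-bit machine: 0 = clean,
 * 1 = scheduled, 2 = running, 3 = running with a re-run requested.
 * rt_schedule_prune() sets bit 0 (0->1, 2->3), starting a pass moves 1->2,
 * and `tab->prune_state &= 1` at the end of a pass (2->0, 3->1) makes a
 * request that arrived mid-pass trigger another full pass.
 */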
2361
2362 /**
2363 * rt_lock_trie - lock a prefix trie of a routing table
2364 * @tab: routing table with prefix trie to be locked
2365 *
2366 * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state
2367 * structures. Therefore, asynchronous walks should lock the prefix trie using
2368 * this function. That allows the prune loop to rebuild the trie, but postpones
2369 * its freeing until all walks are done (unlocked by rt_unlock_trie()).
2370 *
2371 * Returns the current trie, which will be locked; the value should be passed
2372 * back to rt_unlock_trie() for unlocking.
2373 *
2374 */
2375 struct f_trie *
2376 rt_lock_trie(rtable *tab)
2377 {
2378 ASSERT(tab->trie);
2379
2380 tab->trie_lock_count++;
2381 return tab->trie;
2382 }
2383
2384 /**
2385 * rt_unlock_trie - unlock a prefix trie of a routing table
2386 * @tab: routing table with prefix trie to be unlocked
2387 * @trie: value returned by matching rt_lock_trie()
2388 *
2389 * Called for a trie locked by rt_lock_trie() after the walk over the trie
2390 * is finished. It may free the trie and schedule the next trie pruning.
2391 */
2392 void
2393 rt_unlock_trie(rtable *tab, struct f_trie *trie)
2394 {
2395 ASSERT(trie);
2396
2397 if (trie == tab->trie)
2398 {
2399 /* Unlock the current prefix trie */
2400 ASSERT(tab->trie_lock_count);
2401 tab->trie_lock_count--;
2402 }
2403 else if (trie == tab->trie_old)
2404 {
2405 /* Unlock the old prefix trie */
2406 ASSERT(tab->trie_old_lock_count);
2407 tab->trie_old_lock_count--;
2408
2409 /* Free old prefix trie that is no longer needed */
2410 if (!tab->trie_old_lock_count)
2411 {
2412 rfree(tab->trie_old->lp);
2413 tab->trie_old = NULL;
2414
2415 /* Kick prefix trie pruning that was postponed */
2416 if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries)))
2417 {
2418 tab->prune_trie = 1;
2419 rt_schedule_prune(tab);
2420 }
2421 }
2422 }
2423 else
2424 log(L_BUG "Invalid arg to rt_unlock_trie()");
2425 }
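
/*
 * A minimal sketch of the intended pattern, modelled on the TRIE_WALK() use
 * in rt_flowspec_check() below; `root` stands for a caller-supplied net_addr
 * (or NULL to walk the whole trie):
 *
 *   struct f_trie *trie = rt_lock_trie(tab);
 *   TRIE_WALK(trie, subnet, root)
 *   {
 *     ... examine subnet, possibly yielding to the main loop ...
 *   }
 *   TRIE_WALK_END;
 *   rt_unlock_trie(tab, trie);
 */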
2426
2427
2428 void
2429 rt_preconfig(struct config *c)
2430 {
2431 init_list(&c->tables);
2432
2433 rt_new_table(cf_get_symbol(c, "master4"), NET_IP4);
2434 rt_new_table(cf_get_symbol(c, "master6"), NET_IP6);
2435 }
2436
2437 void
2438 rt_postconfig(struct config *c)
2439 {
2440 uint num_tables = list_length(&c->tables);
2441 btime def_gc_period = 400 MS * num_tables;
2442 def_gc_period = MAX(def_gc_period, 10 S);
2443 def_gc_period = MIN(def_gc_period, 600 S);
2444
2445 struct rtable_config *rc;
2446 WALK_LIST(rc, c->tables)
2447 if (rc->gc_period == (uint) -1)
2448 rc->gc_period = (uint) def_gc_period;
2449 }
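
/*
 * The default GC period therefore scales with the configuration size while
 * staying within [10 s, 600 s]: 5 tables give 400 MS * 5 = 2 s, clamped up
 * to 10 s; 100 tables give 40 s; anything above 1500 tables is capped at
 * the 600 s maximum.
 */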
2450
2451
2452 /*
2453 * Some functions for handling internal next hop updates
2454 * triggered by rt_schedule_nhu().
2455 */
2456
2457 void
2458 rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
2459 {
2460 a->hostentry = he;
2461 a->dest = he->dest;
2462 a->igp_metric = he->igp_metric;
2463
2464 if (a->dest != RTD_UNICAST)
2465 {
2466 /* No nexthop */
2467 no_nexthop:
2468 a->nh = (struct nexthop) {};
2469 if (mls)
2470 { /* Store the label stack for later changes */
2471 a->nh.labels_orig = a->nh.labels = mls->len;
2472 memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
2473 }
2474 return;
2475 }
2476
2477 if (((!mls) || (!mls->len)) && he->nexthop_linkable)
2478 { /* Just link the nexthop chain, no label append happens. */
2479 memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
2480 return;
2481 }
2482
2483 struct nexthop *nhp = NULL, *nhr = NULL;
2484 int skip_nexthop = 0;
2485
2486 for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
2487 {
2488 if (skip_nexthop)
2489 skip_nexthop--;
2490 else
2491 {
2492 nhr = nhp;
2493 nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
2494 }
2495
2496 memset(nhp, 0, NEXTHOP_MAX_SIZE);
2497 nhp->iface = nh->iface;
2498 nhp->weight = nh->weight;
2499
2500 if (mls)
2501 {
2502 nhp->labels = nh->labels + mls->len;
2503 nhp->labels_orig = mls->len;
2504 if (nhp->labels <= MPLS_MAX_LABEL_STACK)
2505 {
2506 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
2507 memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
2508 }
2509 else
2510 {
2511 log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
2512 nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
2513 skip_nexthop++;
2514 continue;
2515 }
2516 }
2517 else if (nh->labels)
2518 {
2519 nhp->labels = nh->labels;
2520 nhp->labels_orig = 0;
2521 memcpy(nhp->label, nh->label, nh->labels * sizeof(u32));
2522 }
2523
2524 if (ipa_nonzero(nh->gw))
2525 {
2526 nhp->gw = nh->gw; /* Router nexthop */
2527 nhp->flags |= (nh->flags & RNF_ONLINK);
2528 }
2529 else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK))
2530 nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */
2531 else if (ipa_nonzero(he->link))
2532 nhp->gw = he->link; /* Device nexthop with link-local address known */
2533 else
2534 nhp->gw = he->addr; /* Device nexthop with link-local address unknown */
2535 }
2536
2537 if (skip_nexthop)
2538 if (nhr)
2539 nhr->next = NULL;
2540 else
2541 {
2542 a->dest = RTD_UNREACHABLE;
2543 log(L_WARN "No valid nexthop remaining, setting route unreachable");
2544 goto no_nexthop;
2545 }
2546 }
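
/*
 * A label-merging example for the unicast case above (values illustrative):
 * a hostentry nexthop carrying transport label [16001] combined with
 * mls = { .len = 2, .stack = { 20, 30 } } yields nhp->label = [16001, 20, 30],
 * nhp->labels = 3 and nhp->labels_orig = 2, so rt_next_hop_update_rte() can
 * later strip the two bottom labels again and re-apply a fresh hostentry.
 */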
2547
2548 static inline int
2549 rta_next_hop_outdated(rta *a)
2550 {
2551 struct hostentry *he = a->hostentry;
2552
2553 if (!he)
2554 return 0;
2555
2556 if (!he->src)
2557 return a->dest != RTD_UNREACHABLE;
2558
2559 return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
2560 (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
2561 }
2562
2563 static inline rte *
2564 rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
2565 {
2566 if (!rta_next_hop_outdated(old->attrs))
2567 return NULL;
2568
2569 rta *a = alloca(RTA_MAX_SIZE);
2570 memcpy(a, old->attrs, rta_size(old->attrs));
2571
2572 mpls_label_stack mls = { .len = a->nh.labels_orig };
2573 memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
2574
2575 rta_apply_hostentry(a, old->attrs->hostentry, &mls);
2576 a->cached = 0;
2577
2578 rte *e = sl_alloc(rte_slab);
2579 memcpy(e, old, sizeof(rte));
2580 e->attrs = rta_lookup(a);
2581 rt_lock_source(e->src);
2582
2583 return e;
2584 }
2585
2586
2587 #ifdef CONFIG_BGP
2588
2589 static inline int
2590 net_flow_has_dst_prefix(const net_addr *n)
2591 {
2592 ASSUME(net_is_flow(n));
2593
2594 if (n->pxlen)
2595 return 1;
2596
2597 if (n->type == NET_FLOW4)
2598 {
2599 const net_addr_flow4 *n4 = (void *) n;
2600 return (n4->length > sizeof(net_addr_flow4)) && (n4->data[0] == FLOW_TYPE_DST_PREFIX);
2601 }
2602 else
2603 {
2604 const net_addr_flow6 *n6 = (void *) n;
2605 return (n6->length > sizeof(net_addr_flow6)) && (n6->data[0] == FLOW_TYPE_DST_PREFIX);
2606 }
2607 }
2608
2609 static inline int
2610 rta_as_path_is_empty(rta *a)
2611 {
2612 eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2613 return !e || (as_path_getlen(e->u.ptr) == 0);
2614 }
2615
2616 static inline u32
2617 rta_get_first_asn(rta *a)
2618 {
2619 eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2620 u32 asn;
2621
2622 return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0;
2623 }
2624
2625 int
2626 rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior)
2627 {
2628 ASSERT(rt_is_ip(tab_ip));
2629 ASSERT(rt_is_flow(tab_flow));
2630 ASSERT(tab_ip->trie);
2631
2632 /* RFC 8955 6. a) Flowspec has defined dst prefix */
2633 if (!net_flow_has_dst_prefix(n))
2634 return 0;
2635
2636 /* RFC 9117 4.1. Accept if AS_PATH is empty (route originated in the local domain) */
2637 if (interior && rta_as_path_is_empty(a))
2638 return 1;
2639
2640
2641 /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */
2642
2643 /* Find flowspec dst prefix */
2644 net_addr dst;
2645 if (n->type == NET_FLOW4)
2646 net_fill_ip4(&dst, net4_prefix(n), net4_pxlen(n));
2647 else
2648 net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n));
2649
2650 /* Find best-match BGP unicast route for flowspec dst prefix */
2651 net *nb = net_route(tab_ip, &dst);
2652 rte *rb = nb ? nb->routes : NULL;
2653
2654 /* Register prefix to trie for tracking further changes */
2655 int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH;
2656 trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen);
2657
2658 /* No best-match BGP route -> no flowspec */
2659 if (!rb || (rb->attrs->source != RTS_BGP))
2660 return 0;
2661
2662 /* Find ORIGINATOR_ID values */
2663 u32 orig_a = ea_get_int(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0);
2664 u32 orig_b = ea_get_int(rb->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0);
2665
2666 /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */
2667 if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal(a->from, rb->attrs->from)))
2668 return 0;
2669
2670
2671 /* Find ASN of the best-match route, for use in next checks */
2672 u32 asn_b = rta_get_first_asn(rb->attrs);
2673 if (!asn_b)
2674 return 0;
2675
2676 /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */
2677 if (!interior && (rta_get_first_asn(a) != asn_b))
2678 return 0;
2679
2680 /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */
2681 TRIE_WALK(tab_ip->trie, subnet, &dst)
2682 {
2683 net *nc = net_find_valid(tab_ip, &subnet);
2684 if (!nc)
2685 continue;
2686
2687 rte *rc = nc->routes;
2688 if (rc->attrs->source != RTS_BGP)
2689 return 0;
2690
2691 if (rta_get_first_asn(rc->attrs) != asn_b)
2692 return 0;
2693 }
2694 TRIE_WALK_END;
2695
2696 return 1;
2697 }
2698
2699 #endif /* CONFIG_BGP */
2700
2701 static rte *
2702 rt_flowspec_update_rte(rtable *tab, rte *r)
2703 {
2704 #ifdef CONFIG_BGP
2705 if ((r->attrs->source != RTS_BGP) || (r->sender->proto != r->src->proto))
2706 return NULL;
2707
2708 struct bgp_channel *bc = (struct bgp_channel *) r->sender;
2709 if (!bc->base_table)
2710 return NULL;
2711
2712 const net_addr *n = r->net->n.addr;
2713 struct bgp_proto *p = (void *) r->src->proto;
2714 int valid = rt_flowspec_check(bc->base_table, tab, n, r->attrs, p->is_interior);
2715 int dest = valid ? RTD_NONE : RTD_UNREACHABLE;
2716
2717 if (dest == r->attrs->dest)
2718 return NULL;
2719
2720 rta *a = alloca(RTA_MAX_SIZE);
2721 memcpy(a, r->attrs, rta_size(r->attrs));
2722 a->dest = dest;
2723 a->cached = 0;
2724
2725 rte *new = sl_alloc(rte_slab);
2726 memcpy(new, r, sizeof(rte));
2727 new->attrs = rta_lookup(a);
2728 rt_lock_source(new->src);
2729
2730 return new;
2731 #else
2732 return NULL;
2733 #endif
2734 }
2735
2736
2737 static inline int
2738 rt_next_hop_update_net(rtable *tab, net *n)
2739 {
2740 rte **k, *e, *new, *old_best, **new_best;
2741 int count = 0;
2742 int free_old_best = 0;
2743
2744 old_best = n->routes;
2745 if (!old_best)
2746 return 0;
2747
2748 for (k = &n->routes; e = *k; k = &e->next)
2749 {
2750 if (!net_is_flow(n->n.addr))
2751 new = rt_next_hop_update_rte(tab, e);
2752 else
2753 new = rt_flowspec_update_rte(tab, e);
2754
2755 if (new)
2756 {
2757 *k = new;
2758
2759 rte_trace_in(D_ROUTES, new->sender, new, "updated");
2760 rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
2761
2762 /* Call a pre-comparison hook */
2763 /* Not really an efficient way to compute this */
2764 if (e->src->proto->rte_recalculate)
2765 e->src->proto->rte_recalculate(tab, n, new, e, NULL);
2766
2767 if (e != old_best)
2768 rte_free_quick(e);
2769 else /* Freeing of the old best rte is postponed */
2770 free_old_best = 1;
2771
2772 e = new;
2773 count++;
2774 }
2775 }
2776
2777 if (!count)
2778 return 0;
2779
2780 /* Find the new best route */
2781 new_best = NULL;
2782 for (k = &n->routes; e = *k; k = &e->next)
2783 {
2784 if (!new_best || rte_better(e, *new_best))
2785 new_best = k;
2786 }
2787
2788 /* Relink the new best route to the first position */
2789 new = *new_best;
2790 if (new != n->routes)
2791 {
2792 *new_best = new->next;
2793 new->next = n->routes;
2794 n->routes = new;
2795 }
2796
2797 /* Announce the new best route */
2798 if (new != old_best)
2799 rte_trace_in(D_ROUTES, new->sender, new, "updated [best]");
2800
2801 /* Propagate changes */
2802 rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best);
2803
2804 if (free_old_best)
2805 rte_free_quick(old_best);
2806
2807 return count;
2808 }
2809
2810 static void
2811 rt_next_hop_update(rtable *tab)
2812 {
2813 struct fib_iterator *fit = &tab->nhu_fit;
2814 int max_feed = 32;
2815
2816 if (tab->nhu_state == NHU_CLEAN)
2817 return;
2818
2819 if (tab->nhu_state == NHU_SCHEDULED)
2820 {
2821 FIB_ITERATE_INIT(fit, &tab->fib);
2822 tab->nhu_state = NHU_RUNNING;
2823
2824 if (tab->flowspec_trie)
2825 rt_flowspec_reset_trie(tab);
2826 }
2827
2828 FIB_ITERATE_START(&tab->fib, fit, net, n)
2829 {
2830 if (max_feed <= 0)
2831 {
2832 FIB_ITERATE_PUT(fit);
2833 ev_schedule(tab->rt_event);
2834 return;
2835 }
2836 max_feed -= rt_next_hop_update_net(tab, n);
2837 }
2838 FIB_ITERATE_END;
2839
2840 /* State change:
2841 * NHU_DIRTY -> NHU_SCHEDULED
2842 * NHU_RUNNING -> NHU_CLEAN
2843 */
2844 tab->nhu_state &= 1;
2845
2846 if (tab->nhu_state != NHU_CLEAN)
2847 ev_schedule(tab->rt_event);
2848 }
2849
2850
2851 struct rtable_config *
2852 rt_new_table(struct symbol *s, uint addr_type)
2853 {
2854 /* Hack that allows us to 'redefine' the master table */
2855 if ((s->class == SYM_TABLE) &&
2856 (s->table == new_config->def_tables[addr_type]) &&
2857 ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
2858 return s->table;
2859
2860 struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
2861
2862 cf_define_symbol(new_config, s, SYM_TABLE, table, c);
2863 c->name = s->name;
2864 c->addr_type = addr_type;
2865 c->gc_threshold = 1000;
2866 c->gc_period = (uint) -1; /* set in rt_postconfig() */
2867 c->min_settle_time = 1 S;
2868 c->max_settle_time = 20 S;
2869
2870 add_tail(&new_config->tables, &c->n);
2871
2872 /* First table of each type is kept as default */
2873 if (! new_config->def_tables[addr_type])
2874 new_config->def_tables[addr_type] = c;
2875
2876 return c;
2877 }
2878
2879 /**
2880 * rt_lock_table - lock a routing table
2881 * @r: routing table to be locked
2882 *
2883 * Lock a routing table because it's in use by a protocol,
2884 * preventing it from being freed when it gets undefined in a new
2885 * configuration.
2886 */
2887 void
2888 rt_lock_table(rtable *r)
2889 {
2890 r->use_count++;
2891 }
2892
2893 /**
2894 * rt_unlock_table - unlock a routing table
2895 * @r: routing table to be unlocked
2896 *
2897 * Unlock a routing table formerly locked by rt_lock_table(),
2898 * that is, decrease its use count and delete it if it's scheduled
2899 * for deletion by configuration changes.
2900 */
2901 void
2902 rt_unlock_table(rtable *r)
2903 {
2904 if (!--r->use_count && r->deleted)
2905 {
2906 struct config *conf = r->deleted;
2907
2908 /* Delete the routing table by freeing its pool */
2909 rt_shutdown(r);
2910 config_del_obstacle(conf);
2911 }
2912 }
2913
2914 static int
2915 rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old)
2916 {
2917 if ((new->addr_type != old->addr_type) ||
2918 (new->sorted != old->sorted) ||
2919 (new->trie_used != old->trie_used))
2920 return 0;
2921
2922 DBG("\t%s: same\n", new->name);
2923 new->table = tab;
2924 tab->name = new->name;
2925 tab->config = new;
2926
2927 return 1;
2928 }
2929
2930 static struct rtable_config *
2931 rt_find_table_config(struct config *cf, char *name)
2932 {
2933 struct symbol *sym = cf_find_symbol(cf, name);
2934 return (sym && (sym->class == SYM_TABLE)) ? sym->table : NULL;
2935 }
2936
2937 /**
2938 * rt_commit - commit new routing table configuration
2939 * @new: new configuration
2940 * @old: original configuration or %NULL if it's boot time config
2941 *
2942 * Scan differences between @old and @new configuration and modify
2943 * the routing tables according to these changes. If @new defines a
2944 * previously unknown table, create it; if it omits a table existing
2945 * in @old, schedule it for deletion (it gets deleted when all protocols
2946 * disconnect from it by calling rt_unlock_table()); if it exists
2947 * in both configurations, leave it unchanged.
2948 */
2949 void
2950 rt_commit(struct config *new, struct config *old)
2951 {
2952 struct rtable_config *o, *r;
2953
2954 DBG("rt_commit:\n");
2955 if (old)
2956 {
2957 WALK_LIST(o, old->tables)
2958 {
2959 rtable *tab = o->table;
2960 if (tab->deleted)
2961 continue;
2962
2963 r = rt_find_table_config(new, o->name);
2964 if (r && !new->shutdown && rt_reconfigure(tab, r, o))
2965 continue;
2966
2967 DBG("\t%s: deleted\n", o->name);
2968 tab->deleted = old;
2969 config_add_obstacle(old);
2970 rt_lock_table(tab);
2971 rt_unlock_table(tab);
2972 }
2973 }
2974
2975 WALK_LIST(r, new->tables)
2976 if (!r->table)
2977 {
2978 r->table = rt_setup(rt_table_pool, r);
2979 DBG("\t%s: created\n", r->name);
2980 add_tail(&routing_tables, &r->table->n);
2981 }
2982 DBG("\tdone\n");
2983 }
2984
2985 static inline void
2986 do_feed_channel(struct channel *c, net *n, rte *e)
2987 {
2988 rte_update_lock();
2989 if (c->ra_mode == RA_ACCEPTED)
2990 rt_notify_accepted(c, n, NULL, NULL, c->refeeding);
2991 else if (c->ra_mode == RA_MERGED)
2992 rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding);
2993 else /* RA_BASIC */
2994 rt_notify_basic(c, n, e, e, c->refeeding);
2995 rte_update_unlock();
2996 }
2997
2998 /**
2999 * rt_feed_channel - advertise all routes to a channel
3000 * @c: channel to be fed
3001 *
3002 * This function performs one pass of advertisement of routes to a channel that
3003 * is in the ES_FEEDING state. It is called by the protocol code as long as it
3004 * has something to do. (We avoid transferring all the routes in a single pass in
3005 * order not to monopolize CPU time.)
3006 */
3007 int
3008 rt_feed_channel(struct channel *c)
3009 {
3010 struct fib_iterator *fit = &c->feed_fit;
3011 int max_feed = 256;
3012
3013 ASSERT(c->export_state == ES_FEEDING);
3014
3015 if (!c->feed_active)
3016 {
3017 FIB_ITERATE_INIT(fit, &c->table->fib);
3018 c->feed_active = 1;
3019 }
3020
3021 FIB_ITERATE_START(&c->table->fib, fit, net, n)
3022 {
3023 rte *e = n->routes;
3024 if (max_feed <= 0)
3025 {
3026 FIB_ITERATE_PUT(fit);
3027 return 0;
3028 }
3029
3030 if ((c->ra_mode == RA_OPTIMAL) ||
3031 (c->ra_mode == RA_ACCEPTED) ||
3032 (c->ra_mode == RA_MERGED))
3033 if (rte_is_valid(e))
3034 {
3035 /* In the meantime, the protocol may have gone down */
3036 if (c->export_state != ES_FEEDING)
3037 goto done;
3038
3039 do_feed_channel(c, n, e);
3040 max_feed--;
3041 }
3042
3043 if (c->ra_mode == RA_ANY)
3044 for(e = n->routes; e; e = e->next)
3045 {
3046 /* In the meantime, the protocol may have gone down */
3047 if (c->export_state != ES_FEEDING)
3048 goto done;
3049
3050 if (!rte_is_valid(e))
3051 continue;
3052
3053 do_feed_channel(c, n, e);
3054 max_feed--;
3055 }
3056 }
3057 FIB_ITERATE_END;
3058
3059 done:
3060 c->feed_active = 0;
3061 return 1;
3062 }
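
/*
 * A sketch of the caller's side (cf. channel_feed_loop() in nest/proto.c);
 * the exact event used to resume the feed is the caller's business:
 *
 *   if (!rt_feed_channel(c))
 *   {
 *     ev_schedule(c->feed_event);   // table not finished, continue later
 *     return;
 *   }
 *   // whole table fed, the channel may leave ES_FEEDING
 */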
3063
3064 /**
3065 * rt_feed_channel_abort - abort protocol feeding
3066 * @c: channel
3067 *
3068 * This function is called by the protocol code when the protocol stops or
3069 * ceases to exist during the feeding.
3070 */
3071 void
3072 rt_feed_channel_abort(struct channel *c)
3073 {
3074 if (c->feed_active)
3075 {
3076 /* Unlink the iterator */
3077 fit_get(&c->table->fib, &c->feed_fit);
3078 c->feed_active = 0;
3079 }
3080 }
3081
3082
3083 /*
3084 * Import table
3085 */
3086
3087 int
3088 rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
3089 {
3090 struct rtable *tab = c->in_table;
3091 rte *old, **pos;
3092 net *net;
3093
3094 if (new)
3095 {
3096 net = net_get(tab, n);
3097
3098 if (!rta_is_cached(new->attrs))
3099 new->attrs = rta_lookup(new->attrs);
3100 }
3101 else
3102 {
3103 net = net_find(tab, n);
3104
3105 if (!net)
3106 goto drop_withdraw;
3107 }
3108
3109 /* Find the old rte */
3110 for (pos = &net->routes; old = *pos; pos = &old->next)
3111 if (old->src == src)
3112 {
3113 if (new && rte_same(old, new))
3114 {
3115 /* Refresh the old rte, continue with update to main rtable */
3116 if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
3117 {
3118 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
3119
3120 return 1;
3121 }
3122
3123 goto drop_update;
3124 }
3125
3126 /* Move iterator if needed */
3127 if (old == c->reload_next_rte)
3128 c->reload_next_rte = old->next;
3129
3130 /* Remove the old rte */
3131 *pos = old->next;
3132 tab->rt_count--;
3133 break;
3134 }
3135
3136 if (!old && !new)
3137 goto drop_withdraw;
3138
3139 struct channel_limit *l = &c->rx_limit;
3140 if (l->action && !old && new)
3141 {
3142 if (tab->rt_count >= l->limit)
3143 channel_notify_limit(c, l, PLD_RX, tab->rt_count);
3144
3145 if (l->state == PLS_BLOCKED)
3146 {
3147 /* Required by rte_trace_in() */
3148 new->net = net;
3149
3150 rte_trace_in(D_FILTERS, c, new, "ignored [limit]");
3151 goto drop_update;
3152 }
3153 }
3154
3155 if (new)
3156 {
3157 /* Insert the new rte */
3158 rte *e = rte_do_cow(new);
3159 e->flags |= REF_COW;
3160 e->net = net;
3161 e->sender = c;
3162 e->lastmod = current_time();
3163 e->next = *pos;
3164 *pos = new = e;
3165 tab->rt_count++;
3166
3167 if (!old)
3168 {
3169 new->id = hmap_first_zero(&tab->id_map);
3170 hmap_set(&tab->id_map, new->id);
3171 }
3172 else
3173 new->id = old->id;
3174 }
3175
3176 rte_announce(tab, RA_ANY, net, new, old, NULL, NULL);
3177
3178 if (old)
3179 {
3180 if (!new)
3181 hmap_clear(&tab->id_map, old->id);
3182
3183 rte_free_quick(old);
3184 }
3185
3186 if (!net->routes)
3187 fib_delete(&tab->fib, net);
3188
3189 return 1;
3190
3191 drop_update:
3192 c->stats.imp_updates_received++;
3193 c->stats.imp_updates_ignored++;
3194 rte_free(new);
3195
3196 if (!net->routes)
3197 fib_delete(&tab->fib, net);
3198
3199 return 0;
3200
3201 drop_withdraw:
3202 c->stats.imp_withdraws_received++;
3203 c->stats.imp_withdraws_ignored++;
3204 return 0;
3205 }
3206
3207 int
3208 rt_reload_channel(struct channel *c)
3209 {
3210 struct rtable *tab = c->in_table;
3211 struct fib_iterator *fit = &c->reload_fit;
3212 int max_feed = 64;
3213
3214 ASSERT(c->channel_state == CS_UP);
3215
3216 if (!c->reload_active)
3217 {
3218 FIB_ITERATE_INIT(fit, &tab->fib);
3219 c->reload_active = 1;
3220 }
3221
3222 do {
3223 for (rte *e = c->reload_next_rte; e; e = e->next)
3224 {
3225 if (max_feed-- <= 0)
3226 {
3227 c->reload_next_rte = e;
3228 debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed);
3229 return 0;
3230 }
3231
3232 rte_update2(c, e->net->n.addr, rte_do_cow(e), e->src);
3233 }
3234
3235 c->reload_next_rte = NULL;
3236
3237 FIB_ITERATE_START(&tab->fib, fit, net, n)
3238 {
3239 if (c->reload_next_rte = n->routes)
3240 {
3241 FIB_ITERATE_PUT_NEXT(fit, &tab->fib);
3242 break;
3243 }
3244 }
3245 FIB_ITERATE_END;
3246 }
3247 while (c->reload_next_rte);
3248
3249 c->reload_active = 0;
3250 return 1;
3251 }
3252
3253 void
3254 rt_reload_channel_abort(struct channel *c)
3255 {
3256 if (c->reload_active)
3257 {
3258 /* Unlink the iterator */
3259 fit_get(&c->in_table->fib, &c->reload_fit);
3260 c->reload_next_rte = NULL;
3261 c->reload_active = 0;
3262 }
3263 }
3264
3265 void
3266 rt_prune_sync(rtable *t, int all)
3267 {
3268 struct fib_iterator fit;
3269
3270 FIB_ITERATE_INIT(&fit, &t->fib);
3271
3272 again:
3273 FIB_ITERATE_START(&t->fib, &fit, net, n)
3274 {
3275 rte *e, **ee = &n->routes;
3276
3277 while (e = *ee)
3278 {
3279 if (all || (e->flags & (REF_STALE | REF_DISCARD)))
3280 {
3281 *ee = e->next;
3282 rte_free_quick(e);
3283 t->rt_count--;
3284 }
3285 else
3286 ee = &e->next;
3287 }
3288
3289 if (all || !n->routes)
3290 {
3291 FIB_ITERATE_PUT(&fit);
3292 fib_delete(&t->fib, n);
3293 goto again;
3294 }
3295 }
3296 FIB_ITERATE_END;
3297 }
3298
3299
3300 /*
3301 * Export table
3302 */
3303
3304 int
3305 rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed)
3306 {
3307 struct rtable *tab = c->out_table;
3308 struct rte_src *src;
3309 rte *old, **pos;
3310 net *net;
3311
3312 if (new)
3313 {
3314 net = net_get(tab, n);
3315 src = new->src;
3316
3317 if (!rta_is_cached(new->attrs))
3318 new->attrs = rta_lookup(new->attrs);
3319 }
3320 else
3321 {
3322 net = net_find(tab, n);
3323 src = old0->src;
3324
3325 if (!net)
3326 goto drop_withdraw;
3327 }
3328
3329 /* Find the old rte */
3330 for (pos = &net->routes; old = *pos; pos = &old->next)
3331 if ((c->ra_mode != RA_ANY) || (old->src == src))
3332 {
3333 if (new && rte_same(old, new))
3334 {
3335 /* REF_STALE / REF_DISCARD not used in export table */
3336 /*
3337 if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY))
3338 {
3339 old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
3340 return 1;
3341 }
3342 */
3343
3344 goto drop_update;
3345 }
3346
3347 /* Remove the old rte */
3348 *pos = old->next;
3349 rte_free_quick(old);
3350 tab->rt_count--;
3351
3352 break;
3353 }
3354
3355 if (!new)
3356 {
3357 if (!old)
3358 goto drop_withdraw;
3359
3360 if (!net->routes)
3361 fib_delete(&tab->fib, net);
3362
3363 return 1;
3364 }
3365
3366 /* Insert the new rte */
3367 rte *e = rte_do_cow(new);
3368 e->flags |= REF_COW;
3369 e->net = net;
3370 e->sender = c;
3371 e->lastmod = current_time();
3372 e->next = *pos;
3373 *pos = e;
3374 tab->rt_count++;
3375 return 1;
3376
3377 drop_update:
3378 return refeed;
3379
3380 drop_withdraw:
3381 return 0;
3382 }
3383
3384
3385 /*
3386 * Hostcache
3387 */
3388
3389 static inline u32
3390 hc_hash(ip_addr a, rtable *dep)
3391 {
3392 return ipa_hash(a) ^ ptr_hash(dep);
3393 }
3394
3395 static inline void
3396 hc_insert(struct hostcache *hc, struct hostentry *he)
3397 {
3398 uint k = he->hash_key >> hc->hash_shift;
3399 he->next = hc->hash_table[k];
3400 hc->hash_table[k] = he;
3401 }
3402
3403 static inline void
3404 hc_remove(struct hostcache *hc, struct hostentry *he)
3405 {
3406 struct hostentry **hep;
3407 uint k = he->hash_key >> hc->hash_shift;
3408
3409 for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
3410 *hep = he->next;
3411 }
3412
3413 #define HC_DEF_ORDER 10
3414 #define HC_HI_MARK *4
3415 #define HC_HI_STEP 2
3416 #define HC_HI_ORDER 16 /* Must be at most 16 */
3417 #define HC_LO_MARK /5
3418 #define HC_LO_STEP 2
3419 #define HC_LO_ORDER 10
3420
3421 static void
3422 hc_alloc_table(struct hostcache *hc, pool *p, unsigned order)
3423 {
3424 uint hsize = 1 << order;
3425 hc->hash_order = order;
3426 hc->hash_shift = 32 - order;
3427 hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
3428 hc->hash_min = (order <= HC_LO_ORDER) ? 0U : (hsize HC_LO_MARK);
3429
3430 hc->hash_table = mb_allocz(p, hsize * sizeof(struct hostentry *));
3431 }
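
/*
 * For example, order 12 gives 4096 buckets, a grow threshold of
 * hash_max = 4096 * 4 = 16384 entries (HC_HI_MARK), a shrink threshold of
 * hash_min = 4096 / 5 = 819 (HC_LO_MARK), and hc_insert() picks the bucket
 * from the top 12 bits of the hash key (hash_key >> 20).
 */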
3432
3433 static void
3434 hc_resize(struct hostcache *hc, pool *p, unsigned new_order)
3435 {
3436 struct hostentry **old_table = hc->hash_table;
3437 struct hostentry *he, *hen;
3438 uint old_size = 1 << hc->hash_order;
3439 uint i;
3440
3441 hc_alloc_table(hc, p, new_order);
3442 for (i = 0; i < old_size; i++)
3443 for (he = old_table[i]; he != NULL; he=hen)
3444 {
3445 hen = he->next;
3446 hc_insert(hc, he);
3447 }
3448 mb_free(old_table);
3449 }
3450
3451 static struct hostentry *
3452 hc_new_hostentry(struct hostcache *hc, pool *p, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
3453 {
3454 struct hostentry *he = sl_alloc(hc->slab);
3455
3456 *he = (struct hostentry) {
3457 .addr = a,
3458 .link = ll,
3459 .tab = dep,
3460 .hash_key = k,
3461 };
3462
3463 add_tail(&hc->hostentries, &he->ln);
3464 hc_insert(hc, he);
3465
3466 hc->hash_items++;
3467 if (hc->hash_items > hc->hash_max)
3468 hc_resize(hc, p, hc->hash_order + HC_HI_STEP);
3469
3470 return he;
3471 }
3472
3473 static void
3474 hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he)
3475 {
3476 rta_free(he->src);
3477
3478 rem_node(&he->ln);
3479 hc_remove(hc, he);
3480 sl_free(he);
3481
3482 hc->hash_items--;
3483 if (hc->hash_items < hc->hash_min)
3484 hc_resize(hc, p, hc->hash_order - HC_LO_STEP);
3485 }
3486
3487 static void
3488 rt_init_hostcache(rtable *tab)
3489 {
3490 struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache));
3491 init_list(&hc->hostentries);
3492
3493 hc->hash_items = 0;
3494 hc_alloc_table(hc, tab->rp, HC_DEF_ORDER);
3495 hc->slab = sl_new(tab->rp, sizeof(struct hostentry));
3496
3497 hc->lp = lp_new(tab->rp);
3498 hc->trie = f_new_trie(hc->lp, 0);
3499
3500 tab->hostcache = hc;
3501 }
3502
3503 static void
3504 rt_free_hostcache(rtable *tab)
3505 {
3506 struct hostcache *hc = tab->hostcache;
3507
3508 node *n;
3509 WALK_LIST(n, hc->hostentries)
3510 {
3511 struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
3512 rta_free(he->src);
3513
3514 if (he->uc)
3515 log(L_ERR "Hostcache is not empty in table %s", tab->name);
3516 }
3517
3518 /* Freed automagically by the resource pool
3519 rfree(hc->slab);
3520 rfree(hc->lp);
3521 mb_free(hc->hash_table);
3522 mb_free(hc);
3523 */
3524 }
3525
3526 static void
3527 rt_notify_hostcache(rtable *tab, net *net)
3528 {
3529 if (tab->hcu_scheduled)
3530 return;
3531
3532 if (trie_match_net(tab->hostcache->trie, net->n.addr))
3533 rt_schedule_hcu(tab);
3534 }
3535
3536 static int
3537 if_local_addr(ip_addr a, struct iface *i)
3538 {
3539 struct ifa *b;
3540
3541 WALK_LIST(b, i->addrs)
3542 if (ipa_equal(a, b->ip))
3543 return 1;
3544
3545 return 0;
3546 }
3547
3548 u32
3549 rt_get_igp_metric(rte *rt)
3550 {
3551 eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
3552
3553 if (ea)
3554 return ea->u.data;
3555
3556 if (rt->attrs->source == RTS_DEVICE)
3557 return 0;
3558
3559 if (rt->src->proto->rte_igp_metric)
3560 return rt->src->proto->rte_igp_metric(rt);
3561
3562 return IGP_METRIC_UNKNOWN;
3563 }
3564
3565 static int
3566 rt_update_hostentry(rtable *tab, struct hostentry *he)
3567 {
3568 rta *old_src = he->src;
3569 int direct = 0;
3570 int pxlen = 0;
3571
3572 /* Reset the hostentry */
3573 he->src = NULL;
3574 he->dest = RTD_UNREACHABLE;
3575 he->nexthop_linkable = 0;
3576 he->igp_metric = 0;
3577
3578 net_addr he_addr;
3579 net_fill_ip_host(&he_addr, he->addr);
3580 net *n = net_route(tab, &he_addr);
3581 if (n)
3582 {
3583 rte *e = n->routes;
3584 rta *a = e->attrs;
3585 word pref = a->pref;
3586
3587 for (rte *ee = n->routes; ee; ee = ee->next)
3588 if ((ee->attrs->pref >= pref) && ee->attrs->hostentry)
3589 {
3590 /* Recursive route should not depend on another recursive route */
3591 log(L_WARN "Next hop address %I resolvable through recursive route for %N",
3592 he->addr, n->n.addr);
3593 goto done;
3594 }
3595
3596 pxlen = n->n.addr->pxlen;
3597
3598 if (a->dest == RTD_UNICAST)
3599 {
3600 for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
3601 if (ipa_zero(nh->gw))
3602 {
3603 if (if_local_addr(he->addr, nh->iface))
3604 {
3605 /* The host address is a local address, which is not valid */
3606 log(L_WARN "Next hop address %I is a local address of iface %s",
3607 he->addr, nh->iface->name);
3608 goto done;
3609 }
3610
3611 direct++;
3612 }
3613 }
3614
3615 he->src = rta_clone(a);
3616 he->dest = a->dest;
3617 he->nexthop_linkable = !direct;
3618 he->igp_metric = rt_get_igp_metric(e);
3619 }
3620
3621 done:
3622 /* Add a prefix range to the trie */
3623 trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
3624
3625 rta_free(old_src);
3626 return old_src != he->src;
3627 }
3628
3629 static void
3630 rt_update_hostcache(rtable *tab)
3631 {
3632 struct hostcache *hc = tab->hostcache;
3633 struct hostentry *he;
3634 node *n, *x;
3635
3636 /* Reset the trie */
3637 lp_flush(hc->lp);
3638 hc->trie = f_new_trie(hc->lp, 0);
3639
3640 WALK_LIST_DELSAFE(n, x, hc->hostentries)
3641 {
3642 he = SKIP_BACK(struct hostentry, ln, n);
3643 if (!he->uc)
3644 {
3645 hc_delete_hostentry(hc, tab->rp, he);
3646 continue;
3647 }
3648
3649 if (rt_update_hostentry(tab, he))
3650 rt_schedule_nhu(he->tab);
3651 }
3652
3653 tab->hcu_scheduled = 0;
3654 }
3655
3656 struct hostentry *
3657 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
3658 {
3659 ip_addr link = ipa_zero(ll) ? a : ll;
3660 struct hostentry *he;
3661
3662 if (!tab->hostcache)
3663 rt_init_hostcache(tab);
3664
3665 u32 k = hc_hash(a, dep);
3666 struct hostcache *hc = tab->hostcache;
3667 for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
3668 if (ipa_equal(he->addr, a) && ipa_equal(he->link, link) && (he->tab == dep))
3669 return he;
3670
3671 he = hc_new_hostentry(hc, tab->rp, a, link, dep, k);
3672 rt_update_hostentry(tab, he);
3673 return he;
3674 }
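
/*
 * A minimal sketch of attaching a recursive next hop to a route; this is
 * what the rta_set_recursive_next_hop() inline in nest/route.h wraps, with
 * `igp_table`, `dep_table`, `gw`, `ll` and `mls` supplied by the caller:
 *
 *   struct hostentry *he = rt_get_hostentry(igp_table, gw, ll, dep_table);
 *   rta_apply_hostentry(a, he, mls);
 */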
3675
3676
3677 /*
3678 * Documentation for functions declared inline in route.h
3679 */
3680 #if 0
3681
3682 /**
3683 * net_find - find a network entry
3684 * @tab: a routing table
3685 * @addr: address of the network
3686 *
3687 * net_find() looks up the given network in routing table @tab and
3688 * returns a pointer to its &net entry or %NULL if no such network
3689 * exists.
3690 */
3691 static inline net *net_find(rtable *tab, net_addr *addr)
3692 { DUMMY; }
3693
3694 /**
3695 * net_get - obtain a network entry
3696 * @tab: a routing table
3697 * @addr: address of the network
3698 *
3699 * net_get() looks up the given network in routing table @tab and
3700 * returns a pointer to its &net entry. If no such entry exists, it's
3701 * created.
3702 */
3703 static inline net *net_get(rtable *tab, net_addr *addr)
3704 { DUMMY; }
3705
3706 /**
3707 * rte_cow - copy a route for writing
3708 * @r: a route entry to be copied
3709 *
3710 * rte_cow() takes a &rte and prepares it for modification. The exact action
3711 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
3712 * just returned unchanged, else a new temporary entry with the same contents
3713 * is created.
3714 *
3715 * The primary use of this function is inside the filter machinery -- when
3716 * a filter wants to modify &rte contents (to change the preference or to
3717 * attach another set of attributes), it must ensure that the &rte is not
3718 * shared with anyone else (and especially that it isn't stored in any routing
3719 * table).
3720 *
3721 * Result: a pointer to the new writable &rte.
3722 */
3723 static inline rte * rte_cow(rte *r)
3724 { DUMMY; }
3725
3726 #endif