]>
git.ipfire.org Git - thirdparty/bird.git/blob - nest/rt-table.c
2 * BIRD -- Routing Tables
4 * (c) 1998--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
12 * Routing tables are probably the most important structures BIRD uses. They
13 * hold all the information about known networks, the associated routes and
16 * There are multiple routing tables (a primary one together with any
17 * number of secondary ones if requested by the configuration). Each table
18 * is basically a FIB containing entries describing the individual
19 * destination networks. For each network (represented by structure &net),
20 * there is a one-way linked list of route entries (&rte), the first entry
21 * on the list being the best one (i.e., the one we currently use
22 * for routing), the order of the other ones is undetermined.
24 * The &rte contains information specific to the route (preference, protocol
25 * metrics, time of last modification etc.) and a pointer to a &rta structure
26 * (see the route attribute module for a precise explanation) holding the
27 * remaining route attributes which are expected to be shared by multiple
28 * routes in order to conserve memory.
33 #include "nest/bird.h"
34 #include "nest/route.h"
35 #include "nest/protocol.h"
36 #include "nest/iface.h"
37 #include "lib/resource.h"
38 #include "lib/event.h"
39 #include "lib/string.h"
40 #include "conf/conf.h"
41 #include "filter/filter.h"
42 #include "lib/string.h"
43 #include "lib/alloca.h"
47 static slab
*rte_slab
;
48 static linpool
*rte_update_pool
;
50 static list routing_tables
;
52 static void rt_free_hostcache(rtable
*tab
);
53 static void rt_notify_hostcache(rtable
*tab
, net
*net
);
54 static void rt_update_hostcache(rtable
*tab
);
55 static void rt_next_hop_update(rtable
*tab
);
56 static inline void rt_prune_table(rtable
*tab
);
59 /* Like fib_route(), but skips empty net entries */
61 net_route_ip4(rtable
*t
, net_addr_ip4
*n
)
65 while (r
= net_find_valid(t
, (net_addr
*) n
), (!r
) && (n
->pxlen
> 0))
68 ip4_clrbit(&n
->prefix
, n
->pxlen
);
75 net_route_ip6(rtable
*t
, net_addr_ip6
*n
)
79 while (r
= net_find_valid(t
, (net_addr
*) n
), (!r
) && (n
->pxlen
> 0))
82 ip6_clrbit(&n
->prefix
, n
->pxlen
);
89 net_route(rtable
*tab
, const net_addr
*n
)
91 ASSERT(tab
->addr_type
== n
->type
);
93 net_addr
*n0
= alloca(n
->length
);
101 return net_route_ip4(tab
, (net_addr_ip4
*) n0
);
106 return net_route_ip6(tab
, (net_addr_ip6
*) n0
);
115 net_roa_check_ip4(rtable
*tab
, const net_addr_ip4
*px
, u32 asn
)
117 struct net_addr_roa4 n
= NET_ADDR_ROA4(px
->prefix
, px
->pxlen
, 0, 0);
123 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
125 net_addr_roa4
*roa
= (void *) fn
->addr
;
126 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
128 if (net_equal_prefix_roa4(roa
, &n
) && rte_is_valid(r
->routes
))
131 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
140 ip4_clrbit(&n
.prefix
, n
.pxlen
);
143 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
147 net_roa_check_ip6(rtable
*tab
, const net_addr_ip6
*px
, u32 asn
)
149 struct net_addr_roa6 n
= NET_ADDR_ROA6(px
->prefix
, px
->pxlen
, 0, 0);
155 for (fn
= fib_get_chain(&tab
->fib
, (net_addr
*) &n
); fn
; fn
= fn
->next
)
157 net_addr_roa6
*roa
= (void *) fn
->addr
;
158 net
*r
= fib_node_to_user(&tab
->fib
, fn
);
160 if (net_equal_prefix_roa6(roa
, &n
) && rte_is_valid(r
->routes
))
163 if (asn
&& (roa
->asn
== asn
) && (roa
->max_pxlen
>= px
->pxlen
))
172 ip6_clrbit(&n
.prefix
, n
.pxlen
);
175 return anything
? ROA_INVALID
: ROA_UNKNOWN
;
179 * roa_check - check validity of route origination in a ROA table
181 * @n: network prefix to check
182 * @asn: AS number of network prefix
184 * Implements RFC 6483 route validation for the given network prefix. The
185 * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
186 * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
187 * a candidate ROA with matching ASN and maxlen field greater than or equal to
188 * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
189 * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
190 * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
191 * must have type NET_IP4 or NET_IP6, respectively.
194 net_roa_check(rtable
*tab
, const net_addr
*n
, u32 asn
)
196 if ((tab
->addr_type
== NET_ROA4
) && (n
->type
== NET_IP4
))
197 return net_roa_check_ip4(tab
, (const net_addr_ip4
*) n
, asn
);
198 else if ((tab
->addr_type
== NET_ROA6
) && (n
->type
== NET_IP6
))
199 return net_roa_check_ip6(tab
, (const net_addr_ip6
*) n
, asn
);
201 return ROA_UNKNOWN
; /* Should not happen */
205 * rte_find - find a route
209 * The rte_find() function returns a route for destination @net
210 * which is from route source @src.
213 rte_find(net
*net
, struct rte_src
*src
)
215 rte
*e
= net
->routes
;
217 while (e
&& e
->attrs
->src
!= src
)
223 * rte_get_temp - get a temporary &rte
224 * @a: attributes to assign to the new route (a &rta; in case it's
225 * un-cached, rte_update() will create a cached copy automatically)
227 * Create a temporary &rte and bind it with the attributes @a.
228 * Also set route preference to the default preference set for
234 rte
*e
= sl_alloc(rte_slab
);
245 rte
*e
= sl_alloc(rte_slab
);
247 memcpy(e
, r
, sizeof(rte
));
248 e
->attrs
= rta_clone(r
->attrs
);
254 * rte_cow_rta - get a private writable copy of &rte with writable &rta
255 * @r: a route entry to be copied
256 * @lp: a linpool from which to allocate &rta
258 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
259 * modification. There are three possibilities: First, both &rte and &rta are
260 * private copies, in that case they are returned unchanged. Second, &rte is
261 * private copy, but &rta is cached, in that case &rta is duplicated using
262 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
263 * both structures are duplicated by rte_do_cow() and rta_do_cow().
265 * Note that in the second case, cached &rta loses one reference, while private
266 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
267 * nexthops, ...) with it. To work properly, original shared &rta should have
268 * another reference during the life of created private copy.
270 * Result: a pointer to the new writable &rte with writable &rta.
273 rte_cow_rta(rte
*r
, linpool
*lp
)
275 if (!rta_is_cached(r
->attrs
))
279 rta
*a
= rta_do_cow(r
->attrs
, lp
);
285 static int /* Actually better or at least as good as */
286 rte_better(rte
*new, rte
*old
)
288 int (*better
)(rte
*, rte
*);
290 if (!rte_is_valid(old
))
292 if (!rte_is_valid(new))
295 if (new->pref
> old
->pref
)
297 if (new->pref
< old
->pref
)
299 if (new->attrs
->src
->proto
->proto
!= old
->attrs
->src
->proto
->proto
)
302 * If the user has configured protocol preferences, so that two different protocols
303 * have the same preference, try to break the tie by comparing addresses. Not too
304 * useful, but keeps the ordering of routes unambiguous.
306 return new->attrs
->src
->proto
->proto
> old
->attrs
->src
->proto
->proto
;
308 if (better
= new->attrs
->src
->proto
->rte_better
)
309 return better(new, old
);
314 rte_mergable(rte
*pri
, rte
*sec
)
316 int (*mergable
)(rte
*, rte
*);
318 if (!rte_is_valid(pri
) || !rte_is_valid(sec
))
321 if (pri
->pref
!= sec
->pref
)
324 if (pri
->attrs
->src
->proto
->proto
!= sec
->attrs
->src
->proto
->proto
)
327 if (mergable
= pri
->attrs
->src
->proto
->rte_mergable
)
328 return mergable(pri
, sec
);
334 rte_trace(struct proto
*p
, rte
*e
, int dir
, char *msg
)
336 log(L_TRACE
"%s %c %s %N %s", p
->name
, dir
, msg
, e
->net
->n
.addr
, rta_dest_name(e
->attrs
->dest
));
340 rte_trace_in(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
343 rte_trace(p
, e
, '>', msg
);
347 rte_trace_out(uint flag
, struct proto
*p
, rte
*e
, char *msg
)
350 rte_trace(p
, e
, '<', msg
);
354 export_filter_(struct channel
*c
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, linpool
*pool
, int silent
)
356 struct proto
*p
= c
->proto
;
357 struct filter
*filter
= c
->out_filter
;
358 struct proto_stats
*stats
= &c
->stats
;
359 ea_list
*tmpb
= NULL
;
369 *tmpa
= rte_make_tmp_attrs(rt
, pool
);
371 v
= p
->import_control
? p
->import_control(p
, &rt
, tmpa
, pool
) : 0;
377 stats
->exp_updates_rejected
++;
379 rte_trace_out(D_FILTERS
, p
, rt
, "rejected by protocol");
385 rte_trace_out(D_FILTERS
, p
, rt
, "forced accept by protocol");
389 v
= filter
&& ((filter
== FILTER_REJECT
) ||
390 (f_run(filter
, &rt
, tmpa
, pool
, FF_FORCE_TMPATTR
) > F_ACCEPT
));
396 stats
->exp_updates_filtered
++;
397 rte_trace_out(D_FILTERS
, p
, rt
, "filtered out");
407 /* Discard temporary rte */
414 export_filter(struct channel
*c
, rte
*rt0
, rte
**rt_free
, ea_list
**tmpa
, int silent
)
416 return export_filter_(c
, rt0
, rt_free
, tmpa
, rte_update_pool
, silent
);
420 do_rt_notify(struct channel
*c
, net
*net
, rte
*new, rte
*old
, ea_list
*tmpa
, int refeed
)
422 struct proto
*p
= c
->proto
;
423 struct proto_stats
*stats
= &c
->stats
;
427 * First, apply export limit.
429 * Export route limits has several problems. Because exp_routes
430 * counter is reset before refeed, we don't really know whether
431 * limit is breached and whether the update is new or not. Therefore
432 * the number of really exported routes may exceed the limit
433 * temporarily (routes exported before and new routes in refeed).
435 * Minor advantage is that if the limit is decreased and refeed is
436 * requested, the number of exported routes really decrease.
438 * Second problem is that with export limits, we don't know whether
439 * old was really exported (it might be blocked by limit). When a
440 * withdraw is exported, we announce it even when the previous
441 * update was blocked. This is not a big issue, but the same problem
442 * is in updating exp_routes counter. Therefore, to be consistent in
443 * increases and decreases of exp_routes, we count exported routes
444 * regardless of blocking by limits.
446 * Similar problem is in handling updates - when a new route is
447 * received and blocking is active, the route would be blocked, but
448 * when an update for the route will be received later, the update
449 * would be propagated (as old != NULL). Therefore, we have to block
450 * also non-new updates (contrary to import blocking).
453 struct channel_limit
*l
= &c
->out_limit
;
454 if (l
->action
&& new)
456 if ((!old
|| refeed
) && (stats
->exp_routes
>= l
->limit
))
457 channel_notify_limit(c
, l
, PLD_OUT
, stats
->exp_routes
);
459 if (l
->state
== PLS_BLOCKED
)
461 stats
->exp_routes
++; /* see note above */
462 stats
->exp_updates_rejected
++;
463 rte_trace_out(D_FILTERS
, p
, new, "rejected [limit]");
473 stats
->exp_updates_accepted
++;
475 stats
->exp_withdraws_accepted
++;
477 /* Hack: We do not decrease exp_routes during refeed, we instead
478 reset exp_routes at the start of refeed. */
484 if (p
->debug
& D_ROUTES
)
487 rte_trace_out(D_ROUTES
, p
, new, "replaced");
489 rte_trace_out(D_ROUTES
, p
, new, "added");
491 rte_trace_out(D_ROUTES
, p
, old
, "removed");
494 p
->rt_notify(p
, c
, net
, NULL
, old
, NULL
);
500 t
->next
= new->attrs
->eattrs
;
501 p
->rt_notify(p
, c
, net
, new, old
, tmpa
);
505 p
->rt_notify(p
, c
, net
, new, old
, new->attrs
->eattrs
);
509 rt_notify_basic(struct channel
*c
, net
*net
, rte
*new0
, rte
*old0
, int refeed
)
511 struct proto
*p
= c
->proto
;
515 rte
*new_free
= NULL
;
516 rte
*old_free
= NULL
;
517 ea_list
*tmpa
= NULL
;
520 c
->stats
.exp_updates_received
++;
522 c
->stats
.exp_withdraws_received
++;
525 * This is a tricky part - we don't know whether route 'old' was
526 * exported to protocol 'p' or was filtered by the export filter.
527 * We try to run the export filter to know this to have a correct
528 * value in 'old' argument of rte_update (and proper filter value)
530 * FIXME - this is broken because 'configure soft' may change
531 * filters but keep routes. Refeed is expected to be called after
532 * change of the filters and with old == new, therefore we do not
533 * even try to run the filter on an old route, This may lead to
534 * 'spurious withdraws' but ensure that there are no 'missing
537 * This is not completely safe as there is a window between
538 * reconfiguration and the end of refeed - if a newly filtered
539 * route disappears during this period, proper withdraw is not
540 * sent (because old would be also filtered) and the route is
541 * not refeeded (because it disappeared before that).
545 new = export_filter(c
, new, &new_free
, &tmpa
, 0);
548 old
= export_filter(c
, old
, &old_free
, NULL
, 1);
553 * As mentioned above, 'old' value may be incorrect in some race conditions.
554 * We generally ignore it with the exception of withdraw to pipe protocol.
555 * In that case we rather propagate unfiltered withdraws regardless of
556 * export filters to ensure that when a protocol is flushed, its routes are
557 * removed from all tables. Possible spurious unfiltered withdraws are not
558 * problem here as they are ignored if there is no corresponding route at
559 * the other end of the pipe. We directly call rt_notify() hook instead of
560 * do_rt_notify() to avoid logging and stat counters.
564 if ((p
->proto
== &proto_pipe
) && !new0
&& (p
!= old0
->sender
->proto
))
565 p
->rt_notify(p
, c
, net
, NULL
, old0
, NULL
);
571 do_rt_notify(c
, net
, new, old
, tmpa
, refeed
);
573 /* Discard temporary rte's */
581 rt_notify_accepted(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
, rte
*before_old
, int feed
)
583 // struct proto *p = c->proto;
586 rte
*new_best
= NULL
;
587 rte
*old_best
= NULL
;
588 rte
*new_free
= NULL
;
589 rte
*old_free
= NULL
;
590 ea_list
*tmpa
= NULL
;
592 /* Used to track whether we met old_changed position. If before_old is NULL
593 old_changed was the first and we met it implicitly before current best route. */
594 int old_meet
= old_changed
&& !before_old
;
596 /* Note that before_old is either NULL or valid (not rejected) route.
597 If old_changed is valid, before_old have to be too. If old changed route
598 was not valid, caller must use NULL for both old_changed and before_old. */
601 c
->stats
.exp_updates_received
++;
603 c
->stats
.exp_withdraws_received
++;
605 /* First, find the new_best route - first accepted by filters */
606 for (r
=net
->routes
; rte_is_valid(r
); r
=r
->next
)
608 if (new_best
= export_filter(c
, r
, &new_free
, &tmpa
, 0))
611 /* Note if we walked around the position of old_changed route */
617 * Second, handle the feed case. That means we do not care for
618 * old_best. It is NULL for feed, and the new_best for refeed.
619 * For refeed, there is a hack similar to one in rt_notify_basic()
620 * to ensure withdraws in case of changed filters
624 if (feed
== 2) /* refeed */
625 old_best
= new_best
? new_best
:
626 (rte_is_valid(net
->routes
) ? net
->routes
: NULL
);
630 if (!new_best
&& !old_best
)
637 * Now, we find the old_best route. Generally, it is the same as the
638 * new_best, unless new_best is the same as new_changed or
639 * old_changed is accepted before new_best.
641 * There are four cases:
643 * - We would find and accept old_changed before new_best, therefore
644 * old_changed is old_best. In remaining cases we suppose this
647 * - We found no new_best, therefore there is also no old_best and
648 * we ignore this withdraw.
650 * - We found new_best different than new_changed, therefore
651 * old_best is the same as new_best and we ignore this update.
653 * - We found new_best the same as new_changed, therefore it cannot
654 * be old_best and we have to continue search for old_best.
659 if (old_best
= export_filter(c
, old_changed
, &old_free
, NULL
, 1))
666 /* Third case, we use r instead of new_best, because export_filter() could change it */
667 if (r
!= new_changed
)
675 for (r
=r
->next
; rte_is_valid(r
); r
=r
->next
)
677 if (old_best
= export_filter(c
, r
, &old_free
, NULL
, 1))
681 if (old_best
= export_filter(c
, old_changed
, &old_free
, NULL
, 1))
685 /* Implicitly, old_best is NULL and new_best is non-NULL */
688 do_rt_notify(c
, net
, new_best
, old_best
, tmpa
, (feed
== 2));
690 /* Discard temporary rte's */
698 static struct nexthop
*
699 nexthop_merge_rta(struct nexthop
*nhs
, rta
*a
, linpool
*pool
, int max
)
701 return nexthop_merge(nhs
, &(a
->nh
), 1, 0, max
, pool
);
705 rt_export_merged(struct channel
*c
, net
*net
, rte
**rt_free
, ea_list
**tmpa
, linpool
*pool
, int silent
)
707 // struct proto *p = c->proto;
708 struct nexthop
*nhs
= NULL
;
709 rte
*best0
, *best
, *rt0
, *rt
, *tmp
;
714 if (!rte_is_valid(best0
))
717 best
= export_filter_(c
, best0
, rt_free
, tmpa
, pool
, silent
);
719 if (!best
|| !rte_is_reachable(best
))
722 for (rt0
= best0
->next
; rt0
; rt0
= rt0
->next
)
724 if (!rte_mergable(best0
, rt0
))
727 rt
= export_filter_(c
, rt0
, &tmp
, NULL
, pool
, 1);
732 if (rte_is_reachable(rt
))
733 nhs
= nexthop_merge_rta(nhs
, rt
->attrs
, pool
, c
->merge_limit
);
741 nhs
= nexthop_merge_rta(nhs
, best
->attrs
, pool
, c
->merge_limit
);
745 best
= rte_cow_rta(best
, pool
);
746 nexthop_link(best
->attrs
, nhs
);
758 rt_notify_merged(struct channel
*c
, net
*net
, rte
*new_changed
, rte
*old_changed
,
759 rte
*new_best
, rte
*old_best
, int refeed
)
761 // struct proto *p = c->proto;
763 rte
*new_best_free
= NULL
;
764 rte
*old_best_free
= NULL
;
765 rte
*new_changed_free
= NULL
;
766 rte
*old_changed_free
= NULL
;
767 ea_list
*tmpa
= NULL
;
769 /* We assume that all rte arguments are either NULL or rte_is_valid() */
771 /* This check should be done by the caller */
772 if (!new_best
&& !old_best
)
775 /* Check whether the change is relevant to the merged route */
776 if ((new_best
== old_best
) && !refeed
)
778 new_changed
= rte_mergable(new_best
, new_changed
) ?
779 export_filter(c
, new_changed
, &new_changed_free
, NULL
, 1) : NULL
;
781 old_changed
= rte_mergable(old_best
, old_changed
) ?
782 export_filter(c
, old_changed
, &old_changed_free
, NULL
, 1) : NULL
;
784 if (!new_changed
&& !old_changed
)
789 c
->stats
.exp_updates_received
++;
791 c
->stats
.exp_withdraws_received
++;
793 /* Prepare new merged route */
795 new_best
= rt_export_merged(c
, net
, &new_best_free
, &tmpa
, rte_update_pool
, 0);
797 /* Prepare old merged route (without proper merged next hops) */
798 /* There are some issues with running filter on old route - see rt_notify_basic() */
799 if (old_best
&& !refeed
)
800 old_best
= export_filter(c
, old_best
, &old_best_free
, NULL
, 1);
802 if (new_best
|| old_best
)
803 do_rt_notify(c
, net
, new_best
, old_best
, tmpa
, refeed
);
805 /* Discard temporary rte's */
807 rte_free(new_best_free
);
809 rte_free(old_best_free
);
810 if (new_changed_free
)
811 rte_free(new_changed_free
);
812 if (old_changed_free
)
813 rte_free(old_changed_free
);
818 * rte_announce - announce a routing table change
819 * @tab: table the route has been added to
820 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
821 * @net: network in question
822 * @new: the new route to be announced
823 * @old: the previous route for the same network
824 * @new_best: the new best route for the same network
825 * @old_best: the previous best route for the same network
826 * @before_old: The previous route before @old for the same network.
827 * If @before_old is NULL @old was the first.
829 * This function gets a routing table update and announces it
830 * to all protocols that acccepts given type of route announcement
831 * and are connected to the same table by their announcement hooks.
833 * Route announcement of type %RA_OPTIMAL si generated when optimal
834 * route (in routing table @tab) changes. In that case @old stores the
837 * Route announcement of type %RA_ANY si generated when any route (in
838 * routing table @tab) changes In that case @old stores the old route
839 * from the same protocol.
841 * For each appropriate protocol, we first call its import_control()
842 * hook which performs basic checks on the route (each protocol has a
843 * right to veto or force accept of the route before any filter is
844 * asked) and adds default values of attributes specific to the new
845 * protocol (metrics, tags etc.). Then it consults the protocol's
846 * export filter and if it accepts the route, the rt_notify() hook of
847 * the protocol gets called.
850 rte_announce(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
851 rte
*new_best
, rte
*old_best
, rte
*before_old
)
853 if (!rte_is_valid(new))
856 if (!rte_is_valid(old
))
857 old
= before_old
= NULL
;
859 if (!rte_is_valid(new_best
))
862 if (!rte_is_valid(old_best
))
868 if ((type
== RA_OPTIMAL
) && tab
->hostcache
)
869 rt_notify_hostcache(tab
, net
);
871 struct channel
*c
; node
*n
;
872 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
874 if (c
->export_state
== ES_DOWN
)
877 if (c
->ra_mode
== type
)
878 if (type
== RA_ACCEPTED
)
879 rt_notify_accepted(c
, net
, new, old
, before_old
, 0);
880 else if (type
== RA_MERGED
)
881 rt_notify_merged(c
, net
, new, old
, new_best
, old_best
, 0);
883 rt_notify_basic(c
, net
, new, old
, 0);
893 if (!net_validate(n
->n
.addr
))
895 log(L_WARN
"Ignoring bogus prefix %N received via %s",
896 n
->n
.addr
, e
->sender
->proto
->name
);
900 c
= net_classify(n
->n
.addr
);
901 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
903 log(L_WARN
"Ignoring bogus route %N received via %s",
904 n
->n
.addr
, e
->sender
->proto
->name
);
908 if (net_type_match(n
->n
.addr
, NB_DEST
) == !e
->attrs
->dest
)
910 log(L_WARN
"Ignoring route %N with invalid dest %d received via %s",
911 n
->n
.addr
, e
->attrs
->dest
, e
->sender
->proto
->name
);
915 if ((e
->attrs
->dest
== RTD_UNICAST
) && !nexthop_is_sorted(&(e
->attrs
->nh
)))
917 log(L_WARN
"Ignoring unsorted multipath route %N received via %s",
918 n
->n
.addr
, e
->sender
->proto
->name
);
926 * rte_free - delete a &rte
927 * @e: &rte to be deleted
929 * rte_free() deletes the given &rte from the routing table it's linked to.
934 if (rta_is_cached(e
->attrs
))
936 sl_free(rte_slab
, e
);
940 rte_free_quick(rte
*e
)
943 sl_free(rte_slab
, e
);
947 rte_same(rte
*x
, rte
*y
)
950 x
->attrs
== y
->attrs
&&
951 x
->flags
== y
->flags
&&
952 x
->pflags
== y
->pflags
&&
953 x
->pref
== y
->pref
&&
954 (!x
->attrs
->src
->proto
->rte_same
|| x
->attrs
->src
->proto
->rte_same(x
, y
));
957 static inline int rte_is_ok(rte
*e
) { return e
&& !rte_is_filtered(e
); }
960 rte_recalculate(struct channel
*c
, net
*net
, rte
*new, struct rte_src
*src
)
962 struct proto
*p
= c
->proto
;
963 struct rtable
*table
= c
->table
;
964 struct proto_stats
*stats
= &c
->stats
;
965 static struct tbf rl_pipe
= TBF_DEFAULT_LOG_LIMITS
;
966 rte
*before_old
= NULL
;
967 rte
*old_best
= net
->routes
;
971 k
= &net
->routes
; /* Find and remove original route from the same protocol */
974 if (old
->attrs
->src
== src
)
976 /* If there is the same route in the routing table but from
977 * a different sender, then there are two paths from the
978 * source protocol to this routing table through transparent
979 * pipes, which is not allowed.
981 * We log that and ignore the route. If it is withdraw, we
982 * ignore it completely (there might be 'spurious withdraws',
983 * see FIXME in do_rte_announce())
985 if (old
->sender
->proto
!= p
)
989 log_rl(&rl_pipe
, L_ERR
"Pipe collision detected when sending %N to table %s",
990 net
->n
.addr
, table
->name
);
996 if (new && rte_same(old
, new))
998 /* No changes, ignore the new route */
1000 if (!rte_is_filtered(new))
1002 stats
->imp_updates_ignored
++;
1003 rte_trace_in(D_ROUTES
, p
, new, "ignored");
1006 rte_free_quick(new);
1021 stats
->imp_withdraws_ignored
++;
1025 int new_ok
= rte_is_ok(new);
1026 int old_ok
= rte_is_ok(old
);
1028 struct channel_limit
*l
= &c
->rx_limit
;
1029 if (l
->action
&& !old
&& new)
1031 u32 all_routes
= stats
->imp_routes
+ stats
->filt_routes
;
1033 if (all_routes
>= l
->limit
)
1034 channel_notify_limit(c
, l
, PLD_RX
, all_routes
);
1036 if (l
->state
== PLS_BLOCKED
)
1038 /* In receive limit the situation is simple, old is NULL so
1039 we just free new and exit like nothing happened */
1041 stats
->imp_updates_ignored
++;
1042 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1043 rte_free_quick(new);
1049 if (l
->action
&& !old_ok
&& new_ok
)
1051 if (stats
->imp_routes
>= l
->limit
)
1052 channel_notify_limit(c
, l
, PLD_IN
, stats
->imp_routes
);
1054 if (l
->state
== PLS_BLOCKED
)
1056 /* In import limit the situation is more complicated. We
1057 shouldn't just drop the route, we should handle it like
1058 it was filtered. We also have to continue the route
1059 processing if old or new is non-NULL, but we should exit
1060 if both are NULL as this case is probably assumed to be
1063 stats
->imp_updates_ignored
++;
1064 rte_trace_in(D_FILTERS
, p
, new, "ignored [limit]");
1066 if (c
->in_keep_filtered
)
1067 new->flags
|= REF_FILTERED
;
1069 { rte_free_quick(new); new = NULL
; }
1071 /* Note that old && !new could be possible when
1072 c->in_keep_filtered changed in the recent past. */
1083 stats
->imp_updates_accepted
++;
1085 stats
->imp_withdraws_accepted
++;
1087 stats
->imp_withdraws_ignored
++;
1092 rte_is_filtered(new) ? stats
->filt_routes
++ : stats
->imp_routes
++;
1094 rte_is_filtered(old
) ? stats
->filt_routes
-- : stats
->imp_routes
--;
1096 if (table
->config
->sorted
)
1098 /* If routes are sorted, just insert new route to appropriate position */
1101 if (before_old
&& !rte_better(new, before_old
))
1102 k
= &before_old
->next
;
1106 for (; *k
; k
=&(*k
)->next
)
1107 if (rte_better(new, *k
))
1116 /* If routes are not sorted, find the best route and move it on
1117 the first position. There are several optimized cases. */
1119 if (src
->proto
->rte_recalculate
&& src
->proto
->rte_recalculate(table
, net
, new, old
, old_best
))
1120 goto do_recalculate
;
1122 if (new && rte_better(new, old_best
))
1124 /* The first case - the new route is cleary optimal,
1125 we link it at the first position */
1127 new->next
= net
->routes
;
1130 else if (old
== old_best
)
1132 /* The second case - the old best route disappeared, we add the
1133 new route (if we have any) to the list (we don't care about
1134 position) and then we elect the new optimal route and relink
1135 that route at the first position and announce it. New optimal
1136 route might be NULL if there is no more routes */
1139 /* Add the new route to the list */
1142 new->next
= net
->routes
;
1146 /* Find a new optimal route (if there is any) */
1149 rte
**bp
= &net
->routes
;
1150 for (k
=&(*bp
)->next
; *k
; k
=&(*k
)->next
)
1151 if (rte_better(*k
, *bp
))
1157 best
->next
= net
->routes
;
1163 /* The third case - the new route is not better than the old
1164 best route (therefore old_best != NULL) and the old best
1165 route was not removed (therefore old_best == net->routes).
1166 We just link the new route after the old best route. */
1168 ASSERT(net
->routes
!= NULL
);
1169 new->next
= net
->routes
->next
;
1170 net
->routes
->next
= new;
1172 /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1178 /* Log the route change */
1179 if (p
->debug
& D_ROUTES
)
1182 rte_trace(p
, new, '>', new == net
->routes
? "added [best]" : "added");
1185 if (old
!= old_best
)
1186 rte_trace(p
, old
, '>', "removed");
1187 else if (rte_is_ok(net
->routes
))
1188 rte_trace(p
, old
, '>', "removed [replaced]");
1190 rte_trace(p
, old
, '>', "removed [sole]");
1194 /* Propagate the route change */
1195 rte_announce(table
, RA_ANY
, net
, new, old
, NULL
, NULL
, NULL
);
1196 if (net
->routes
!= old_best
)
1197 rte_announce(table
, RA_OPTIMAL
, net
, net
->routes
, old_best
, NULL
, NULL
, NULL
);
1198 if (table
->config
->sorted
)
1199 rte_announce(table
, RA_ACCEPTED
, net
, new, old
, NULL
, NULL
, before_old
);
1200 rte_announce(table
, RA_MERGED
, net
, new, old
, net
->routes
, old_best
, NULL
);
1203 (table
->gc_counter
++ >= table
->config
->gc_max_ops
) &&
1204 (table
->gc_time
+ table
->config
->gc_min_time
<= now
))
1205 rt_schedule_prune(table
);
1207 if (old_ok
&& p
->rte_remove
)
1208 p
->rte_remove(net
, old
);
1209 if (new_ok
&& p
->rte_insert
)
1210 p
->rte_insert(net
, new);
1213 rte_free_quick(old
);
1216 static int rte_update_nest_cnt
; /* Nesting counter to allow recursive updates */
1219 rte_update_lock(void)
1221 rte_update_nest_cnt
++;
1225 rte_update_unlock(void)
1227 if (!--rte_update_nest_cnt
)
1228 lp_flush(rte_update_pool
);
1232 rte_hide_dummy_routes(net
*net
, rte
**dummy
)
1234 if (net
->routes
&& net
->routes
->attrs
->source
== RTS_DUMMY
)
1236 *dummy
= net
->routes
;
1237 net
->routes
= (*dummy
)->next
;
1242 rte_unhide_dummy_routes(net
*net
, rte
**dummy
)
1246 (*dummy
)->next
= net
->routes
;
1247 net
->routes
= *dummy
;
1252 * rte_update - enter a new update to a routing table
1253 * @table: table to be updated
1254 * @c: channel doing the update
1255 * @net: network node
1256 * @p: protocol submitting the update
1257 * @src: protocol originating the update
1258 * @new: a &rte representing the new route or %NULL for route removal.
1260 * This function is called by the routing protocols whenever they discover
1261 * a new route or wish to update/remove an existing route. The right announcement
1262 * sequence is to build route attributes first (either un-cached with @aflags set
1263 * to zero or a cached one using rta_lookup(); in this case please note that
1264 * you need to increase the use count of the attributes yourself by calling
1265 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1266 * the appropriate data and finally submit the new &rte by calling rte_update().
1268 * @src specifies the protocol that originally created the route and the meaning
1269 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1270 * same value as @new->attrs->proto. @p specifies the protocol that called
1271 * rte_update(). In most cases it is the same protocol as @src. rte_update()
1272 * stores @p in @new->sender;
1274 * When rte_update() gets any route, it automatically validates it (checks,
1275 * whether the network and next hop address are valid IP addresses and also
1276 * whether a normal routing protocol doesn't try to smuggle a host or link
1277 * scope route to the table), converts all protocol dependent attributes stored
1278 * in the &rte to temporary extended attributes, consults import filters of the
1279 * protocol to see if the route should be accepted and/or its attributes modified,
1280 * stores the temporary attributes back to the &rte.
1282 * Now, having a "public" version of the route, we
1283 * automatically find any old route defined by the protocol @src
1284 * for network @n, replace it by the new one (or removing it if @new is %NULL),
1285 * recalculate the optimal route for this destination and finally broadcast
1286 * the change (if any) to all routing protocols by calling rte_announce().
1288 * All memory used for attribute lists and other temporary allocations is taken
1289 * from a special linear pool @rte_update_pool and freed when rte_update()
1294 rte_update2(struct channel
*c
, const net_addr
*n
, rte
*new, struct rte_src
*src
)
1296 struct proto
*p
= c
->proto
;
1297 struct proto_stats
*stats
= &c
->stats
;
1298 struct filter
*filter
= c
->in_filter
;
1299 ea_list
*tmpa
= NULL
;
1303 ASSERT(c
->channel_state
== CS_UP
);
1308 nn
= net_get(c
->table
, n
);
1314 new->pref
= c
->preference
;
1316 stats
->imp_updates_received
++;
1317 if (!rte_validate(new))
1319 rte_trace_in(D_FILTERS
, p
, new, "invalid");
1320 stats
->imp_updates_invalid
++;
1324 if (filter
== FILTER_REJECT
)
1326 stats
->imp_updates_filtered
++;
1327 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1329 if (! c
->in_keep_filtered
)
1332 /* new is a private copy, i could modify it */
1333 new->flags
|= REF_FILTERED
;
1337 tmpa
= rte_make_tmp_attrs(new, rte_update_pool
);
1338 if (filter
&& (filter
!= FILTER_REJECT
))
1340 ea_list
*old_tmpa
= tmpa
;
1341 int fr
= f_run(filter
, &new, &tmpa
, rte_update_pool
, 0);
1344 stats
->imp_updates_filtered
++;
1345 rte_trace_in(D_FILTERS
, p
, new, "filtered out");
1347 if (! c
->in_keep_filtered
)
1350 new->flags
|= REF_FILTERED
;
1352 if (tmpa
!= old_tmpa
&& src
->proto
->store_tmp_attrs
)
1353 src
->proto
->store_tmp_attrs(new, tmpa
);
1356 if (!rta_is_cached(new->attrs
)) /* Need to copy attributes */
1357 new->attrs
= rta_lookup(new->attrs
);
1358 new->flags
|= REF_COW
;
1362 stats
->imp_withdraws_received
++;
1364 if (!(nn
= net_find(c
->table
, n
)) || !src
)
1366 stats
->imp_withdraws_ignored
++;
1367 rte_update_unlock();
1373 rte_hide_dummy_routes(nn
, &dummy
);
1374 rte_recalculate(c
, nn
, new, src
);
1375 rte_unhide_dummy_routes(nn
, &dummy
);
1376 rte_update_unlock();
1385 /* Independent call to rte_announce(), used from next hop
1386 recalculation, outside of rte_update(). new must be non-NULL */
1388 rte_announce_i(rtable
*tab
, unsigned type
, net
*net
, rte
*new, rte
*old
,
1389 rte
*new_best
, rte
*old_best
)
1392 rte_announce(tab
, type
, net
, new, old
, new_best
, old_best
, NULL
);
1393 rte_update_unlock();
1397 rte_discard(rte
*old
) /* Non-filtered route deletion, used during garbage collection */
1400 rte_recalculate(old
->sender
, old
->net
, NULL
, old
->attrs
->src
);
1401 rte_update_unlock();
1404 /* Check rtable for best route to given net whether it would be exported do p */
1406 rt_examine(rtable
*t
, net_addr
*a
, struct proto
*p
, struct filter
*filter
)
1408 net
*n
= net_find(t
, a
);
1409 rte
*rt
= n
? n
->routes
: NULL
;
1411 if (!rte_is_valid(rt
))
1416 /* Rest is stripped down export_filter() */
1417 ea_list
*tmpa
= rte_make_tmp_attrs(rt
, rte_update_pool
);
1418 int v
= p
->import_control
? p
->import_control(p
, &rt
, &tmpa
, rte_update_pool
) : 0;
1419 if (v
== RIC_PROCESS
)
1420 v
= (f_run(filter
, &rt
, &tmpa
, rte_update_pool
, FF_FORCE_TMPATTR
) <= F_ACCEPT
);
1422 /* Discard temporary rte */
1423 if (rt
!= n
->routes
)
1426 rte_update_unlock();
1433 * rt_refresh_begin - start a refresh cycle
1434 * @t: related routing table
1435 * @c: related channel
1437 * This function starts a refresh cycle for given routing table and announce
1438 * hook. The refresh cycle is a sequence where the protocol sends all its valid
1439 * routes to the routing table (by rte_update()). After that, all protocol
1440 * routes (more precisely routes with @c as @sender) not sent during the
1441 * refresh cycle but still in the table from the past are pruned. This is
1442 * implemented by marking all related routes as stale by REF_STALE flag in
1443 * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1444 * flag in rt_refresh_end() and then removing such routes in the prune loop.
1447 rt_refresh_begin(rtable
*t
, struct channel
*c
)
1449 FIB_WALK(&t
->fib
, net
, n
)
1452 for (e
= n
->routes
; e
; e
= e
->next
)
1454 e
->flags
|= REF_STALE
;
1460 * rt_refresh_end - end a refresh cycle
1461 * @t: related routing table
1462 * @c: related channel
1464 * This function ends a refresh cycle for given routing table and announce
1465 * hook. See rt_refresh_begin() for description of refresh cycles.
1468 rt_refresh_end(rtable
*t
, struct channel
*c
)
1472 FIB_WALK(&t
->fib
, net
, n
)
1475 for (e
= n
->routes
; e
; e
= e
->next
)
1476 if ((e
->sender
== c
) && (e
->flags
& REF_STALE
))
1478 e
->flags
|= REF_DISCARD
;
1485 rt_schedule_prune(t
);
1490 * rte_dump - dump a route
1491 * @e: &rte to be dumped
1493 * This functions dumps contents of a &rte to debug output.
1499 debug("%-1N ", n
->n
.addr
);
1500 debug("KF=%02x PF=%02x pref=%d lm=%d ", n
->n
.flags
, e
->pflags
, e
->pref
, now
-e
->lastmod
);
1502 if (e
->attrs
->src
->proto
->proto
->dump_attrs
)
1503 e
->attrs
->src
->proto
->proto
->dump_attrs(e
);
1508 * rt_dump - dump a routing table
1509 * @t: routing table to be dumped
1511 * This function dumps contents of a given routing table to debug output.
1516 debug("Dump of routing table <%s>\n", t
->name
);
1520 FIB_WALK(&t
->fib
, net
, n
)
1523 for(e
=n
->routes
; e
; e
=e
->next
)
1531 * rt_dump_all - dump all routing tables
1533 * This function dumps contents of all routing tables to debug output.
1540 WALK_LIST(t
, routing_tables
)
1545 rt_schedule_hcu(rtable
*tab
)
1547 if (tab
->hcu_scheduled
)
1550 tab
->hcu_scheduled
= 1;
1551 ev_schedule(tab
->rt_event
);
1555 rt_schedule_nhu(rtable
*tab
)
1557 if (tab
->nhu_state
== NHU_CLEAN
)
1558 ev_schedule(tab
->rt_event
);
1561 * NHU_CLEAN -> NHU_SCHEDULED
1562 * NHU_RUNNING -> NHU_DIRTY
1564 tab
->nhu_state
|= NHU_SCHEDULED
;
1568 rt_schedule_prune(rtable
*tab
)
1570 if (tab
->prune_state
== 0)
1571 ev_schedule(tab
->rt_event
);
1573 /* state change 0->1, 2->3 */
1574 tab
->prune_state
|= 1;
1585 if (tab
->hcu_scheduled
)
1586 rt_update_hostcache(tab
);
1589 rt_next_hop_update(tab
);
1591 if (tab
->prune_state
)
1592 rt_prune_table(tab
);
1594 rt_unlock_table(tab
);
1598 rt_setup(pool
*p
, rtable
*t
, char *name
, struct rtable_config
*cf
)
1600 bzero(t
, sizeof(*t
));
1603 t
->addr_type
= cf
? cf
->addr_type
: NET_IP4
;
1604 fib_init(&t
->fib
, p
, t
->addr_type
, sizeof(net
), OFFSETOF(net
, n
), 0, NULL
);
1605 init_list(&t
->channels
);
1609 t
->rt_event
= ev_new(p
);
1610 t
->rt_event
->hook
= rt_event
;
1611 t
->rt_event
->data
= t
;
1617 * rt_init - initialize routing tables
1619 * This function is called during BIRD startup. It initializes the
1620 * routing table module.
1626 rt_table_pool
= rp_new(&root_pool
, "Routing tables");
1627 rte_update_pool
= lp_new_default(rt_table_pool
);
1628 rte_slab
= sl_new(rt_table_pool
, sizeof(rte
));
1629 init_list(&routing_tables
);
1634 * rt_prune_table - prune a routing table
1636 * The prune loop scans routing tables and removes routes belonging to flushing
1637 * protocols, discarded routes and also stale network entries. It is called from
1638 * rt_event(). The event is rescheduled if the current iteration does not finish
1639 * the table. The pruning is directed by the prune state (@prune_state),
1640 * specifying whether the prune cycle is scheduled or running, and there
1641 * is also a persistent pruning iterator (@prune_fit).
1643 * The prune loop is used also for channel flushing. For this purpose, the
1644 * channels to flush are marked before the iteration and notified after the
1648 rt_prune_table(rtable
*tab
)
1650 struct fib_iterator
*fit
= &tab
->prune_fit
;
1656 DBG("Pruning route table %s\n", tab
->name
);
1658 fib_check(&tab
->fib
);
1661 if (tab
->prune_state
== 0)
1664 if (tab
->prune_state
== 1)
1666 /* Mark channels to flush */
1667 WALK_LIST2(c
, n
, tab
->channels
, table_node
)
1668 if (c
->channel_state
== CS_FLUSHING
)
1669 c
->flush_active
= 1;
1671 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1672 tab
->prune_state
= 2;
1676 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1681 for (e
=n
->routes
; e
; e
=e
->next
)
1682 if (e
->sender
->flush_active
|| (e
->flags
& REF_DISCARD
))
1686 FIB_ITERATE_PUT(fit
);
1687 ev_schedule(tab
->rt_event
);
1697 if (!n
->routes
) /* Orphaned FIB entry */
1699 FIB_ITERATE_PUT(fit
);
1700 fib_delete(&tab
->fib
, n
);
1707 fib_check(&tab
->fib
);
1710 tab
->gc_counter
= 0;
1713 /* state change 2->0, 3->1 */
1714 tab
->prune_state
&= 1;
1716 if (tab
->prune_state
> 0)
1717 ev_schedule(tab
->rt_event
);
1719 /* FIXME: This should be handled in a better way */
1722 /* Close flushed channels */
1723 WALK_LIST2_DELSAFE(c
, n
, x
, tab
->channels
, table_node
)
1724 if (c
->flush_active
)
1726 c
->flush_active
= 0;
1727 channel_set_state(c
, CS_DOWN
);
1734 rt_preconfig(struct config
*c
)
1736 init_list(&c
->tables
);
1738 rt_new_table(cf_get_symbol("master4"), NET_IP4
);
1739 rt_new_table(cf_get_symbol("master6"), NET_IP6
);
1744 * Some functions for handing internal next hop updates
1745 * triggered by rt_schedule_nhu().
1749 rta_next_hop_outdated(rta
*a
)
1751 struct hostentry
*he
= a
->hostentry
;
1757 return a
->dest
!= RTD_UNREACHABLE
;
1759 return (a
->dest
!= he
->dest
) || (a
->igp_metric
!= he
->igp_metric
) ||
1760 (!he
->nexthop_linkable
) || !nexthop_same(&(a
->nh
), &(he
->src
->nh
));
1764 rta_apply_hostentry(rta
*a
, struct hostentry
*he
, mpls_label_stack
*mls
)
1768 a
->igp_metric
= he
->igp_metric
;
1770 if (a
->dest
!= RTD_UNICAST
)
1774 a
->nh
= (struct nexthop
) {};
1776 { /* Store the label stack for later changes */
1777 a
->nh
.labels_orig
= a
->nh
.labels
= mls
->len
;
1778 memcpy(a
->nh
.label
, mls
->stack
, mls
->len
* sizeof(u32
));
1783 if (((!mls
) || (!mls
->len
)) && he
->nexthop_linkable
)
1784 { /* Just link the nexthop chain, no label append happens. */
1785 memcpy(&(a
->nh
), &(he
->src
->nh
), nexthop_size(&(he
->src
->nh
)));
1789 struct nexthop
*nhp
= NULL
, *nhr
= NULL
;
1790 int skip_nexthop
= 0;
1792 for (struct nexthop
*nh
= &(he
->src
->nh
); nh
; nh
= nh
->next
)
1799 nhp
= (nhp
? (nhp
->next
= lp_allocz(rte_update_pool
, NEXTHOP_MAX_SIZE
)) : &(a
->nh
));
1802 nhp
->iface
= nh
->iface
;
1803 nhp
->weight
= nh
->weight
;
1806 nhp
->labels
= nh
->labels
+ mls
->len
;
1807 nhp
->labels_orig
= mls
->len
;
1808 if (nhp
->labels
<= MPLS_MAX_LABEL_STACK
)
1810 memcpy(nhp
->label
, nh
->label
, nh
->labels
* sizeof(u32
)); /* First the hostentry labels */
1811 memcpy(&(nhp
->label
[nh
->labels
]), mls
->stack
, mls
->len
* sizeof(u32
)); /* Then the bottom labels */
1815 log(L_WARN
"Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
1816 nh
->labels
, mls
->len
, nhp
->labels
, MPLS_MAX_LABEL_STACK
);
1821 if (ipa_nonzero(nh
->gw
))
1822 nhp
->gw
= nh
->gw
; /* Router nexthop */
1823 else if (ipa_nonzero(he
->link
))
1824 nhp
->gw
= he
->link
; /* Device nexthop with link-local address known */
1826 nhp
->gw
= he
->addr
; /* Device nexthop with link-local address unknown */
1834 a
->dest
= RTD_UNREACHABLE
;
1835 log(L_WARN
"No valid nexthop remaining, setting route unreachable");
1841 rt_next_hop_update_rte(rtable
*tab UNUSED
, rte
*old
)
1843 rta
*a
= alloca(RTA_MAX_SIZE
);
1844 memcpy(a
, old
->attrs
, rta_size(old
->attrs
));
1846 mpls_label_stack mls
= { .len
= a
->nh
.labels_orig
};
1847 memcpy(mls
.stack
, &a
->nh
.label
[a
->nh
.labels
- mls
.len
], mls
.len
* sizeof(u32
));
1849 rta_apply_hostentry(a
, old
->attrs
->hostentry
, &mls
);
1852 rte
*e
= sl_alloc(rte_slab
);
1853 memcpy(e
, old
, sizeof(rte
));
1854 e
->attrs
= rta_lookup(a
);
1860 rt_next_hop_update_net(rtable
*tab
, net
*n
)
1862 rte
**k
, *e
, *new, *old_best
, **new_best
;
1864 int free_old_best
= 0;
1866 old_best
= n
->routes
;
1870 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1871 if (rta_next_hop_outdated(e
->attrs
))
1873 new = rt_next_hop_update_rte(tab
, e
);
1876 rte_announce_i(tab
, RA_ANY
, n
, new, e
, NULL
, NULL
);
1877 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated");
1879 /* Call a pre-comparison hook */
1880 /* Not really an efficient way to compute this */
1881 if (e
->attrs
->src
->proto
->rte_recalculate
)
1882 e
->attrs
->src
->proto
->rte_recalculate(tab
, n
, new, e
, NULL
);
1886 else /* Freeing of the old best rte is postponed */
1896 /* Find the new best route */
1898 for (k
= &n
->routes
; e
= *k
; k
= &e
->next
)
1900 if (!new_best
|| rte_better(e
, *new_best
))
1904 /* Relink the new best route to the first position */
1906 if (new != n
->routes
)
1908 *new_best
= new->next
;
1909 new->next
= n
->routes
;
1913 /* Announce the new best route */
1914 if (new != old_best
)
1916 rte_announce_i(tab
, RA_OPTIMAL
, n
, new, old_best
, NULL
, NULL
);
1917 rte_trace_in(D_ROUTES
, new->sender
->proto
, new, "updated [best]");
1920 /* FIXME: Better announcement of merged routes */
1921 rte_announce_i(tab
, RA_MERGED
, n
, new, old_best
, new, old_best
);
1924 rte_free_quick(old_best
);
1930 rt_next_hop_update(rtable
*tab
)
1932 struct fib_iterator
*fit
= &tab
->nhu_fit
;
1935 if (tab
->nhu_state
== NHU_CLEAN
)
1938 if (tab
->nhu_state
== NHU_SCHEDULED
)
1940 FIB_ITERATE_INIT(fit
, &tab
->fib
);
1941 tab
->nhu_state
= NHU_RUNNING
;
1944 FIB_ITERATE_START(&tab
->fib
, fit
, net
, n
)
1948 FIB_ITERATE_PUT(fit
);
1949 ev_schedule(tab
->rt_event
);
1952 max_feed
-= rt_next_hop_update_net(tab
, n
);
1957 * NHU_DIRTY -> NHU_SCHEDULED
1958 * NHU_RUNNING -> NHU_CLEAN
1960 tab
->nhu_state
&= 1;
1962 if (tab
->nhu_state
!= NHU_CLEAN
)
1963 ev_schedule(tab
->rt_event
);
1967 struct rtable_config
*
1968 rt_new_table(struct symbol
*s
, uint addr_type
)
1970 /* Hack that allows to 'redefine' the master table */
1971 if ((s
->class == SYM_TABLE
) &&
1972 (s
->def
== new_config
->def_tables
[addr_type
]) &&
1973 ((addr_type
== NET_IP4
) || (addr_type
== NET_IP6
)))
1976 struct rtable_config
*c
= cfg_allocz(sizeof(struct rtable_config
));
1978 cf_define_symbol(s
, SYM_TABLE
, c
);
1980 c
->addr_type
= addr_type
;
1981 c
->gc_max_ops
= 1000;
1984 add_tail(&new_config
->tables
, &c
->n
);
1986 /* First table of each type is kept as default */
1987 if (! new_config
->def_tables
[addr_type
])
1988 new_config
->def_tables
[addr_type
] = c
;
1994 * rt_lock_table - lock a routing table
1995 * @r: routing table to be locked
1997 * Lock a routing table, because it's in use by a protocol,
1998 * preventing it from being freed when it gets undefined in a new
2002 rt_lock_table(rtable
*r
)
2008 * rt_unlock_table - unlock a routing table
2009 * @r: routing table to be unlocked
2011 * Unlock a routing table formerly locked by rt_lock_table(),
2012 * that is decrease its use count and delete it if it's scheduled
2013 * for deletion by configuration changes.
2016 rt_unlock_table(rtable
*r
)
2018 if (!--r
->use_count
&& r
->deleted
)
2020 struct config
*conf
= r
->deleted
;
2021 DBG("Deleting routing table %s\n", r
->name
);
2022 r
->config
->table
= NULL
;
2024 rt_free_hostcache(r
);
2029 config_del_obstacle(conf
);
2034 * rt_commit - commit new routing table configuration
2035 * @new: new configuration
2036 * @old: original configuration or %NULL if it's boot time config
2038 * Scan differences between @old and @new configuration and modify
2039 * the routing tables according to these changes. If @new defines a
2040 * previously unknown table, create it, if it omits a table existing
2041 * in @old, schedule it for deletion (it gets deleted when all protocols
2042 * disconnect from it by calling rt_unlock_table()), if it exists
2043 * in both configurations, leave it unchanged.
2046 rt_commit(struct config
*new, struct config
*old
)
2048 struct rtable_config
*o
, *r
;
2050 DBG("rt_commit:\n");
2053 WALK_LIST(o
, old
->tables
)
2055 rtable
*ot
= o
->table
;
2058 struct symbol
*sym
= cf_find_symbol(new, o
->name
);
2059 if (sym
&& sym
->class == SYM_TABLE
&& !new->shutdown
)
2061 DBG("\t%s: same\n", o
->name
);
2066 if (o
->sorted
!= r
->sorted
)
2067 log(L_WARN
"Reconfiguration of rtable sorted flag not implemented");
2071 DBG("\t%s: deleted\n", o
->name
);
2073 config_add_obstacle(old
);
2075 rt_unlock_table(ot
);
2081 WALK_LIST(r
, new->tables
)
2084 rtable
*t
= mb_alloc(rt_table_pool
, sizeof(struct rtable
));
2085 DBG("\t%s: created\n", r
->name
);
2086 rt_setup(rt_table_pool
, t
, r
->name
, r
);
2087 add_tail(&routing_tables
, &t
->n
);
2094 do_feed_channel(struct channel
*c
, net
*n
, rte
*e
)
2097 if (c
->ra_mode
== RA_ACCEPTED
)
2098 rt_notify_accepted(c
, n
, e
, NULL
, NULL
, c
->refeeding
? 2 : 1);
2099 else if (c
->ra_mode
== RA_MERGED
)
2100 rt_notify_merged(c
, n
, NULL
, NULL
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2102 rt_notify_basic(c
, n
, e
, c
->refeeding
? e
: NULL
, c
->refeeding
);
2103 rte_update_unlock();
2107 * rt_feed_channel - advertise all routes to a channel
2108 * @c: channel to be fed
2110 * This function performs one pass of advertisement of routes to a channel that
2111 * is in the ES_FEEDING state. It is called by the protocol code as long as it
2112 * has something to do. (We avoid transferring all the routes in single pass in
2113 * order not to monopolize CPU time.)
2116 rt_feed_channel(struct channel
*c
)
2118 struct fib_iterator
*fit
= &c
->feed_fit
;
2121 ASSERT(c
->export_state
== ES_FEEDING
);
2123 if (!c
->feed_active
)
2125 FIB_ITERATE_INIT(fit
, &c
->table
->fib
);
2129 FIB_ITERATE_START(&c
->table
->fib
, fit
, net
, n
)
2134 FIB_ITERATE_PUT(fit
);
2138 /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2140 if ((c
->ra_mode
== RA_OPTIMAL
) ||
2141 (c
->ra_mode
== RA_ACCEPTED
) ||
2142 (c
->ra_mode
== RA_MERGED
))
2143 if (rte_is_valid(e
))
2145 /* In the meantime, the protocol may fell down */
2146 if (c
->export_state
!= ES_FEEDING
)
2149 do_feed_channel(c
, n
, e
);
2153 if (c
->ra_mode
== RA_ANY
)
2154 for(e
= n
->routes
; e
; e
= e
->next
)
2156 /* In the meantime, the protocol may fell down */
2157 if (c
->export_state
!= ES_FEEDING
)
2160 if (!rte_is_valid(e
))
2163 do_feed_channel(c
, n
, e
);
2175 * rt_feed_baby_abort - abort protocol feeding
2178 * This function is called by the protocol code when the protocol stops or
2179 * ceases to exist during the feeding.
2182 rt_feed_channel_abort(struct channel
*c
)
2186 /* Unlink the iterator */
2187 fit_get(&c
->table
->fib
, &c
->feed_fit
);
/* Hash a pointer value by folding shifted copies of its bits together,
   so that nearby allocations do not collide in adjacent buckets. */
static inline unsigned
ptr_hash(void *ptr)
{
  uintptr_t v = (uintptr_t) ptr;
  return (unsigned) (v ^ (v << 8) ^ (v >> 16));
}
2200 hc_hash(ip_addr a
, rtable
*dep
)
2202 return ipa_hash(a
) ^ ptr_hash(dep
);
2206 hc_insert(struct hostcache
*hc
, struct hostentry
*he
)
2208 uint k
= he
->hash_key
>> hc
->hash_shift
;
2209 he
->next
= hc
->hash_table
[k
];
2210 hc
->hash_table
[k
] = he
;
2214 hc_remove(struct hostcache
*hc
, struct hostentry
*he
)
2216 struct hostentry
**hep
;
2217 uint k
= he
->hash_key
>> hc
->hash_shift
;
2219 for (hep
= &hc
->hash_table
[k
]; *hep
!= he
; hep
= &(*hep
)->next
);
2223 #define HC_DEF_ORDER 10
2224 #define HC_HI_MARK *4
2225 #define HC_HI_STEP 2
2226 #define HC_HI_ORDER 16 /* Must be at most 16 */
2227 #define HC_LO_MARK /5
2228 #define HC_LO_STEP 2
2229 #define HC_LO_ORDER 10
2232 hc_alloc_table(struct hostcache
*hc
, unsigned order
)
2234 uint hsize
= 1 << order
;
2235 hc
->hash_order
= order
;
2236 hc
->hash_shift
= 32 - order
;
2237 hc
->hash_max
= (order
>= HC_HI_ORDER
) ? ~0U : (hsize HC_HI_MARK
);
2238 hc
->hash_min
= (order
<= HC_LO_ORDER
) ? 0U : (hsize HC_LO_MARK
);
2240 hc
->hash_table
= mb_allocz(rt_table_pool
, hsize
* sizeof(struct hostentry
*));
2244 hc_resize(struct hostcache
*hc
, unsigned new_order
)
2246 struct hostentry
**old_table
= hc
->hash_table
;
2247 struct hostentry
*he
, *hen
;
2248 uint old_size
= 1 << hc
->hash_order
;
2251 hc_alloc_table(hc
, new_order
);
2252 for (i
= 0; i
< old_size
; i
++)
2253 for (he
= old_table
[i
]; he
!= NULL
; he
=hen
)
2261 static struct hostentry
*
2262 hc_new_hostentry(struct hostcache
*hc
, ip_addr a
, ip_addr ll
, rtable
*dep
, unsigned k
)
2264 struct hostentry
*he
= sl_alloc(hc
->slab
);
2266 *he
= (struct hostentry
) {
2273 add_tail(&hc
->hostentries
, &he
->ln
);
2277 if (hc
->hash_items
> hc
->hash_max
)
2278 hc_resize(hc
, hc
->hash_order
+ HC_HI_STEP
);
2284 hc_delete_hostentry(struct hostcache
*hc
, struct hostentry
*he
)
2290 sl_free(hc
->slab
, he
);
2293 if (hc
->hash_items
< hc
->hash_min
)
2294 hc_resize(hc
, hc
->hash_order
- HC_LO_STEP
);
2298 rt_init_hostcache(rtable
*tab
)
2300 struct hostcache
*hc
= mb_allocz(rt_table_pool
, sizeof(struct hostcache
));
2301 init_list(&hc
->hostentries
);
2304 hc_alloc_table(hc
, HC_DEF_ORDER
);
2305 hc
->slab
= sl_new(rt_table_pool
, sizeof(struct hostentry
));
2307 hc
->lp
= lp_new(rt_table_pool
, LP_GOOD_SIZE(1024));
2308 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2310 tab
->hostcache
= hc
;
2314 rt_free_hostcache(rtable
*tab
)
2316 struct hostcache
*hc
= tab
->hostcache
;
2319 WALK_LIST(n
, hc
->hostentries
)
2321 struct hostentry
*he
= SKIP_BACK(struct hostentry
, ln
, n
);
2325 log(L_ERR
"Hostcache is not empty in table %s", tab
->name
);
2330 mb_free(hc
->hash_table
);
2335 rt_notify_hostcache(rtable
*tab
, net
*net
)
2337 if (tab
->hcu_scheduled
)
2340 if (trie_match_net(tab
->hostcache
->trie
, net
->n
.addr
))
2341 rt_schedule_hcu(tab
);
2345 if_local_addr(ip_addr a
, struct iface
*i
)
2349 WALK_LIST(b
, i
->addrs
)
2350 if (ipa_equal(a
, b
->ip
))
2357 rt_get_igp_metric(rte
*rt
)
2359 eattr
*ea
= ea_find(rt
->attrs
->eattrs
, EA_GEN_IGP_METRIC
);
2367 if ((a
->source
== RTS_OSPF
) ||
2368 (a
->source
== RTS_OSPF_IA
) ||
2369 (a
->source
== RTS_OSPF_EXT1
))
2370 return rt
->u
.ospf
.metric1
;
2374 if (a
->source
== RTS_RIP
)
2375 return rt
->u
.rip
.metric
;
2378 if (a
->source
== RTS_DEVICE
)
2381 return IGP_METRIC_UNKNOWN
;
2385 rt_update_hostentry(rtable
*tab
, struct hostentry
*he
)
2387 rta
*old_src
= he
->src
;
2390 /* Reset the hostentry */
2392 he
->nexthop_linkable
= 0;
2393 he
->dest
= RTD_UNREACHABLE
;
2397 net_fill_ip_host(&he_addr
, he
->addr
);
2398 net
*n
= net_route(tab
, &he_addr
);
2403 pxlen
= n
->n
.addr
->pxlen
;
2407 /* Recursive route should not depend on another recursive route */
2408 log(L_WARN
"Next hop address %I resolvable through recursive route for %N",
2409 he
->addr
, n
->n
.addr
);
2414 he
->nexthop_linkable
= 1;
2415 if (he
->dest
== RTD_UNICAST
)
2417 for (struct nexthop
*nh
= &(a
->nh
); nh
; nh
= nh
->next
)
2418 if (ipa_zero(nh
->gw
))
2420 if (if_local_addr(he
->addr
, nh
->iface
))
2422 /* The host address is a local address, this is not valid */
2423 log(L_WARN
"Next hop address %I is a local address of iface %s",
2424 he
->addr
, nh
->iface
->name
);
2428 he
->nexthop_linkable
= 0;
2433 he
->src
= rta_clone(a
);
2434 he
->igp_metric
= rt_get_igp_metric(e
);
2438 /* Add a prefix range to the trie */
2439 trie_add_prefix(tab
->hostcache
->trie
, &he_addr
, pxlen
, he_addr
.pxlen
);
2442 return old_src
!= he
->src
;
2446 rt_update_hostcache(rtable
*tab
)
2448 struct hostcache
*hc
= tab
->hostcache
;
2449 struct hostentry
*he
;
2452 /* Reset the trie */
2454 hc
->trie
= f_new_trie(hc
->lp
, sizeof(struct f_trie_node
));
2456 WALK_LIST_DELSAFE(n
, x
, hc
->hostentries
)
2458 he
= SKIP_BACK(struct hostentry
, ln
, n
);
2461 hc_delete_hostentry(hc
, he
);
2465 if (rt_update_hostentry(tab
, he
))
2466 rt_schedule_nhu(he
->tab
);
2469 tab
->hcu_scheduled
= 0;
2473 rt_get_hostentry(rtable
*tab
, ip_addr a
, ip_addr ll
, rtable
*dep
)
2475 struct hostentry
*he
;
2477 if (!tab
->hostcache
)
2478 rt_init_hostcache(tab
);
2480 u32 k
= hc_hash(a
, dep
);
2481 struct hostcache
*hc
= tab
->hostcache
;
2482 for (he
= hc
->hash_table
[k
>> hc
->hash_shift
]; he
!= NULL
; he
= he
->next
)
2483 if (ipa_equal(he
->addr
, a
) && (he
->tab
== dep
))
2486 he
= hc_new_hostentry(hc
, a
, ipa_zero(ll
) ? a
: ll
, dep
, k
);
2487 rt_update_hostentry(tab
, he
);
2493 * Documentation for functions declared inline in route.h
2498 * net_find - find a network entry
2499 * @tab: a routing table
2500 * @addr: address of the network
2502 * net_find() looks up the given network in routing table @tab and
2503 * returns a pointer to its &net entry or %NULL if no such network
2506 static inline net
*net_find(rtable
*tab
, net_addr
*addr
)
2510 * net_get - obtain a network entry
2511 * @tab: a routing table
2512 * @addr: address of the network
2514 * net_get() looks up the given network in routing table @tab and
2515 * returns a pointer to its &net entry. If no such entry exists, it's
2518 static inline net
*net_get(rtable
*tab
, net_addr
*addr
)
2522 * rte_cow - copy a route for writing
2523 * @r: a route entry to be copied
2525 * rte_cow() takes a &rte and prepares it for modification. The exact action
2526 * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2527 * just returned unchanged, else a new temporary entry with the same contents
2530 * The primary use of this function is inside the filter machinery -- when
2531 * a filter wants to modify &rte contents (to change the preference or to
2532 * attach another set of attributes), it must ensure that the &rte is not
2533 * shared with anyone else (and especially that it isn't stored in any routing
2536 * Result: a pointer to the new writable &rte.
2538 static inline rte
* rte_cow(rte
*r
)