nest/rt-table.c

   1 /*
   2  *      BIRD -- Routing Tables
   3  *
   4  *      (c) 1998--2000 Martin Mares <mj@ucw.cz>
   5  *
   6  *      Can be freely distributed and used under the terms of the GNU GPL.
   7  */
   8
   9 /**
  10  * DOC: Routing tables
  11  *
  12  * Routing tables are probably the most important structures BIRD uses. They
  13  * hold all the information about known networks, the associated routes and
  14  * their attributes.
  15  *
  16  * There are multiple routing tables (a primary one together with any
  17  * number of secondary ones if requested by the configuration). Each table
  18  * is basically a FIB containing entries describing the individual
  19  * destination networks. For each network (represented by structure &net),
  20  * there is a one-way linked list of route entries (&rte), the first entry
  21  * on the list being the best one (i.e., the one we currently use
  22  * for routing), the order of the other ones is undetermined.
  23  *
  24  * The &rte contains information specific to the route (preference, protocol
  25  * metrics, time of last modification etc.) and a pointer to a &rta structure
  26  * (see the route attribute module for a precise explanation) holding the
  27  * remaining route attributes which are expected to be shared by multiple
  28  * routes in order to conserve memory.
  29  */
  30
  31 #undef LOCAL_DEBUG
  32
  33 #include "nest/bird.h"
  34 #include "nest/route.h"
  35 #include "nest/protocol.h"
  36 #include "nest/iface.h"
  37 #include "lib/resource.h"
  38 #include "lib/event.h"
  39 #include "lib/string.h"
  40 #include "conf/conf.h"
  41 #include "filter/filter.h"
  42 #include "lib/hash.h"
  43 #include "lib/string.h"
  44 #include "lib/alloca.h"
  45
  46 pool *rt_table_pool;
  47
  48 static slab *rte_slab;
  49 static linpool *rte_update_pool;
  50
  51 list routing_tables;
  52
  53 static void rt_free_hostcache(rtable *tab);
  54 static void rt_notify_hostcache(rtable *tab, net *net);
  55 static void rt_update_hostcache(rtable *tab);
  56 static void rt_next_hop_update(rtable *tab);
  57 static inline void rt_prune_table(rtable *tab);
  58
  59
  60 /* Like fib_route(), but skips empty net entries */
  61 static inline void *
  62 net_route_ip4(rtable *t, net_addr_ip4 *n)
  63 {
  64   net *r;
  65
  66   while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  67   {
  68     n->pxlen--;
  69     ip4_clrbit(&n->prefix, n->pxlen);
  70   }
  71
  72   return r;
  73 }
  74
  75 static inline void *
  76 net_route_ip6(rtable *t, net_addr_ip6 *n)
  77 {
  78   net *r;
  79
  80   while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  81   {
  82     n->pxlen--;
  83     ip6_clrbit(&n->prefix, n->pxlen);
  84   }
  85
  86   return r;
  87 }
  88
  89 static inline void *
  90 net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
  91 {
  92   struct fib_node *fn;
  93
  94   while (1)
  95   {
  96     net *best = NULL;
  97     int best_pxlen = 0;
  98
  99     /* We need to do dst first matching. Since sadr addresses are hashed on dst
 100        prefix only, find the hash table chain and go through it to find the
 101        match with the smallest matching src prefix. */
 102     for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next)
 103     {
 104       net_addr_ip6_sadr *a = (void *) fn->addr;
 105
 106       if (net_equal_dst_ip6_sadr(n, a) &&
 107           net_in_net_src_ip6_sadr(n, a) &&
 108           (a->src_pxlen >= best_pxlen))
 109       {
 110         best = fib_node_to_user(&t->fib, fn);
 111         best_pxlen = a->src_pxlen;
 112       }
 113     }
 114
 115     if (best)
 116       return best;
 117
 118     if (!n->dst_pxlen)
 119       break;
 120
 121     n->dst_pxlen--;
 122     ip6_clrbit(&n->dst_prefix, n->dst_pxlen);
 123   }
 124
 125   return NULL;
 126 }
 127
 128 void *
 129 net_route(rtable *tab, const net_addr *n)
 130 {
 131   ASSERT(tab->addr_type == n->type);
 132
 133   net_addr *n0 = alloca(n->length);
 134   net_copy(n0, n);
 135
 136   switch (n->type)
 137   {
 138   case NET_IP4:
 139   case NET_VPN4:
 140   case NET_ROA4:
 141     return net_route_ip4(tab, (net_addr_ip4 *) n0);
 142
 143   case NET_IP6:
 144   case NET_VPN6:
 145   case NET_ROA6:
 146     return net_route_ip6(tab, (net_addr_ip6 *) n0);
 147
 148   case NET_IP6_SADR:
 149     return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0);
 150
 151   default:
 152     return NULL;
 153   }
 154 }
 155
 156
 157 static int
 158 net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
 159 {
 160   struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
 161   struct fib_node *fn;
 162   int anything = 0;
 163
 164   while (1)
 165   {
 166     for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
 167     {
 168       net_addr_roa4 *roa = (void *) fn->addr;
 169       net *r = fib_node_to_user(&tab->fib, fn);
 170
 171       if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
 172       {
 173         anything = 1;
 174         if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
 175           return ROA_VALID;
 176       }
 177     }
 178
 179     if (n.pxlen == 0)
 180       break;
 181
 182     n.pxlen--;
 183     ip4_clrbit(&n.prefix, n.pxlen);
 184   }
 185
 186   return anything ? ROA_INVALID : ROA_UNKNOWN;
 187 }
 188
 189 static int
 190 net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
 191 {
 192   struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
 193   struct fib_node *fn;
 194   int anything = 0;
 195
 196   while (1)
 197   {
 198     for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
 199     {
 200       net_addr_roa6 *roa = (void *) fn->addr;
 201       net *r = fib_node_to_user(&tab->fib, fn);
 202
 203       if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
 204       {
 205         anything = 1;
 206         if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
 207           return ROA_VALID;
 208       }
 209     }
 210
 211     if (n.pxlen == 0)
 212       break;
 213
 214     n.pxlen--;
 215     ip6_clrbit(&n.prefix, n.pxlen);
 216   }
 217
 218   return anything ? ROA_INVALID : ROA_UNKNOWN;
 219 }
 220
 221 /**
 222  * roa_check - check validity of route origination in a ROA table
 223  * @tab: ROA table
 224  * @n: network prefix to check
 225  * @asn: AS number of network prefix
 226  *
 227  * Implements RFC 6483 route validation for the given network prefix. The
 228  * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
 229  * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
 230  * a candidate ROA with matching ASN and maxlen field greater than or equal to
 231  * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
 232  * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
 233  * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
 234  * must have type NET_IP4 or NET_IP6, respectively.
 235  */
 236 int
 237 net_roa_check(rtable *tab, const net_addr *n, u32 asn)
 238 {
 239   if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
 240     return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
 241   else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
 242     return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
 243   else
 244     return ROA_UNKNOWN; /* Should not happen */
 245 }
 246
 247 /**
 248  * rte_find - find a route
 249  * @net: network node
 250  * @src: route source
 251  *
 252  * The rte_find() function returns a route for destination @net
 253  * which is from route source @src.
 254  */
 255 rte *
 256 rte_find(net *net, struct rte_src *src)
 257 {
 258   rte *e = net->routes;
 259
 260   while (e && e->attrs->src != src)
 261     e = e->next;
 262   return e;
 263 }
 264
 265 /**
 266  * rte_get_temp - get a temporary &rte
 267  * @a: attributes to assign to the new route (a &rta; in case it's
 268  * un-cached, rte_update() will create a cached copy automatically)
 269  *
 270  * Create a temporary &rte and bind it with the attributes @a.
 271  * Also set route preference to the default preference set for
 272  * the protocol.
 273  */
 274 rte *
 275 rte_get_temp(rta *a)
 276 {
 277   rte *e = sl_alloc(rte_slab);
 278
 279   e->attrs = a;
 280   e->flags = 0;
 281   e->pref = 0;
 282   return e;
 283 }
 284
 285 rte *
 286 rte_do_cow(rte *r)
 287 {
 288   rte *e = sl_alloc(rte_slab);
 289
 290   memcpy(e, r, sizeof(rte));
 291   e->attrs = rta_clone(r->attrs);
 292   e->flags = 0;
 293   return e;
 294 }
 295
 296 /**
 297  * rte_cow_rta - get a private writable copy of &rte with writable &rta
 298  * @r: a route entry to be copied
 299  * @lp: a linpool from which to allocate &rta
 300  *
 301  * rte_cow_rta() takes a &rte and prepares it and associated &rta for
 302  * modification. There are three possibilities: First, both &rte and &rta are
 303  * private copies, in that case they are returned unchanged.  Second, &rte is
 304  * private copy, but &rta is cached, in that case &rta is duplicated using
 305  * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
 306  * both structures are duplicated by rte_do_cow() and rta_do_cow().
 307  *
 308  * Note that in the second case, cached &rta loses one reference, while private
 309  * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
 310  * nexthops, ...) with it. To work properly, original shared &rta should have
 311  * another reference during the life of created private copy.
 312  *
 313  * Result: a pointer to the new writable &rte with writable &rta.
 314  */
 315 rte *
 316 rte_cow_rta(rte *r, linpool *lp)
 317 {
 318   if (!rta_is_cached(r->attrs))
 319     return r;
 320
 321   r = rte_cow(r);
 322   rta *a = rta_do_cow(r->attrs, lp);
 323   rta_free(r->attrs);
 324   r->attrs = a;
 325   return r;
 326 }
 327
 328 static int                              /* Actually better or at least as good as */
 329 rte_better(rte *new, rte *old)
 330 {
 331   int (*better)(rte *, rte *);
 332
 333   if (!rte_is_valid(old))
 334     return 1;
 335   if (!rte_is_valid(new))
 336     return 0;
 337
 338   if (new->pref > old->pref)
 339     return 1;
 340   if (new->pref < old->pref)
 341     return 0;
 342   if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
 343     {
 344       /*
 345        *  If the user has configured protocol preferences, so that two different protocols
 346        *  have the same preference, try to break the tie by comparing addresses. Not too
 347        *  useful, but keeps the ordering of routes unambiguous.
 348        */
 349       return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
 350     }
 351   if (better = new->attrs->src->proto->rte_better)
 352     return better(new, old);
 353   return 0;
 354 }
 355
 356 static int
 357 rte_mergable(rte *pri, rte *sec)
 358 {
 359   int (*mergable)(rte *, rte *);
 360
 361   if (!rte_is_valid(pri) || !rte_is_valid(sec))
 362     return 0;
 363
 364   if (pri->pref != sec->pref)
 365     return 0;
 366
 367   if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
 368     return 0;
 369
 370   if (mergable = pri->attrs->src->proto->rte_mergable)
 371     return mergable(pri, sec);
 372
 373   return 0;
 374 }
 375
 376 static void
 377 rte_trace(struct proto *p, rte *e, int dir, char *msg)
 378 {
 379   log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
 380 }
 381
 382 static inline void
 383 rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
 384 {
 385   if (p->debug & flag)
 386     rte_trace(p, e, '>', msg);
 387 }
 388
 389 static inline void
 390 rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
 391 {
 392   if (p->debug & flag)
 393     rte_trace(p, e, '<', msg);
 394 }
 395
 396 static rte *
 397 export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
 398 {
 399   struct proto *p = c->proto;
 400   struct filter *filter = c->out_filter;
 401   struct proto_stats *stats = &c->stats;
 402   rte *rt;
 403   int v;
 404
 405   rt = rt0;
 406   *rt_free = NULL;
 407
 408   rte_make_tmp_attrs(&rt, pool);
 409
 410   v = p->preexport ? p->preexport(p, &rt, pool) : 0;
 411   if (v < 0)
 412     {
 413       if (silent)
 414         goto reject;
 415
 416       stats->exp_updates_rejected++;
 417       if (v == RIC_REJECT)
 418         rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
 419       goto reject;
 420     }
 421   if (v > 0)
 422     {
 423       if (!silent)
 424         rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
 425       goto accept;
 426     }
 427
 428   v = filter && ((filter == FILTER_REJECT) ||
 429                  (f_run(filter, &rt, pool,
 430                         (silent ? FF_SILENT : 0)) > F_ACCEPT));
 431   if (v)
 432     {
 433       if (silent)
 434         goto reject;
 435
 436       stats->exp_updates_filtered++;
 437       rte_trace_out(D_FILTERS, p, rt, "filtered out");
 438       goto reject;
 439     }
 440
 441  accept:
 442   if (rt != rt0)
 443     *rt_free = rt;
 444   return rt;
 445
 446  reject:
 447   /* Discard temporary rte */
 448   if (rt != rt0)
 449     rte_free(rt);
 450   return NULL;
 451 }
 452
 453 static inline rte *
 454 export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
 455 {
 456   return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
 457 }
 458
 459 static void
 460 do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
 461 {
 462   struct proto *p = c->proto;
 463   struct proto_stats *stats = &c->stats;
 464
 465
 466   /*
 467    * First, apply export limit.
 468    *
 469    * Export route limits has several problems. Because exp_routes
 470    * counter is reset before refeed, we don't really know whether
 471    * limit is breached and whether the update is new or not. Therefore
 472    * the number of really exported routes may exceed the limit
 473    * temporarily (routes exported before and new routes in refeed).
 474    *
 475    * Minor advantage is that if the limit is decreased and refeed is
 476    * requested, the number of exported routes really decrease.
 477    *
 478    * Second problem is that with export limits, we don't know whether
 479    * old was really exported (it might be blocked by limit). When a
 480    * withdraw is exported, we announce it even when the previous
 481    * update was blocked. This is not a big issue, but the same problem
 482    * is in updating exp_routes counter. Therefore, to be consistent in
 483    * increases and decreases of exp_routes, we count exported routes
 484    * regardless of blocking by limits.
 485    *
 486    * Similar problem is in handling updates - when a new route is
 487    * received and blocking is active, the route would be blocked, but
 488    * when an update for the route will be received later, the update
 489    * would be propagated (as old != NULL). Therefore, we have to block
 490    * also non-new updates (contrary to import blocking).
 491    */
 492
 493   struct channel_limit *l = &c->out_limit;
 494   if (l->action && new)
 495     {
 496       if ((!old || refeed) && (stats->exp_routes >= l->limit))
 497         channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
 498
 499       if (l->state == PLS_BLOCKED)
 500         {
 501           stats->exp_routes++;  /* see note above */
 502           stats->exp_updates_rejected++;
 503           rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
 504           new = NULL;
 505
 506           if (!old)
 507             return;
 508         }
 509     }
 510
 511
 512   if (new)
 513     stats->exp_updates_accepted++;
 514   else
 515     stats->exp_withdraws_accepted++;
 516
 517   /* Hack: We do not decrease exp_routes during refeed, we instead
 518      reset exp_routes at the start of refeed. */
 519   if (new)
 520     stats->exp_routes++;
 521   if (old && !refeed)
 522     stats->exp_routes--;
 523
 524   if (p->debug & D_ROUTES)
 525     {
 526       if (new && old)
 527         rte_trace_out(D_ROUTES, p, new, "replaced");
 528       else if (new)
 529         rte_trace_out(D_ROUTES, p, new, "added");
 530       else if (old)
 531         rte_trace_out(D_ROUTES, p, old, "removed");
 532     }
 533   p->rt_notify(p, c, net, new, old);
 534 }
 535
 536 static void
 537 rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
 538 {
 539   struct proto *p = c->proto;
 540
 541   rte *new = new0;
 542   rte *old = old0;
 543   rte *new_free = NULL;
 544   rte *old_free = NULL;
 545
 546   if (new)
 547     c->stats.exp_updates_received++;
 548   else
 549     c->stats.exp_withdraws_received++;
 550
 551   /*
 552    * This is a tricky part - we don't know whether route 'old' was exported to
 553    * protocol 'p' or was filtered by the export filter. We try to run the export
 554    * filter to know this to have a correct value in 'old' argument of rte_update
 555    * (and proper filter value).
 556    *
 557    * This is broken because 'configure soft' may change filters but keep routes.
 558    * Refeed cycle is expected to be called after change of the filters and with
 559    * old == new, therefore we do not even try to run the filter on an old route.
 560    * This may lead to 'spurious withdraws' but ensure that there are no 'missing
 561    * withdraws'.
 562    *
 563    * This is not completely safe as there is a window between reconfiguration
 564    * and the end of refeed - if a newly filtered route disappears during this
 565    * period, proper withdraw is not sent (because old would be also filtered)
 566    * and the route is not refeeded (because it disappeared before that).
 567    * Therefore, we also do not try to run the filter on old routes that are
 568    * older than the last filter change.
 569    */
 570
 571   if (new)
 572     new = export_filter(c, new, &new_free, 0);
 573
 574   if (old && !(refeed || (old->lastmod <= c->last_tx_filter_change)))
 575     old = export_filter(c, old, &old_free, 1);
 576
 577   if (!new && !old)
 578   {
 579     /*
 580      * As mentioned above, 'old' value may be incorrect in some race conditions.
 581      * We generally ignore it with the exception of withdraw to pipe protocol.
 582      * In that case we rather propagate unfiltered withdraws regardless of
 583      * export filters to ensure that when a protocol is flushed, its routes are
 584      * removed from all tables. Possible spurious unfiltered withdraws are not
 585      * problem here as they are ignored if there is no corresponding route at
 586      * the other end of the pipe. We directly call rt_notify() hook instead of
 587      * do_rt_notify() to avoid logging and stat counters.
 588      */
 589
 590 #ifdef CONFIG_PIPE
 591     if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
 592       p->rt_notify(p, c, net, NULL, old0);
 593 #endif
 594
 595     return;
 596   }
 597
 598   do_rt_notify(c, net, new, old, refeed);
 599
 600   /* Discard temporary rte's */
 601   if (new_free)
 602     rte_free(new_free);
 603   if (old_free)
 604     rte_free(old_free);
 605 }
 606
 607 static void
 608 rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
 609 {
 610   // struct proto *p = c->proto;
 611
 612   rte *r;
 613   rte *new_best = NULL;
 614   rte *old_best = NULL;
 615   rte *new_free = NULL;
 616   rte *old_free = NULL;
 617
 618   /* Used to track whether we met old_changed position. If before_old is NULL
 619      old_changed was the first and we met it implicitly before current best route. */
 620   int old_meet = old_changed && !before_old;
 621
 622   /* Note that before_old is either NULL or valid (not rejected) route.
 623      If old_changed is valid, before_old have to be too. If old changed route
 624      was not valid, caller must use NULL for both old_changed and before_old. */
 625
 626   if (new_changed)
 627     c->stats.exp_updates_received++;
 628   else
 629     c->stats.exp_withdraws_received++;
 630
 631   /* First, find the new_best route - first accepted by filters */
 632   for (r=net->routes; rte_is_valid(r); r=r->next)
 633     {
 634       if (new_best = export_filter(c, r, &new_free, 0))
 635         break;
 636
 637       /* Note if we walked around the position of old_changed route */
 638       if (r == before_old)
 639         old_meet = 1;
 640     }
 641
 642   /*
 643    * Second, handle the feed case. That means we do not care for
 644    * old_best. It is NULL for feed, and the new_best for refeed.
 645    * For refeed, there is a hack similar to one in rt_notify_basic()
 646    * to ensure withdraws in case of changed filters
 647    */
 648   if (feed)
 649     {
 650       if (feed == 2)    /* refeed */
 651         old_best = new_best ? new_best :
 652           (rte_is_valid(net->routes) ? net->routes : NULL);
 653       else
 654         old_best = NULL;
 655
 656       if (!new_best && !old_best)
 657         return;
 658
 659       goto found;
 660     }
 661
 662   /*
 663    * Now, we find the old_best route. Generally, it is the same as the
 664    * new_best, unless new_best is the same as new_changed or
 665    * old_changed is accepted before new_best.
 666    *
 667    * There are four cases:
 668    *
 669    * - We would find and accept old_changed before new_best, therefore
 670    *   old_changed is old_best. In remaining cases we suppose this
 671    *   is not true.
 672    *
 673    * - We found no new_best, therefore there is also no old_best and
 674    *   we ignore this withdraw.
 675    *
 676    * - We found new_best different than new_changed, therefore
 677    *   old_best is the same as new_best and we ignore this update.
 678    *
 679    * - We found new_best the same as new_changed, therefore it cannot
 680    *   be old_best and we have to continue search for old_best.
 681    *
 682    * There is also a hack to ensure consistency in case of changed filters.
 683    * It does not find the proper old_best, just selects a non-NULL route.
 684    */
 685
 686   /* Hack for changed filters */
 687   if (old_changed && (old_changed->lastmod <= c->last_tx_filter_change))
 688     {
 689       old_best = old_changed;
 690       goto found;
 691     }
 692
 693   /* First case */
 694   if (old_meet)
 695     if (old_best = export_filter(c, old_changed, &old_free, 1))
 696       goto found;
 697
 698   /* Second case */
 699   if (!new_best)
 700     return;
 701
 702   /* Third case, we use r instead of new_best, because export_filter() could change it */
 703   if (r != new_changed)
 704     {
 705       if (new_free)
 706         rte_free(new_free);
 707       return;
 708     }
 709
 710   /* Fourth case */
 711   for (r=r->next; rte_is_valid(r); r=r->next)
 712     {
 713       if (old_best = export_filter(c, r, &old_free, 1))
 714         goto found;
 715
 716       if (r == before_old)
 717         if (old_best = export_filter(c, old_changed, &old_free, 1))
 718           goto found;
 719     }
 720
 721   /* Implicitly, old_best is NULL and new_best is non-NULL */
 722
 723  found:
 724   do_rt_notify(c, net, new_best, old_best, (feed == 2));
 725
 726   /* Discard temporary rte's */
 727   if (new_free)
 728     rte_free(new_free);
 729   if (old_free)
 730     rte_free(old_free);
 731 }
 732
 733
 734 static struct nexthop *
 735 nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
 736 {
 737   return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
 738 }
 739
 740 rte *
 741 rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
 742 {
 743   // struct proto *p = c->proto;
 744   struct nexthop *nhs = NULL;
 745   rte *best0, *best, *rt0, *rt, *tmp;
 746
 747   best0 = net->routes;
 748   *rt_free = NULL;
 749
 750   if (!rte_is_valid(best0))
 751     return NULL;
 752
 753   best = export_filter_(c, best0, rt_free, pool, silent);
 754
 755   if (!best || !rte_is_reachable(best))
 756     return best;
 757
 758   for (rt0 = best0->next; rt0; rt0 = rt0->next)
 759   {
 760     if (!rte_mergable(best0, rt0))
 761       continue;
 762
 763     rt = export_filter_(c, rt0, &tmp, pool, 1);
 764
 765     if (!rt)
 766       continue;
 767
 768     if (rte_is_reachable(rt))
 769       nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);
 770
 771     if (tmp)
 772       rte_free(tmp);
 773   }
 774
 775   if (nhs)
 776   {
 777     nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);
 778
 779     if (nhs->next)
 780     {
 781       best = rte_cow_rta(best, pool);
 782       nexthop_link(best->attrs, nhs);
 783     }
 784   }
 785
 786   if (best != best0)
 787     *rt_free = best;
 788
 789   return best;
 790 }
 791
 792
 793 static void
 794 rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
 795                  rte *new_best, rte*old_best, int refeed)
 796 {
 797   // struct proto *p = c->proto;
 798
 799   rte *new_best_free = NULL;
 800   rte *old_best_free = NULL;
 801   rte *new_changed_free = NULL;
 802   rte *old_changed_free = NULL;
 803
 804   /* We assume that all rte arguments are either NULL or rte_is_valid() */
 805
 806   /* This check should be done by the caller */
 807   if (!new_best && !old_best)
 808     return;
 809
 810   /* Check whether the change is relevant to the merged route */
 811   if ((new_best == old_best) && !refeed)
 812   {
 813     new_changed = rte_mergable(new_best, new_changed) ?
 814       export_filter(c, new_changed, &new_changed_free, 1) : NULL;
 815
 816     old_changed = rte_mergable(old_best, old_changed) ?
 817       export_filter(c, old_changed, &old_changed_free, 1) : NULL;
 818
 819     if (!new_changed && !old_changed)
 820       return;
 821   }
 822
 823   if (new_best)
 824     c->stats.exp_updates_received++;
 825   else
 826     c->stats.exp_withdraws_received++;
 827
 828   /* Prepare new merged route */
 829   if (new_best)
 830     new_best = rt_export_merged(c, net, &new_best_free, rte_update_pool, 0);
 831
 832   /* Prepare old merged route (without proper merged next hops) */
 833   /* There are some issues with running filter on old route - see rt_notify_basic() */
 834   if (old_best && !refeed)
 835     old_best = export_filter(c, old_best, &old_best_free, 1);
 836
 837   if (new_best || old_best)
 838     do_rt_notify(c, net, new_best, old_best, refeed);
 839
 840   /* Discard temporary rte's */
 841   if (new_best_free)
 842     rte_free(new_best_free);
 843   if (old_best_free)
 844     rte_free(old_best_free);
 845   if (new_changed_free)
 846     rte_free(new_changed_free);
 847   if (old_changed_free)
 848     rte_free(old_changed_free);
 849 }
 850
 851
 852 /**
 853  * rte_announce - announce a routing table change
 854  * @tab: table the route has been added to
 855  * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
 856  * @net: network in question
 857  * @new: the new route to be announced
 858  * @old: the previous route for the same network
 859  * @new_best: the new best route for the same network
 860  * @old_best: the previous best route for the same network
 861  * @before_old: The previous route before @old for the same network.
 862  *              If @before_old is NULL @old was the first.
 863  *
 864  * This function gets a routing table update and announces it
 865  * to all protocols that acccepts given type of route announcement
 866  * and are connected to the same table by their announcement hooks.
 867  *
 868  * Route announcement of type %RA_OPTIMAL si generated when optimal
 869  * route (in routing table @tab) changes. In that case @old stores the
 870  * old optimal route.
 871  *
 872  * Route announcement of type %RA_ANY si generated when any route (in
 873  * routing table @tab) changes In that case @old stores the old route
 874  * from the same protocol.
 875  *
 876  * For each appropriate protocol, we first call its preexport()
 877  * hook which performs basic checks on the route (each protocol has a
 878  * right to veto or force accept of the route before any filter is
 879  * asked) and adds default values of attributes specific to the new
 880  * protocol (metrics, tags etc.).  Then it consults the protocol's
 881  * export filter and if it accepts the route, the rt_notify() hook of
 882  * the protocol gets called.
 883  */
 884 static void
 885 rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
 886              rte *new_best, rte *old_best, rte *before_old)
 887 {
 888   if (!rte_is_valid(new))
 889     new = NULL;
 890
 891   if (!rte_is_valid(old))
 892     old = before_old = NULL;
 893
 894   if (!rte_is_valid(new_best))
 895     new_best = NULL;
 896
 897   if (!rte_is_valid(old_best))
 898     old_best = NULL;
 899
 900   if (!old && !new)
 901     return;
 902
 903   if ((type == RA_OPTIMAL) && tab->hostcache)
 904     rt_notify_hostcache(tab, net);
 905
 906   struct channel *c; node *n;
 907   WALK_LIST2(c, n, tab->channels, table_node)
 908     {
 909       if (c->export_state == ES_DOWN)
 910         continue;
 911
 912       if (c->ra_mode == type)
 913         if (type == RA_ACCEPTED)
 914           rt_notify_accepted(c, net, new, old, before_old, 0);
 915         else if (type == RA_MERGED)
 916           rt_notify_merged(c, net, new, old, new_best, old_best, 0);
 917         else
 918           rt_notify_basic(c, net, new, old, 0);
 919     }
 920 }
 921
 922 static inline int
 923 rte_validate(rte *e)
 924 {
 925   int c;
 926   net *n = e->net;
 927
 928   if (!net_validate(n->n.addr))
 929   {
 930     log(L_WARN "Ignoring bogus prefix %N received via %s",
 931         n->n.addr, e->sender->proto->name);
 932     return 0;
 933   }
 934
 935   /* FIXME: better handling different nettypes */
 936   c = !net_is_flow(n->n.addr) ?
 937     net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
 938   if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
 939   {
 940     log(L_WARN "Ignoring bogus route %N received via %s",
 941         n->n.addr, e->sender->proto->name);
 942     return 0;
 943   }
 944
 945   if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
 946   {
 947     log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
 948         n->n.addr, e->attrs->dest, e->sender->proto->name);
 949     return 0;
 950   }
 951
 952   if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
 953   {
 954     log(L_WARN "Ignoring unsorted multipath route %N received via %s",
 955         n->n.addr, e->sender->proto->name);
 956     return 0;
 957   }
 958
 959   return 1;
 960 }
 961
 962 /**
 963  * rte_free - delete a &rte
 964  * @e: &rte to be deleted
 965  *
 966  * rte_free() deletes the given &rte from the routing table it's linked to.
 967  */
 968 void
 969 rte_free(rte *e)
 970 {
 971   if (rta_is_cached(e->attrs))
 972     rta_free(e->attrs);
 973   sl_free(rte_slab, e);
 974 }
 975
 976 static inline void
 977 rte_free_quick(rte *e)
 978 {
 979   rta_free(e->attrs);
 980   sl_free(rte_slab, e);
 981 }
 982
 983 static int
 984 rte_same(rte *x, rte *y)
 985 {
 986   return
 987     x->attrs == y->attrs &&
 988     x->flags == y->flags &&
 989     x->pflags == y->pflags &&
 990     x->pref == y->pref &&
 991     (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
 992 }
 993
 994 static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
 995
 996 static void
 997 rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
 998 {
 999   struct proto *p = c->proto;
1000   struct rtable *table = c->table;
1001   struct proto_stats *stats = &c->stats;
1002   static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
1003   rte *before_old = NULL;
1004   rte *old_best = net->routes;
1005   rte *old = NULL;
1006   rte **k;
1007
1008   k = &net->routes;                     /* Find and remove original route from the same protocol */
1009   while (old = *k)
1010     {
1011       if (old->attrs->src == src)
1012         {
1013           /* If there is the same route in the routing table but from
1014            * a different sender, then there are two paths from the
1015            * source protocol to this routing table through transparent
1016            * pipes, which is not allowed.
1017            *
1018            * We log that and ignore the route. If it is withdraw, we
1019            * ignore it completely (there might be 'spurious withdraws',
1020            * see FIXME in do_rte_announce())
1021            */
1022           if (old->sender->proto != p)
1023             {
1024               if (new)
1025                 {
1026                   log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
1027                       net->n.addr, table->name);
1028                   rte_free_quick(new);
1029                 }
1030               return;
1031             }
1032
1033           if (new && rte_same(old, new))
1034             {
1035               /* No changes, ignore the new route */
1036
1037               if (!rte_is_filtered(new))
1038                 {
1039                   stats->imp_updates_ignored++;
1040                   rte_trace_in(D_ROUTES, p, new, "ignored");
1041                 }
1042
1043               rte_free_quick(new);
1044               return;
1045             }
1046           *k = old->next;
1047           table->rt_count--;
1048           break;
1049         }
1050       k = &old->next;
1051       before_old = old;
1052     }
1053
1054   if (!old)
1055     before_old = NULL;
1056
1057   if (!old && !new)
1058     {
1059       stats->imp_withdraws_ignored++;
1060       return;
1061     }
1062
1063   int new_ok = rte_is_ok(new);
1064   int old_ok = rte_is_ok(old);
1065
1066   struct channel_limit *l = &c->rx_limit;
1067   if (l->action && !old && new && !c->in_table)
1068     {
1069       u32 all_routes = stats->imp_routes + stats->filt_routes;
1070
1071       if (all_routes >= l->limit)
1072         channel_notify_limit(c, l, PLD_RX, all_routes);
1073
1074       if (l->state == PLS_BLOCKED)
1075         {
1076           /* In receive limit the situation is simple, old is NULL so
1077              we just free new and exit like nothing happened */
1078
1079           stats->imp_updates_ignored++;
1080           rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1081           rte_free_quick(new);
1082           return;
1083         }
1084     }
1085
1086   l = &c->in_limit;
1087   if (l->action && !old_ok && new_ok)
1088     {
1089       if (stats->imp_routes >= l->limit)
1090         channel_notify_limit(c, l, PLD_IN, stats->imp_routes);
1091
1092       if (l->state == PLS_BLOCKED)
1093         {
1094           /* In import limit the situation is more complicated. We
1095              shouldn't just drop the route, we should handle it like
1096              it was filtered. We also have to continue the route
1097              processing if old or new is non-NULL, but we should exit
1098              if both are NULL as this case is probably assumed to be
1099              already handled. */
1100
1101           stats->imp_updates_ignored++;
1102           rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
1103
1104           if (c->in_keep_filtered)
1105             new->flags |= REF_FILTERED;
1106           else
1107             { rte_free_quick(new); new = NULL; }
1108
1109           /* Note that old && !new could be possible when
1110              c->in_keep_filtered changed in the recent past. */
1111
1112           if (!old && !new)
1113             return;
1114
1115           new_ok = 0;
1116           goto skip_stats1;
1117         }
1118     }
1119
1120   if (new_ok)
1121     stats->imp_updates_accepted++;
1122   else if (old_ok)
1123     stats->imp_withdraws_accepted++;
1124   else
1125     stats->imp_withdraws_ignored++;
1126
1127  skip_stats1:
1128
1129   if (new)
1130     rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
1131   if (old)
1132     rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
1133
1134   if (table->config->sorted)
1135     {
1136       /* If routes are sorted, just insert new route to appropriate position */
1137       if (new)
1138         {
1139           if (before_old && !rte_better(new, before_old))
1140             k = &before_old->next;
1141           else
1142             k = &net->routes;
1143
1144           for (; *k; k=&(*k)->next)
1145             if (rte_better(new, *k))
1146               break;
1147
1148           new->next = *k;
1149           *k = new;
1150           table->rt_count++;
1151         }
1152     }
1153   else
1154     {
1155       /* If routes are not sorted, find the best route and move it on
1156          the first position. There are several optimized cases. */
1157
1158       if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1159         goto do_recalculate;
1160
1161       if (new && rte_better(new, old_best))
1162         {
1163           /* The first case - the new route is cleary optimal,
1164              we link it at the first position */
1165
1166           new->next = net->routes;
1167           net->routes = new;
1168           table->rt_count++;
1169         }
1170       else if (old == old_best)
1171         {
1172           /* The second case - the old best route disappeared, we add the
1173              new route (if we have any) to the list (we don't care about
1174              position) and then we elect the new optimal route and relink
1175              that route at the first position and announce it. New optimal
1176              route might be NULL if there is no more routes */
1177
1178         do_recalculate:
1179           /* Add the new route to the list */
1180           if (new)
1181             {
1182               new->next = net->routes;
1183               net->routes = new;
1184               table->rt_count++;
1185             }
1186
1187           /* Find a new optimal route (if there is any) */
1188           if (net->routes)
1189             {
1190               rte **bp = &net->routes;
1191               for (k=&(*bp)->next; *k; k=&(*k)->next)
1192                 if (rte_better(*k, *bp))
1193                   bp = k;
1194
1195               /* And relink it */
1196               rte *best = *bp;
1197               *bp = best->next;
1198               best->next = net->routes;
1199               net->routes = best;
1200             }
1201         }
1202       else if (new)
1203         {
1204           /* The third case - the new route is not better than the old
1205              best route (therefore old_best != NULL) and the old best
1206              route was not removed (therefore old_best == net->routes).
1207              We just link the new route after the old best route. */
1208
1209           ASSERT(net->routes != NULL);
1210           new->next = net->routes->next;
1211           net->routes->next = new;
1212           table->rt_count++;
1213         }
1214       /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1215     }
1216
1217   if (new)
1218     new->lastmod = current_time();
1219
1220   /* Log the route change */
1221   if (p->debug & D_ROUTES)
1222     {
1223       if (new_ok)
1224         rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
1225       else if (old_ok)
1226         {
1227           if (old != old_best)
1228             rte_trace(p, old, '>', "removed");
1229           else if (rte_is_ok(net->routes))
1230             rte_trace(p, old, '>', "removed [replaced]");
1231           else
1232             rte_trace(p, old, '>', "removed [sole]");
1233         }
1234     }
1235
1236   /* Propagate the route change */
1237   rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1238   if (net->routes != old_best)
1239     rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1240   if (table->config->sorted)
1241     rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
1242   rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1243
1244   if (!net->routes &&
1245       (table->gc_counter++ >= table->config->gc_max_ops) &&
1246       (table->gc_time + table->config->gc_min_time <= current_time()))
1247     rt_schedule_prune(table);
1248
1249   if (old_ok && p->rte_remove)
1250     p->rte_remove(net, old);
1251   if (new_ok && p->rte_insert)
1252     p->rte_insert(net, new);
1253
1254   if (old)
1255     rte_free_quick(old);
1256 }
1257
1258 static int rte_update_nest_cnt;         /* Nesting counter to allow recursive updates */
1259
1260 static inline void
1261 rte_update_lock(void)
1262 {
1263   rte_update_nest_cnt++;
1264 }
1265
1266 static inline void
1267 rte_update_unlock(void)
1268 {
1269   if (!--rte_update_nest_cnt)
1270     lp_flush(rte_update_pool);
1271 }
1272
1273 static inline void
1274 rte_hide_dummy_routes(net *net, rte **dummy)
1275 {
1276   if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1277   {
1278     *dummy = net->routes;
1279     net->routes = (*dummy)->next;
1280   }
1281 }
1282
1283 static inline void
1284 rte_unhide_dummy_routes(net *net, rte **dummy)
1285 {
1286   if (*dummy)
1287   {
1288     (*dummy)->next = net->routes;
1289     net->routes = *dummy;
1290   }
1291 }
1292
1293 /**
1294  * rte_update - enter a new update to a routing table
1295  * @table: table to be updated
1296  * @c: channel doing the update
1297  * @net: network node
1298  * @p: protocol submitting the update
1299  * @src: protocol originating the update
1300  * @new: a &rte representing the new route or %NULL for route removal.
1301  *
1302  * This function is called by the routing protocols whenever they discover
1303  * a new route or wish to update/remove an existing route. The right announcement
1304  * sequence is to build route attributes first (either un-cached with @aflags set
1305  * to zero or a cached one using rta_lookup(); in this case please note that
1306  * you need to increase the use count of the attributes yourself by calling
1307  * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
1308  * the appropriate data and finally submit the new &rte by calling rte_update().
1309  *
1310  * @src specifies the protocol that originally created the route and the meaning
1311  * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
1312  * same value as @new->attrs->proto. @p specifies the protocol that called
1313  * rte_update(). In most cases it is the same protocol as @src. rte_update()
1314  * stores @p in @new->sender;
1315  *
1316  * When rte_update() gets any route, it automatically validates it (checks,
1317  * whether the network and next hop address are valid IP addresses and also
1318  * whether a normal routing protocol doesn't try to smuggle a host or link
1319  * scope route to the table), converts all protocol dependent attributes stored
1320  * in the &rte to temporary extended attributes, consults import filters of the
1321  * protocol to see if the route should be accepted and/or its attributes modified,
1322  * stores the temporary attributes back to the &rte.
1323  *
1324  * Now, having a "public" version of the route, we
1325  * automatically find any old route defined by the protocol @src
1326  * for network @n, replace it by the new one (or removing it if @new is %NULL),
1327  * recalculate the optimal route for this destination and finally broadcast
1328  * the change (if any) to all routing protocols by calling rte_announce().
1329  *
1330  * All memory used for attribute lists and other temporary allocations is taken
1331  * from a special linear pool @rte_update_pool and freed when rte_update()
1332  * finishes.
1333  */
1334
1335 void
1336 rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
1337 {
1338   struct proto *p = c->proto;
1339   struct proto_stats *stats = &c->stats;
1340   struct filter *filter = c->in_filter;
1341   rte *dummy = NULL;
1342   net *nn;
1343
1344   ASSERT(c->channel_state == CS_UP);
1345
1346   rte_update_lock();
1347   if (new)
1348     {
1349       nn = net_get(c->table, n);
1350
1351       new->net = nn;
1352       new->sender = c;
1353
1354       if (!new->pref)
1355         new->pref = c->preference;
1356
1357       stats->imp_updates_received++;
1358       if (!rte_validate(new))
1359         {
1360           rte_trace_in(D_FILTERS, p, new, "invalid");
1361           stats->imp_updates_invalid++;
1362           goto drop;
1363         }
1364
1365       if (filter == FILTER_REJECT)
1366         {
1367           stats->imp_updates_filtered++;
1368           rte_trace_in(D_FILTERS, p, new, "filtered out");
1369
1370           if (! c->in_keep_filtered)
1371             goto drop;
1372
1373           /* new is a private copy, i could modify it */
1374           new->flags |= REF_FILTERED;
1375         }
1376       else
1377         {
1378           rte_make_tmp_attrs(&new, rte_update_pool);
1379           if (filter && (filter != FILTER_REJECT))
1380             {
1381               ea_list *oldea = new->attrs->eattrs;
1382               int fr = f_run(filter, &new, rte_update_pool, 0);
1383               if (fr > F_ACCEPT)
1384                 {
1385                   stats->imp_updates_filtered++;
1386                   rte_trace_in(D_FILTERS, p, new, "filtered out");
1387
1388                   if (! c->in_keep_filtered)
1389                     goto drop;
1390
1391                   new->flags |= REF_FILTERED;
1392                 }
1393               if (new->attrs->eattrs != oldea && src->proto->store_tmp_attrs)
1394                 src->proto->store_tmp_attrs(new);
1395             }
1396         }
1397       if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1398         new->attrs = rta_lookup(new->attrs);
1399       new->flags |= REF_COW;
1400     }
1401   else
1402     {
1403       stats->imp_withdraws_received++;
1404
1405       if (!(nn = net_find(c->table, n)) || !src)
1406         {
1407           stats->imp_withdraws_ignored++;
1408           rte_update_unlock();
1409           return;
1410         }
1411     }
1412
1413  recalc:
1414   rte_hide_dummy_routes(nn, &dummy);
1415   rte_recalculate(c, nn, new, src);
1416   rte_unhide_dummy_routes(nn, &dummy);
1417   rte_update_unlock();
1418   return;
1419
1420  drop:
1421   rte_free(new);
1422   new = NULL;
1423   goto recalc;
1424 }
1425
1426 /* Independent call to rte_announce(), used from next hop
1427    recalculation, outside of rte_update(). new must be non-NULL */
1428 static inline void
1429 rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
1430                rte *new_best, rte *old_best)
1431 {
1432   rte_update_lock();
1433   rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
1434   rte_update_unlock();
1435 }
1436
1437 static inline void
1438 rte_discard(rte *old)   /* Non-filtered route deletion, used during garbage collection */
1439 {
1440   rte_update_lock();
1441   rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
1442   rte_update_unlock();
1443 }
1444
1445 /* Modify existing route by protocol hook, used for long-lived graceful restart */
1446 static inline void
1447 rte_modify(rte *old)
1448 {
1449   rte_update_lock();
1450
1451   rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
1452   if (new != old)
1453   {
1454     if (new)
1455     {
1456       if (!rta_is_cached(new->attrs))
1457         new->attrs = rta_lookup(new->attrs);
1458       new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
1459     }
1460
1461     rte_recalculate(old->sender, old->net, new, old->attrs->src);
1462   }
1463
1464   rte_update_unlock();
1465 }
1466
1467 /* Check rtable for best route to given net whether it would be exported do p */
1468 int
1469 rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
1470 {
1471   net *n = net_find(t, a);
1472   rte *rt = n ? n->routes : NULL;
1473
1474   if (!rte_is_valid(rt))
1475     return 0;
1476
1477   rte_update_lock();
1478
1479   /* Rest is stripped down export_filter() */
1480   rte_make_tmp_attrs(&rt, rte_update_pool);
1481   int v = p->preexport ? p->preexport(p, &rt, rte_update_pool) : 0;
1482   if (v == RIC_PROCESS)
1483     v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);
1484
1485    /* Discard temporary rte */
1486   if (rt != n->routes)
1487     rte_free(rt);
1488
1489   rte_update_unlock();
1490
1491   return v > 0;
1492 }
1493
1494
1495 /**
1496  * rt_refresh_begin - start a refresh cycle
1497  * @t: related routing table
1498  * @c related channel
1499  *
1500  * This function starts a refresh cycle for given routing table and announce
1501  * hook. The refresh cycle is a sequence where the protocol sends all its valid
1502  * routes to the routing table (by rte_update()). After that, all protocol
1503  * routes (more precisely routes with @c as @sender) not sent during the
1504  * refresh cycle but still in the table from the past are pruned. This is
1505  * implemented by marking all related routes as stale by REF_STALE flag in
1506  * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1507  * flag in rt_refresh_end() and then removing such routes in the prune loop.
1508  */
1509 void
1510 rt_refresh_begin(rtable *t, struct channel *c)
1511 {
1512   FIB_WALK(&t->fib, net, n)
1513     {
1514       rte *e;
1515       for (e = n->routes; e; e = e->next)
1516         if (e->sender == c)
1517           e->flags |= REF_STALE;
1518     }
1519   FIB_WALK_END;
1520 }
1521
1522 /**
1523  * rt_refresh_end - end a refresh cycle
1524  * @t: related routing table
1525  * @c: related channel
1526  *
1527  * This function ends a refresh cycle for given routing table and announce
1528  * hook. See rt_refresh_begin() for description of refresh cycles.
1529  */
1530 void
1531 rt_refresh_end(rtable *t, struct channel *c)
1532 {
1533   int prune = 0;
1534
1535   FIB_WALK(&t->fib, net, n)
1536     {
1537       rte *e;
1538       for (e = n->routes; e; e = e->next)
1539         if ((e->sender == c) && (e->flags & REF_STALE))
1540           {
1541             e->flags |= REF_DISCARD;
1542             prune = 1;
1543           }
1544     }
1545   FIB_WALK_END;
1546
1547   if (prune)
1548     rt_schedule_prune(t);
1549 }
1550
1551 void
1552 rt_modify_stale(rtable *t, struct channel *c)
1553 {
1554   int prune = 0;
1555
1556   FIB_WALK(&t->fib, net, n)
1557     {
1558       rte *e;
1559       for (e = n->routes; e; e = e->next)
1560         if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
1561           {
1562             e->flags |= REF_MODIFY;
1563             prune = 1;
1564           }
1565     }
1566   FIB_WALK_END;
1567
1568   if (prune)
1569     rt_schedule_prune(t);
1570 }
1571
1572 /**
1573  * rte_dump - dump a route
1574  * @e: &rte to be dumped
1575  *
1576  * This functions dumps contents of a &rte to debug output.
1577  */
1578 void
1579 rte_dump(rte *e)
1580 {
1581   net *n = e->net;
1582   debug("%-1N ", n->n.addr);
1583   debug("KF=%02x PF=%02x pref=%d ", n->n.flags, e->pflags, e->pref);
1584   rta_dump(e->attrs);
1585   if (e->attrs->src->proto->proto->dump_attrs)
1586     e->attrs->src->proto->proto->dump_attrs(e);
1587   debug("\n");
1588 }
1589
1590 /**
1591  * rt_dump - dump a routing table
1592  * @t: routing table to be dumped
1593  *
1594  * This function dumps contents of a given routing table to debug output.
1595  */
1596 void
1597 rt_dump(rtable *t)
1598 {
1599   debug("Dump of routing table <%s>\n", t->name);
1600 #ifdef DEBUGGING
1601   fib_check(&t->fib);
1602 #endif
1603   FIB_WALK(&t->fib, net, n)
1604     {
1605       rte *e;
1606       for(e=n->routes; e; e=e->next)
1607         rte_dump(e);
1608     }
1609   FIB_WALK_END;
1610   debug("\n");
1611 }
1612
1613 /**
1614  * rt_dump_all - dump all routing tables
1615  *
1616  * This function dumps contents of all routing tables to debug output.
1617  */
1618 void
1619 rt_dump_all(void)
1620 {
1621   rtable *t;
1622
1623   WALK_LIST(t, routing_tables)
1624     rt_dump(t);
1625 }
1626
1627 static inline void
1628 rt_schedule_hcu(rtable *tab)
1629 {
1630   if (tab->hcu_scheduled)
1631     return;
1632
1633   tab->hcu_scheduled = 1;
1634   ev_schedule(tab->rt_event);
1635 }
1636
1637 static inline void
1638 rt_schedule_nhu(rtable *tab)
1639 {
1640   if (tab->nhu_state == NHU_CLEAN)
1641     ev_schedule(tab->rt_event);
1642
1643   /* state change:
1644    *   NHU_CLEAN   -> NHU_SCHEDULED
1645    *   NHU_RUNNING -> NHU_DIRTY
1646    */
1647   tab->nhu_state |= NHU_SCHEDULED;
1648 }
1649
1650 void
1651 rt_schedule_prune(rtable *tab)
1652 {
1653   if (tab->prune_state == 0)
1654     ev_schedule(tab->rt_event);
1655
1656   /* state change 0->1, 2->3 */
1657   tab->prune_state |= 1;
1658 }
1659
1660
1661 static void
1662 rt_event(void *ptr)
1663 {
1664   rtable *tab = ptr;
1665
1666   rt_lock_table(tab);
1667
1668   if (tab->hcu_scheduled)
1669     rt_update_hostcache(tab);
1670
1671   if (tab->nhu_state)
1672     rt_next_hop_update(tab);
1673
1674   if (tab->prune_state)
1675     rt_prune_table(tab);
1676
1677   rt_unlock_table(tab);
1678 }
1679
1680 void
1681 rt_setup(pool *p, rtable *t, struct rtable_config *cf)
1682 {
1683   bzero(t, sizeof(*t));
1684   t->name = cf->name;
1685   t->config = cf;
1686   t->addr_type = cf->addr_type;
1687   fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1688   init_list(&t->channels);
1689
1690   t->rt_event = ev_new_init(p, rt_event, t);
1691   t->gc_time = current_time();
1692 }
1693
1694 /**
1695  * rt_init - initialize routing tables
1696  *
1697  * This function is called during BIRD startup. It initializes the
1698  * routing table module.
1699  */
1700 void
1701 rt_init(void)
1702 {
1703   rta_init();
1704   rt_table_pool = rp_new(&root_pool, "Routing tables");
1705   rte_update_pool = lp_new_default(rt_table_pool);
1706   rte_slab = sl_new(rt_table_pool, sizeof(rte));
1707   init_list(&routing_tables);
1708 }
1709
1710
1711 /**
1712  * rt_prune_table - prune a routing table
1713  *
1714  * The prune loop scans routing tables and removes routes belonging to flushing
1715  * protocols, discarded routes and also stale network entries. It is called from
1716  * rt_event(). The event is rescheduled if the current iteration do not finish
1717  * the table. The pruning is directed by the prune state (@prune_state),
1718  * specifying whether the prune cycle is scheduled or running, and there
1719  * is also a persistent pruning iterator (@prune_fit).
1720  *
1721  * The prune loop is used also for channel flushing. For this purpose, the
1722  * channels to flush are marked before the iteration and notified after the
1723  * iteration.
1724  */
1725 static void
1726 rt_prune_table(rtable *tab)
1727 {
1728   struct fib_iterator *fit = &tab->prune_fit;
1729   int limit = 512;
1730
1731   struct channel *c;
1732   node *n, *x;
1733
1734   DBG("Pruning route table %s\n", tab->name);
1735 #ifdef DEBUGGING
1736   fib_check(&tab->fib);
1737 #endif
1738
1739   if (tab->prune_state == 0)
1740     return;
1741
1742   if (tab->prune_state == 1)
1743   {
1744     /* Mark channels to flush */
1745     WALK_LIST2(c, n, tab->channels, table_node)
1746       if (c->channel_state == CS_FLUSHING)
1747         c->flush_active = 1;
1748
1749     FIB_ITERATE_INIT(fit, &tab->fib);
1750     tab->prune_state = 2;
1751   }
1752
1753 again:
1754   FIB_ITERATE_START(&tab->fib, fit, net, n)
1755     {
1756       rte *e;
1757
1758     rescan:
1759       for (e=n->routes; e; e=e->next)
1760       {
1761         if (e->sender->flush_active || (e->flags & REF_DISCARD))
1762           {
1763             if (limit <= 0)
1764               {
1765                 FIB_ITERATE_PUT(fit);
1766                 ev_schedule(tab->rt_event);
1767                 return;
1768               }
1769
1770             rte_discard(e);
1771             limit--;
1772
1773             goto rescan;
1774           }
1775
1776         if (e->flags & REF_MODIFY)
1777           {
1778             if (limit <= 0)
1779               {
1780                 FIB_ITERATE_PUT(fit);
1781                 ev_schedule(tab->rt_event);
1782                 return;
1783               }
1784
1785             rte_modify(e);
1786             limit--;
1787
1788             goto rescan;
1789           }
1790       }
1791
1792       if (!n->routes)           /* Orphaned FIB entry */
1793         {
1794           FIB_ITERATE_PUT(fit);
1795           fib_delete(&tab->fib, n);
1796           goto again;
1797         }
1798     }
1799   FIB_ITERATE_END;
1800
1801 #ifdef DEBUGGING
1802   fib_check(&tab->fib);
1803 #endif
1804
1805   tab->gc_counter = 0;
1806   tab->gc_time = current_time();
1807
1808   /* state change 2->0, 3->1 */
1809   tab->prune_state &= 1;
1810
1811   if (tab->prune_state > 0)
1812     ev_schedule(tab->rt_event);
1813
1814   /* FIXME: This should be handled in a better way */
1815   rt_prune_sources();
1816
1817   /* Close flushed channels */
1818   WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
1819     if (c->flush_active)
1820       {
1821         c->flush_active = 0;
1822         channel_set_state(c, CS_DOWN);
1823       }
1824
1825   return;
1826 }
1827
1828 void
1829 rt_preconfig(struct config *c)
1830 {
1831   init_list(&c->tables);
1832
1833   rt_new_table(cf_get_symbol("master4"), NET_IP4);
1834   rt_new_table(cf_get_symbol("master6"), NET_IP6);
1835 }
1836
1837
1838 /*
1839  * Some functions for handing internal next hop updates
1840  * triggered by rt_schedule_nhu().
1841  */
1842
1843 static inline int
1844 rta_next_hop_outdated(rta *a)
1845 {
1846   struct hostentry *he = a->hostentry;
1847
1848   if (!he)
1849     return 0;
1850
1851   if (!he->src)
1852     return a->dest != RTD_UNREACHABLE;
1853
1854   return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1855     (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
1856 }
1857
1858 void
1859 rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
1860 {
1861   a->hostentry = he;
1862   a->dest = he->dest;
1863   a->igp_metric = he->igp_metric;
1864
1865   if (a->dest != RTD_UNICAST)
1866   {
1867     /* No nexthop */
1868 no_nexthop:
1869     a->nh = (struct nexthop) {};
1870     if (mls)
1871     { /* Store the label stack for later changes */
1872       a->nh.labels_orig = a->nh.labels = mls->len;
1873       memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
1874     }
1875     return;
1876   }
1877
1878   if (((!mls) || (!mls->len)) && he->nexthop_linkable)
1879   { /* Just link the nexthop chain, no label append happens. */
1880     memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
1881     return;
1882   }
1883
1884   struct nexthop *nhp = NULL, *nhr = NULL;
1885   int skip_nexthop = 0;
1886
1887   for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
1888   {
1889     if (skip_nexthop)
1890       skip_nexthop--;
1891     else
1892     {
1893       nhr = nhp;
1894       nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
1895     }
1896
1897     nhp->iface = nh->iface;
1898     nhp->weight = nh->weight;
1899     if (mls)
1900     {
1901       nhp->labels = nh->labels + mls->len;
1902       nhp->labels_orig = mls->len;
1903       if (nhp->labels <= MPLS_MAX_LABEL_STACK)
1904       {
1905         memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
1906         memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
1907       }
1908       else
1909       {
1910         log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
1911             nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
1912         skip_nexthop++;
1913         continue;
1914       }
1915     }
1916     if (ipa_nonzero(nh->gw))
1917     {
1918       nhp->gw = nh->gw;                 /* Router nexthop */
1919       nhp->flags |= (nh->flags & RNF_ONLINK);
1920     }
1921     else if (ipa_nonzero(he->link))
1922       nhp->gw = he->link;               /* Device nexthop with link-local address known */
1923     else
1924       nhp->gw = he->addr;               /* Device nexthop with link-local address unknown */
1925   }
1926
1927   if (skip_nexthop)
1928     if (nhr)
1929       nhr->next = NULL;
1930     else
1931     {
1932       a->dest = RTD_UNREACHABLE;
1933       log(L_WARN "No valid nexthop remaining, setting route unreachable");
1934       goto no_nexthop;
1935     }
1936 }
1937
1938 static inline rte *
1939 rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
1940 {
1941   rta *a = alloca(RTA_MAX_SIZE);
1942   memcpy(a, old->attrs, rta_size(old->attrs));
1943
1944   mpls_label_stack mls = { .len = a->nh.labels_orig };
1945   memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
1946
1947   rta_apply_hostentry(a, old->attrs->hostentry, &mls);
1948   a->aflags = 0;
1949
1950   rte *e = sl_alloc(rte_slab);
1951   memcpy(e, old, sizeof(rte));
1952   e->attrs = rta_lookup(a);
1953
1954   return e;
1955 }
1956
1957 static inline int
1958 rt_next_hop_update_net(rtable *tab, net *n)
1959 {
1960   rte **k, *e, *new, *old_best, **new_best;
1961   int count = 0;
1962   int free_old_best = 0;
1963
1964   old_best = n->routes;
1965   if (!old_best)
1966     return 0;
1967
1968   for (k = &n->routes; e = *k; k = &e->next)
1969     if (rta_next_hop_outdated(e->attrs))
1970       {
1971         new = rt_next_hop_update_rte(tab, e);
1972         *k = new;
1973
1974         rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
1975         rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
1976
1977         /* Call a pre-comparison hook */
1978         /* Not really an efficient way to compute this */
1979         if (e->attrs->src->proto->rte_recalculate)
1980           e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);
1981
1982         if (e != old_best)
1983           rte_free_quick(e);
1984         else /* Freeing of the old best rte is postponed */
1985           free_old_best = 1;
1986
1987         e = new;
1988         count++;
1989       }
1990
1991   if (!count)
1992     return 0;
1993
1994   /* Find the new best route */
1995   new_best = NULL;
1996   for (k = &n->routes; e = *k; k = &e->next)
1997     {
1998       if (!new_best || rte_better(e, *new_best))
1999         new_best = k;
2000     }
2001
2002   /* Relink the new best route to the first position */
2003   new = *new_best;
2004   if (new != n->routes)
2005     {
2006       *new_best = new->next;
2007       new->next = n->routes;
2008       n->routes = new;
2009     }
2010
2011   /* Announce the new best route */
2012   if (new != old_best)
2013     {
2014       rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
2015       rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
2016     }
2017
2018   /* FIXME: Better announcement of merged routes */
2019   rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
2020
2021   if (free_old_best)
2022     rte_free_quick(old_best);
2023
2024   return count;
2025 }
2026
2027 static void
2028 rt_next_hop_update(rtable *tab)
2029 {
2030   struct fib_iterator *fit = &tab->nhu_fit;
2031   int max_feed = 32;
2032
2033   if (tab->nhu_state == NHU_CLEAN)
2034     return;
2035
2036   if (tab->nhu_state == NHU_SCHEDULED)
2037     {
2038       FIB_ITERATE_INIT(fit, &tab->fib);
2039       tab->nhu_state = NHU_RUNNING;
2040     }
2041
2042   FIB_ITERATE_START(&tab->fib, fit, net, n)
2043     {
2044       if (max_feed <= 0)
2045         {
2046           FIB_ITERATE_PUT(fit);
2047           ev_schedule(tab->rt_event);
2048           return;
2049         }
2050       max_feed -= rt_next_hop_update_net(tab, n);
2051     }
2052   FIB_ITERATE_END;
2053
2054   /* State change:
2055    *   NHU_DIRTY   -> NHU_SCHEDULED
2056    *   NHU_RUNNING -> NHU_CLEAN
2057    */
2058   tab->nhu_state &= 1;
2059
2060   if (tab->nhu_state != NHU_CLEAN)
2061     ev_schedule(tab->rt_event);
2062 }
2063
2064
2065 struct rtable_config *
2066 rt_new_table(struct symbol *s, uint addr_type)
2067 {
2068   /* Hack that allows to 'redefine' the master table */
2069   if ((s->class == SYM_TABLE) &&
2070       (s->def == new_config->def_tables[addr_type]) &&
2071       ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
2072     return s->def;
2073
2074   struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
2075
2076   cf_define_symbol(s, SYM_TABLE, c);
2077   c->name = s->name;
2078   c->addr_type = addr_type;
2079   c->gc_max_ops = 1000;
2080   c->gc_min_time = 5;
2081
2082   add_tail(&new_config->tables, &c->n);
2083
2084   /* First table of each type is kept as default */
2085   if (! new_config->def_tables[addr_type])
2086     new_config->def_tables[addr_type] = c;
2087
2088   return c;
2089 }
2090
2091 /**
2092  * rt_lock_table - lock a routing table
2093  * @r: routing table to be locked
2094  *
2095  * Lock a routing table, because it's in use by a protocol,
2096  * preventing it from being freed when it gets undefined in a new
2097  * configuration.
2098  */
2099 void
2100 rt_lock_table(rtable *r)
2101 {
2102   r->use_count++;
2103 }
2104
2105 /**
2106  * rt_unlock_table - unlock a routing table
2107  * @r: routing table to be unlocked
2108  *
2109  * Unlock a routing table formerly locked by rt_lock_table(),
2110  * that is decrease its use count and delete it if it's scheduled
2111  * for deletion by configuration changes.
2112  */
2113 void
2114 rt_unlock_table(rtable *r)
2115 {
2116   if (!--r->use_count && r->deleted)
2117     {
2118       struct config *conf = r->deleted;
2119       DBG("Deleting routing table %s\n", r->name);
2120       r->config->table = NULL;
2121       if (r->hostcache)
2122         rt_free_hostcache(r);
2123       rem_node(&r->n);
2124       fib_free(&r->fib);
2125       rfree(r->rt_event);
2126       mb_free(r);
2127       config_del_obstacle(conf);
2128     }
2129 }
2130
2131 static struct rtable_config *
2132 rt_find_table_config(struct config *cf, char *name)
2133 {
2134   struct symbol *sym = cf_find_symbol(cf, name);
2135   return (sym && (sym->class == SYM_TABLE)) ? sym->def : NULL;
2136 }
2137
2138 /**
2139  * rt_commit - commit new routing table configuration
2140  * @new: new configuration
2141  * @old: original configuration or %NULL if it's boot time config
2142  *
2143  * Scan differences between @old and @new configuration and modify
2144  * the routing tables according to these changes. If @new defines a
2145  * previously unknown table, create it, if it omits a table existing
2146  * in @old, schedule it for deletion (it gets deleted when all protocols
2147  * disconnect from it by calling rt_unlock_table()), if it exists
2148  * in both configurations, leave it unchanged.
2149  */
2150 void
2151 rt_commit(struct config *new, struct config *old)
2152 {
2153   struct rtable_config *o, *r;
2154
2155   DBG("rt_commit:\n");
2156   if (old)
2157     {
2158       WALK_LIST(o, old->tables)
2159         {
2160           rtable *ot = o->table;
2161           if (!ot->deleted)
2162             {
2163               r = rt_find_table_config(new, o->name);
2164               if (r && (r->addr_type == o->addr_type) && !new->shutdown)
2165                 {
2166                   DBG("\t%s: same\n", o->name);
2167                   r->table = ot;
2168                   ot->name = r->name;
2169                   ot->config = r;
2170                   if (o->sorted != r->sorted)
2171                     log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
2172                 }
2173               else
2174                 {
2175                   DBG("\t%s: deleted\n", o->name);
2176                   ot->deleted = old;
2177                   config_add_obstacle(old);
2178                   rt_lock_table(ot);
2179                   rt_unlock_table(ot);
2180                 }
2181             }
2182         }
2183     }
2184
2185   WALK_LIST(r, new->tables)
2186     if (!r->table)
2187       {
2188         rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
2189         DBG("\t%s: created\n", r->name);
2190         rt_setup(rt_table_pool, t, r);
2191         add_tail(&routing_tables, &t->n);
2192         r->table = t;
2193       }
2194   DBG("\tdone\n");
2195 }
2196
2197 static inline void
2198 do_feed_channel(struct channel *c, net *n, rte *e)
2199 {
2200   rte_update_lock();
2201   if (c->ra_mode == RA_ACCEPTED)
2202     rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
2203   else if (c->ra_mode == RA_MERGED)
2204     rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
2205   else /* RA_BASIC */
2206     rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
2207   rte_update_unlock();
2208 }
2209
2210 /**
2211  * rt_feed_channel - advertise all routes to a channel
2212  * @c: channel to be fed
2213  *
2214  * This function performs one pass of advertisement of routes to a channel that
2215  * is in the ES_FEEDING state. It is called by the protocol code as long as it
2216  * has something to do. (We avoid transferring all the routes in single pass in
2217  * order not to monopolize CPU time.)
2218  */
2219 int
2220 rt_feed_channel(struct channel *c)
2221 {
2222   struct fib_iterator *fit = &c->feed_fit;
2223   int max_feed = 256;
2224
2225   ASSERT(c->export_state == ES_FEEDING);
2226
2227   if (!c->feed_active)
2228     {
2229       FIB_ITERATE_INIT(fit, &c->table->fib);
2230       c->feed_active = 1;
2231     }
2232
2233   FIB_ITERATE_START(&c->table->fib, fit, net, n)
2234     {
2235       rte *e = n->routes;
2236       if (max_feed <= 0)
2237         {
2238           FIB_ITERATE_PUT(fit);
2239           return 0;
2240         }
2241
2242       /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2243
2244       if ((c->ra_mode == RA_OPTIMAL) ||
2245           (c->ra_mode == RA_ACCEPTED) ||
2246           (c->ra_mode == RA_MERGED))
2247         if (rte_is_valid(e))
2248           {
2249             /* In the meantime, the protocol may fell down */
2250             if (c->export_state != ES_FEEDING)
2251               goto done;
2252
2253             do_feed_channel(c, n, e);
2254             max_feed--;
2255           }
2256
2257       if (c->ra_mode == RA_ANY)
2258         for(e = n->routes; e; e = e->next)
2259           {
2260             /* In the meantime, the protocol may fell down */
2261             if (c->export_state != ES_FEEDING)
2262               goto done;
2263
2264             if (!rte_is_valid(e))
2265               continue;
2266
2267             do_feed_channel(c, n, e);
2268             max_feed--;
2269           }
2270     }
2271   FIB_ITERATE_END;
2272
2273 done:
2274   c->feed_active = 0;
2275   return 1;
2276 }
2277
2278 /**
2279  * rt_feed_baby_abort - abort protocol feeding
2280  * @c: channel
2281  *
2282  * This function is called by the protocol code when the protocol stops or
2283  * ceases to exist during the feeding.
2284  */
2285 void
2286 rt_feed_channel_abort(struct channel *c)
2287 {
2288   if (c->feed_active)
2289     {
2290       /* Unlink the iterator */
2291       fit_get(&c->table->fib, &c->feed_fit);
2292       c->feed_active = 0;
2293     }
2294 }
2295
2296
2297 int
2298 rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
2299 {
2300   struct rtable *tab = c->in_table;
2301   rte *old, **pos;
2302   net *net;
2303
2304   if (new)
2305   {
2306     net = net_get(tab, n);
2307
2308     if (!new->pref)
2309       new->pref = c->preference;
2310
2311     if (!rta_is_cached(new->attrs))
2312       new->attrs = rta_lookup(new->attrs);
2313   }
2314   else
2315   {
2316     net = net_find(tab, n);
2317
2318     if (!net)
2319       goto drop_withdraw;
2320   }
2321
2322   /* Find the old rte */
2323   for (pos = &net->routes; old = *pos; pos = &old->next)
2324     if (old->attrs->src == src)
2325     {
2326       if (new && rte_same(old, new))
2327         goto drop_update;
2328
2329       /* Remove the old rte */
2330       *pos = old->next;
2331       rte_free_quick(old);
2332       tab->rt_count--;
2333
2334       break;
2335     }
2336
2337   if (!new)
2338   {
2339     if (!old)
2340       goto drop_withdraw;
2341
2342     return 1;
2343   }
2344
2345   struct channel_limit *l = &c->rx_limit;
2346   if (l->action && !old)
2347   {
2348     if (tab->rt_count >= l->limit)
2349       channel_notify_limit(c, l, PLD_RX, tab->rt_count);
2350
2351     if (l->state == PLS_BLOCKED)
2352     {
2353       rte_trace_in(D_FILTERS, c->proto, new, "ignored [limit]");
2354       goto drop_update;
2355     }
2356   }
2357
2358   /* Insert the new rte */
2359   rte *e = rte_do_cow(new);
2360   e->flags |= REF_COW;
2361   e->net = net;
2362   e->sender = c;
2363   e->lastmod = current_time();
2364   e->next = *pos;
2365   *pos = e;
2366   tab->rt_count++;
2367   return 1;
2368
2369 drop_update:
2370   c->stats.imp_updates_received++;
2371   c->stats.imp_updates_ignored++;
2372   rte_free(new);
2373   return 0;
2374
2375 drop_withdraw:
2376   c->stats.imp_withdraws_received++;
2377   c->stats.imp_withdraws_ignored++;
2378   return 0;
2379 }
2380
2381 int
2382 rt_reload_channel(struct channel *c)
2383 {
2384   struct rtable *tab = c->in_table;
2385   struct fib_iterator *fit = &c->reload_fit;
2386   int max_feed = 64;
2387
2388   ASSERT(c->channel_state == CS_UP);
2389
2390   if (!c->reload_active)
2391   {
2392     FIB_ITERATE_INIT(fit, &tab->fib);
2393     c->reload_active = 1;
2394   }
2395
2396   FIB_ITERATE_START(&tab->fib, fit, net, n)
2397   {
2398     if (max_feed <= 0)
2399     {
2400       FIB_ITERATE_PUT(fit);
2401       return 0;
2402     }
2403
2404     for (rte *e = n->routes; e; e = e->next)
2405     {
2406       rte_update2(c, n->n.addr, rte_do_cow(e), e->attrs->src);
2407       max_feed--;
2408     }
2409   }
2410   FIB_ITERATE_END;
2411
2412   c->reload_active = 0;
2413   return 1;
2414 }
2415
2416 void
2417 rt_reload_channel_abort(struct channel *c)
2418 {
2419   if (c->reload_active)
2420   {
2421     /* Unlink the iterator */
2422     fit_get(&c->in_table->fib, &c->reload_fit);
2423     c->reload_active = 0;
2424   }
2425 }
2426
2427 void
2428 rt_prune_sync(rtable *t, int all)
2429 {
2430   FIB_WALK(&t->fib, net, n)
2431   {
2432     rte *e, **ee = &n->routes;
2433     while (e = *ee)
2434     {
2435       if (all || (e->flags & (REF_STALE | REF_DISCARD)))
2436       {
2437         *ee = e->next;
2438         rte_free_quick(e);
2439         t->rt_count--;
2440       }
2441       else
2442         ee = &e->next;
2443     }
2444   }
2445   FIB_WALK_END;
2446 }
2447
2448
2449 static inline u32
2450 hc_hash(ip_addr a, rtable *dep)
2451 {
2452   return ipa_hash(a) ^ ptr_hash(dep);
2453 }
2454
2455 static inline void
2456 hc_insert(struct hostcache *hc, struct hostentry *he)
2457 {
2458   uint k = he->hash_key >> hc->hash_shift;
2459   he->next = hc->hash_table[k];
2460   hc->hash_table[k] = he;
2461 }
2462
2463 static inline void
2464 hc_remove(struct hostcache *hc, struct hostentry *he)
2465 {
2466   struct hostentry **hep;
2467   uint k = he->hash_key >> hc->hash_shift;
2468
2469   for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2470   *hep = he->next;
2471 }
2472
2473 #define HC_DEF_ORDER 10
2474 #define HC_HI_MARK *4
2475 #define HC_HI_STEP 2
2476 #define HC_HI_ORDER 16                  /* Must be at most 16 */
2477 #define HC_LO_MARK /5
2478 #define HC_LO_STEP 2
2479 #define HC_LO_ORDER 10
2480
2481 static void
2482 hc_alloc_table(struct hostcache *hc, unsigned order)
2483 {
2484   uint hsize = 1 << order;
2485   hc->hash_order = order;
2486   hc->hash_shift = 32 - order;
2487   hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
2488   hc->hash_min = (order <= HC_LO_ORDER) ?  0U : (hsize HC_LO_MARK);
2489
2490   hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2491 }
2492
2493 static void
2494 hc_resize(struct hostcache *hc, unsigned new_order)
2495 {
2496   struct hostentry **old_table = hc->hash_table;
2497   struct hostentry *he, *hen;
2498   uint old_size = 1 << hc->hash_order;
2499   uint i;
2500
2501   hc_alloc_table(hc, new_order);
2502   for (i = 0; i < old_size; i++)
2503     for (he = old_table[i]; he != NULL; he=hen)
2504       {
2505         hen = he->next;
2506         hc_insert(hc, he);
2507       }
2508   mb_free(old_table);
2509 }
2510
2511 static struct hostentry *
2512 hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2513 {
2514   struct hostentry *he = sl_alloc(hc->slab);
2515
2516   *he = (struct hostentry) {
2517     .addr = a,
2518     .link = ll,
2519     .tab = dep,
2520     .hash_key = k,
2521   };
2522
2523   add_tail(&hc->hostentries, &he->ln);
2524   hc_insert(hc, he);
2525
2526   hc->hash_items++;
2527   if (hc->hash_items > hc->hash_max)
2528     hc_resize(hc, hc->hash_order + HC_HI_STEP);
2529
2530   return he;
2531 }
2532
2533 static void
2534 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2535 {
2536   rta_free(he->src);
2537
2538   rem_node(&he->ln);
2539   hc_remove(hc, he);
2540   sl_free(hc->slab, he);
2541
2542   hc->hash_items--;
2543   if (hc->hash_items < hc->hash_min)
2544     hc_resize(hc, hc->hash_order - HC_LO_STEP);
2545 }
2546
2547 static void
2548 rt_init_hostcache(rtable *tab)
2549 {
2550   struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2551   init_list(&hc->hostentries);
2552
2553   hc->hash_items = 0;
2554   hc_alloc_table(hc, HC_DEF_ORDER);
2555   hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2556
2557   hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024));
2558   hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2559
2560   tab->hostcache = hc;
2561 }
2562
2563 static void
2564 rt_free_hostcache(rtable *tab)
2565 {
2566   struct hostcache *hc = tab->hostcache;
2567
2568   node *n;
2569   WALK_LIST(n, hc->hostentries)
2570     {
2571       struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2572       rta_free(he->src);
2573
2574       if (he->uc)
2575         log(L_ERR "Hostcache is not empty in table %s", tab->name);
2576     }
2577
2578   rfree(hc->slab);
2579   rfree(hc->lp);
2580   mb_free(hc->hash_table);
2581   mb_free(hc);
2582 }
2583
2584 static void
2585 rt_notify_hostcache(rtable *tab, net *net)
2586 {
2587   if (tab->hcu_scheduled)
2588     return;
2589
2590   if (trie_match_net(tab->hostcache->trie, net->n.addr))
2591     rt_schedule_hcu(tab);
2592 }
2593
2594 static int
2595 if_local_addr(ip_addr a, struct iface *i)
2596 {
2597   struct ifa *b;
2598
2599   WALK_LIST(b, i->addrs)
2600     if (ipa_equal(a, b->ip))
2601       return 1;
2602
2603   return 0;
2604 }
2605
2606 static u32
2607 rt_get_igp_metric(rte *rt)
2608 {
2609   eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2610
2611   if (ea)
2612     return ea->u.data;
2613
2614   rta *a = rt->attrs;
2615
2616 #ifdef CONFIG_OSPF
2617   if ((a->source == RTS_OSPF) ||
2618       (a->source == RTS_OSPF_IA) ||
2619       (a->source == RTS_OSPF_EXT1))
2620     return rt->u.ospf.metric1;
2621 #endif
2622
2623 #ifdef CONFIG_RIP
2624   if (a->source == RTS_RIP)
2625     return rt->u.rip.metric;
2626 #endif
2627
2628   if (a->source == RTS_DEVICE)
2629     return 0;
2630
2631   return IGP_METRIC_UNKNOWN;
2632 }
2633
2634 static int
2635 rt_update_hostentry(rtable *tab, struct hostentry *he)
2636 {
2637   rta *old_src = he->src;
2638   int direct = 0;
2639   int pxlen = 0;
2640
2641   /* Reset the hostentry */
2642   he->src = NULL;
2643   he->dest = RTD_UNREACHABLE;
2644   he->nexthop_linkable = 0;
2645   he->igp_metric = 0;
2646
2647   net_addr he_addr;
2648   net_fill_ip_host(&he_addr, he->addr);
2649   net *n = net_route(tab, &he_addr);
2650   if (n)
2651     {
2652       rte *e = n->routes;
2653       rta *a = e->attrs;
2654       pxlen = n->n.addr->pxlen;
2655
2656       if (a->hostentry)
2657         {
2658           /* Recursive route should not depend on another recursive route */
2659           log(L_WARN "Next hop address %I resolvable through recursive route for %N",
2660               he->addr, n->n.addr);
2661           goto done;
2662         }
2663
2664       if (a->dest == RTD_UNICAST)
2665         {
2666           for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
2667             if (ipa_zero(nh->gw))
2668               {
2669                 if (if_local_addr(he->addr, nh->iface))
2670                   {
2671                     /* The host address is a local address, this is not valid */
2672                     log(L_WARN "Next hop address %I is a local address of iface %s",
2673                         he->addr, nh->iface->name);
2674                     goto done;
2675                   }
2676
2677                 direct++;
2678               }
2679         }
2680
2681       he->src = rta_clone(a);
2682       he->dest = a->dest;
2683       he->nexthop_linkable = !direct;
2684       he->igp_metric = rt_get_igp_metric(e);
2685     }
2686
2687 done:
2688   /* Add a prefix range to the trie */
2689   trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
2690
2691   rta_free(old_src);
2692   return old_src != he->src;
2693 }
2694
2695 static void
2696 rt_update_hostcache(rtable *tab)
2697 {
2698   struct hostcache *hc = tab->hostcache;
2699   struct hostentry *he;
2700   node *n, *x;
2701
2702   /* Reset the trie */
2703   lp_flush(hc->lp);
2704   hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2705
2706   WALK_LIST_DELSAFE(n, x, hc->hostentries)
2707     {
2708       he = SKIP_BACK(struct hostentry, ln, n);
2709       if (!he->uc)
2710         {
2711           hc_delete_hostentry(hc, he);
2712           continue;
2713         }
2714
2715       if (rt_update_hostentry(tab, he))
2716         rt_schedule_nhu(he->tab);
2717     }
2718
2719   tab->hcu_scheduled = 0;
2720 }
2721
2722 struct hostentry *
2723 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2724 {
2725   struct hostentry *he;
2726
2727   if (!tab->hostcache)
2728     rt_init_hostcache(tab);
2729
2730   u32 k = hc_hash(a, dep);
2731   struct hostcache *hc = tab->hostcache;
2732   for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2733     if (ipa_equal(he->addr, a) && (he->tab == dep))
2734       return he;
2735
2736   he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
2737   rt_update_hostentry(tab, he);
2738   return he;
2739 }
2740
2741
2742 /*
2743  *  Documentation for functions declared inline in route.h
2744  */
2745 #if 0
2746
2747 /**
2748  * net_find - find a network entry
2749  * @tab: a routing table
2750  * @addr: address of the network
2751  *
2752  * net_find() looks up the given network in routing table @tab and
2753  * returns a pointer to its &net entry or %NULL if no such network
2754  * exists.
2755  */
2756 static inline net *net_find(rtable *tab, net_addr *addr)
2757 { DUMMY; }
2758
2759 /**
2760  * net_get - obtain a network entry
2761  * @tab: a routing table
2762  * @addr: address of the network
2763  *
2764  * net_get() looks up the given network in routing table @tab and
2765  * returns a pointer to its &net entry. If no such entry exists, it's
2766  * created.
2767  */
2768 static inline net *net_get(rtable *tab, net_addr *addr)
2769 { DUMMY; }
2770
2771 /**
2772  * rte_cow - copy a route for writing
2773  * @r: a route entry to be copied
2774  *
2775  * rte_cow() takes a &rte and prepares it for modification. The exact action
2776  * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2777  * just returned unchanged, else a new temporary entry with the same contents
2778  * is created.
2779  *
2780  * The primary use of this function is inside the filter machinery -- when
2781  * a filter wants to modify &rte contents (to change the preference or to
2782  * attach another set of attributes), it must ensure that the &rte is not
2783  * shared with anyone else (and especially that it isn't stored in any routing
2784  * table).
2785  *
2786  * Result: a pointer to the new writable &rte.
2787  */
2788 static inline rte * rte_cow(rte *r)
2789 { DUMMY; }
2790
2791 #endif