nest/rt-table.c

   1 /*
   2  *      BIRD -- Routing Tables
   3  *
   4  *      (c) 1998--2000 Martin Mares <mj@ucw.cz>
   5  *
   6  *      Can be freely distributed and used under the terms of the GNU GPL.
   7  */
   8
   9 /**
  10  * DOC: Routing tables
  11  *
  12  * Routing tables are probably the most important structures BIRD uses. They
  13  * hold all the information about known networks, the associated routes and
  14  * their attributes.
  15  *
  16  * There are multiple routing tables (a primary one together with any
  17  * number of secondary ones if requested by the configuration). Each table
  18  * is basically a FIB containing entries describing the individual
  19  * destination networks. For each network (represented by structure &net),
  20  * there is a one-way linked list of route entries (&rte), the first entry
  21  * on the list being the best one (i.e., the one we currently use
  22  * for routing), the order of the other ones is undetermined.
  23  *
  24  * The &rte contains information specific to the route (preference, protocol
  25  * metrics, time of last modification etc.) and a pointer to a &rta structure
  26  * (see the route attribute module for a precise explanation) holding the
  27  * remaining route attributes which are expected to be shared by multiple
  28  * routes in order to conserve memory.
  29  */
  30
  31 #undef LOCAL_DEBUG
  32
  33 #include "nest/bird.h"
  34 #include "nest/route.h"
  35 #include "nest/protocol.h"
  36 #include "nest/iface.h"
  37 #include "lib/resource.h"
  38 #include "lib/event.h"
  39 #include "lib/string.h"
  40 #include "conf/conf.h"
  41 #include "filter/filter.h"
  42 #include "lib/hash.h"
  43 #include "lib/string.h"
  44 #include "lib/alloca.h"
  45
/* Resource pool with external linkage. NOTE(review): presumably the pool
   owning routing-table resources; its users are not visible in this part
   of the file -- confirm. */
pool *rt_table_pool;

/* Slab from which the &rte structures in this file are allocated
   (rte_get_temp(), rte_do_cow()) and to which they are returned
   (rte_free(), rte_free_quick()) */
static slab *rte_slab;
/* Linear pool handed to export_filter_() by export_filter() and to
   rt_export_merged() by rt_notify_merged() */
static linpool *rte_update_pool;

/* List of routing tables. NOTE(review): assumed to hold every rtable;
   confirm against the code that fills it. */
list routing_tables;

/* Forward declarations of static helpers */
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static inline void rt_prune_table(rtable *tab);
  58
  59
  60 /* Like fib_route(), but skips empty net entries */
  61 static inline void *
  62 net_route_ip4(rtable *t, net_addr_ip4 *n)
  63 {
  64   net *r;
  65
  66   while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  67   {
  68     n->pxlen--;
  69     ip4_clrbit(&n->prefix, n->pxlen);
  70   }
  71
  72   return r;
  73 }
  74
  75 static inline void *
  76 net_route_ip6(rtable *t, net_addr_ip6 *n)
  77 {
  78   net *r;
  79
  80   while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0))
  81   {
  82     n->pxlen--;
  83     ip6_clrbit(&n->prefix, n->pxlen);
  84   }
  85
  86   return r;
  87 }
  88
  89 static inline void *
  90 net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n)
  91 {
  92   struct fib_node *fn;
  93
  94   while (1)
  95   {
  96     net *best = NULL;
  97     int best_pxlen = 0;
  98
  99     /* We need to do dst first matching. Since sadr addresses are hashed on dst
 100        prefix only, find the hash table chain and go through it to find the
 101        match with the smallest matching src prefix. */
 102     for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next)
 103     {
 104       net_addr_ip6_sadr *a = (void *) fn->addr;
 105
 106       if (net_equal_dst_ip6_sadr(n, a) &&
 107           net_in_net_src_ip6_sadr(n, a) &&
 108           (a->src_pxlen >= best_pxlen))
 109       {
 110         best = fib_node_to_user(&t->fib, fn);
 111         best_pxlen = a->src_pxlen;
 112       }
 113     }
 114
 115     if (best)
 116       return best;
 117
 118     if (!n->dst_pxlen)
 119       break;
 120
 121     n->dst_pxlen--;
 122     ip6_clrbit(&n->dst_prefix, n->dst_pxlen);
 123   }
 124
 125   return NULL;
 126 }
 127
 128 void *
 129 net_route(rtable *tab, const net_addr *n)
 130 {
 131   ASSERT(tab->addr_type == n->type);
 132
 133   net_addr *n0 = alloca(n->length);
 134   net_copy(n0, n);
 135
 136   switch (n->type)
 137   {
 138   case NET_IP4:
 139   case NET_VPN4:
 140   case NET_ROA4:
 141     return net_route_ip4(tab, (net_addr_ip4 *) n0);
 142
 143   case NET_IP6:
 144   case NET_VPN6:
 145   case NET_ROA6:
 146     return net_route_ip6(tab, (net_addr_ip6 *) n0);
 147
 148   case NET_IP6_SADR:
 149     return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0);
 150
 151   default:
 152     return NULL;
 153   }
 154 }
 155
 156
 157 static int
 158 net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn)
 159 {
 160   struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0);
 161   struct fib_node *fn;
 162   int anything = 0;
 163
 164   while (1)
 165   {
 166     for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
 167     {
 168       net_addr_roa4 *roa = (void *) fn->addr;
 169       net *r = fib_node_to_user(&tab->fib, fn);
 170
 171       if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes))
 172       {
 173         anything = 1;
 174         if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
 175           return ROA_VALID;
 176       }
 177     }
 178
 179     if (n.pxlen == 0)
 180       break;
 181
 182     n.pxlen--;
 183     ip4_clrbit(&n.prefix, n.pxlen);
 184   }
 185
 186   return anything ? ROA_INVALID : ROA_UNKNOWN;
 187 }
 188
 189 static int
 190 net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn)
 191 {
 192   struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0);
 193   struct fib_node *fn;
 194   int anything = 0;
 195
 196   while (1)
 197   {
 198     for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next)
 199     {
 200       net_addr_roa6 *roa = (void *) fn->addr;
 201       net *r = fib_node_to_user(&tab->fib, fn);
 202
 203       if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes))
 204       {
 205         anything = 1;
 206         if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen))
 207           return ROA_VALID;
 208       }
 209     }
 210
 211     if (n.pxlen == 0)
 212       break;
 213
 214     n.pxlen--;
 215     ip6_clrbit(&n.prefix, n.pxlen);
 216   }
 217
 218   return anything ? ROA_INVALID : ROA_UNKNOWN;
 219 }
 220
 221 /**
 222  * roa_check - check validity of route origination in a ROA table
 223  * @tab: ROA table
 224  * @n: network prefix to check
 225  * @asn: AS number of network prefix
 226  *
 227  * Implements RFC 6483 route validation for the given network prefix. The
 228  * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given
 229  * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is
 230  * a candidate ROA with matching ASN and maxlen field greater than or equal to
 231  * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If
 232  * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID
 233  * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n
 234  * must have type NET_IP4 or NET_IP6, respectively.
 235  */
 236 int
 237 net_roa_check(rtable *tab, const net_addr *n, u32 asn)
 238 {
 239   if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4))
 240     return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn);
 241   else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6))
 242     return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn);
 243   else
 244     return ROA_UNKNOWN; /* Should not happen */
 245 }
 246
 247 /**
 248  * rte_find - find a route
 249  * @net: network node
 250  * @src: route source
 251  *
 252  * The rte_find() function returns a route for destination @net
 253  * which is from route source @src.
 254  */
 255 rte *
 256 rte_find(net *net, struct rte_src *src)
 257 {
 258   rte *e = net->routes;
 259
 260   while (e && e->attrs->src != src)
 261     e = e->next;
 262   return e;
 263 }
 264
 265 /**
 266  * rte_get_temp - get a temporary &rte
 267  * @a: attributes to assign to the new route (a &rta; in case it's
 268  * un-cached, rte_update() will create a cached copy automatically)
 269  *
 270  * Create a temporary &rte and bind it with the attributes @a.
 271  * Also set route preference to the default preference set for
 272  * the protocol.
 273  */
 274 rte *
 275 rte_get_temp(rta *a)
 276 {
 277   rte *e = sl_alloc(rte_slab);
 278
 279   e->attrs = a;
 280   e->flags = 0;
 281   e->pref = 0;
 282   return e;
 283 }
 284
 285 rte *
 286 rte_do_cow(rte *r)
 287 {
 288   rte *e = sl_alloc(rte_slab);
 289
 290   memcpy(e, r, sizeof(rte));
 291   e->attrs = rta_clone(r->attrs);
 292   e->flags = 0;
 293   return e;
 294 }
 295
 296 /**
 297  * rte_cow_rta - get a private writable copy of &rte with writable &rta
 298  * @r: a route entry to be copied
 299  * @lp: a linpool from which to allocate &rta
 300  *
 301  * rte_cow_rta() takes a &rte and prepares it and associated &rta for
 302  * modification. There are three possibilities: First, both &rte and &rta are
 303  * private copies, in that case they are returned unchanged.  Second, &rte is
 304  * private copy, but &rta is cached, in that case &rta is duplicated using
 305  * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
 306  * both structures are duplicated by rte_do_cow() and rta_do_cow().
 307  *
 308  * Note that in the second case, cached &rta loses one reference, while private
 309  * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
 310  * nexthops, ...) with it. To work properly, original shared &rta should have
 311  * another reference during the life of created private copy.
 312  *
 313  * Result: a pointer to the new writable &rte with writable &rta.
 314  */
 315 rte *
 316 rte_cow_rta(rte *r, linpool *lp)
 317 {
 318   if (!rta_is_cached(r->attrs))
 319     return r;
 320
 321   r = rte_cow(r);
 322   rta *a = rta_do_cow(r->attrs, lp);
 323   rta_free(r->attrs);
 324   r->attrs = a;
 325   return r;
 326 }
 327
 328 static int                              /* Actually better or at least as good as */
 329 rte_better(rte *new, rte *old)
 330 {
 331   int (*better)(rte *, rte *);
 332
 333   if (!rte_is_valid(old))
 334     return 1;
 335   if (!rte_is_valid(new))
 336     return 0;
 337
 338   if (new->pref > old->pref)
 339     return 1;
 340   if (new->pref < old->pref)
 341     return 0;
 342   if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
 343     {
 344       /*
 345        *  If the user has configured protocol preferences, so that two different protocols
 346        *  have the same preference, try to break the tie by comparing addresses. Not too
 347        *  useful, but keeps the ordering of routes unambiguous.
 348        */
 349       return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
 350     }
 351   if (better = new->attrs->src->proto->rte_better)
 352     return better(new, old);
 353   return 0;
 354 }
 355
 356 static int
 357 rte_mergable(rte *pri, rte *sec)
 358 {
 359   int (*mergable)(rte *, rte *);
 360
 361   if (!rte_is_valid(pri) || !rte_is_valid(sec))
 362     return 0;
 363
 364   if (pri->pref != sec->pref)
 365     return 0;
 366
 367   if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
 368     return 0;
 369
 370   if (mergable = pri->attrs->src->proto->rte_mergable)
 371     return mergable(pri, sec);
 372
 373   return 0;
 374 }
 375
 376 static void
 377 rte_trace(struct proto *p, rte *e, int dir, char *msg)
 378 {
 379   log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest));
 380 }
 381
 382 static inline void
 383 rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
 384 {
 385   if (p->debug & flag)
 386     rte_trace(p, e, '>', msg);
 387 }
 388
 389 static inline void
 390 rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
 391 {
 392   if (p->debug & flag)
 393     rte_trace(p, e, '<', msg);
 394 }
 395
 396 static rte *
 397 export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent)
 398 {
 399   struct proto *p = c->proto;
 400   struct filter *filter = c->out_filter;
 401   struct proto_stats *stats = &c->stats;
 402   rte *rt;
 403   int v;
 404
 405   rt = rt0;
 406   *rt_free = NULL;
 407
 408   v = p->preexport ? p->preexport(p, &rt, pool) : 0;
 409   if (v < 0)
 410     {
 411       if (silent)
 412         goto reject;
 413
 414       stats->exp_updates_rejected++;
 415       if (v == RIC_REJECT)
 416         rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
 417       goto reject;
 418     }
 419   if (v > 0)
 420     {
 421       if (!silent)
 422         rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
 423       goto accept;
 424     }
 425
 426   rte_make_tmp_attrs(&rt, pool);
 427
 428   v = filter && ((filter == FILTER_REJECT) ||
 429                  (f_run(filter, &rt, pool,
 430                         (silent ? FF_SILENT : 0)) > F_ACCEPT));
 431   if (v)
 432     {
 433       if (silent)
 434         goto reject;
 435
 436       stats->exp_updates_filtered++;
 437       rte_trace_out(D_FILTERS, p, rt, "filtered out");
 438       goto reject;
 439     }
 440
 441  accept:
 442   if (rt != rt0)
 443     *rt_free = rt;
 444   return rt;
 445
 446  reject:
 447   /* Discard temporary rte */
 448   if (rt != rt0)
 449     rte_free(rt);
 450   return NULL;
 451 }
 452
 453 static inline rte *
 454 export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent)
 455 {
 456   return export_filter_(c, rt0, rt_free, rte_update_pool, silent);
 457 }
 458
 459 static void
 460 do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed)
 461 {
 462   struct proto *p = c->proto;
 463   struct proto_stats *stats = &c->stats;
 464
 465
 466   /*
 467    * First, apply export limit.
 468    *
 469    * Export route limits has several problems. Because exp_routes
 470    * counter is reset before refeed, we don't really know whether
 471    * limit is breached and whether the update is new or not. Therefore
 472    * the number of really exported routes may exceed the limit
 473    * temporarily (routes exported before and new routes in refeed).
 474    *
 475    * Minor advantage is that if the limit is decreased and refeed is
 476    * requested, the number of exported routes really decrease.
 477    *
 478    * Second problem is that with export limits, we don't know whether
 479    * old was really exported (it might be blocked by limit). When a
 480    * withdraw is exported, we announce it even when the previous
 481    * update was blocked. This is not a big issue, but the same problem
 482    * is in updating exp_routes counter. Therefore, to be consistent in
 483    * increases and decreases of exp_routes, we count exported routes
 484    * regardless of blocking by limits.
 485    *
 486    * Similar problem is in handling updates - when a new route is
 487    * received and blocking is active, the route would be blocked, but
 488    * when an update for the route will be received later, the update
 489    * would be propagated (as old != NULL). Therefore, we have to block
 490    * also non-new updates (contrary to import blocking).
 491    */
 492
 493   struct channel_limit *l = &c->out_limit;
 494   if (l->action && new)
 495     {
 496       if ((!old || refeed) && (stats->exp_routes >= l->limit))
 497         channel_notify_limit(c, l, PLD_OUT, stats->exp_routes);
 498
 499       if (l->state == PLS_BLOCKED)
 500         {
 501           stats->exp_routes++;  /* see note above */
 502           stats->exp_updates_rejected++;
 503           rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
 504           new = NULL;
 505
 506           if (!old)
 507             return;
 508         }
 509     }
 510
 511
 512   if (new)
 513     stats->exp_updates_accepted++;
 514   else
 515     stats->exp_withdraws_accepted++;
 516
 517   /* Hack: We do not decrease exp_routes during refeed, we instead
 518      reset exp_routes at the start of refeed. */
 519   if (new)
 520     stats->exp_routes++;
 521   if (old && !refeed)
 522     stats->exp_routes--;
 523
 524   if (p->debug & D_ROUTES)
 525     {
 526       if (new && old)
 527         rte_trace_out(D_ROUTES, p, new, "replaced");
 528       else if (new)
 529         rte_trace_out(D_ROUTES, p, new, "added");
 530       else if (old)
 531         rte_trace_out(D_ROUTES, p, old, "removed");
 532     }
 533   p->rt_notify(p, c, net, new, old);
 534 }
 535
 536 static void
 537 rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed)
 538 {
 539   struct proto *p = c->proto;
 540
 541   rte *new = new0;
 542   rte *old = old0;
 543   rte *new_free = NULL;
 544   rte *old_free = NULL;
 545
 546   if (new)
 547     c->stats.exp_updates_received++;
 548   else
 549     c->stats.exp_withdraws_received++;
 550
 551   /*
 552    * This is a tricky part - we don't know whether route 'old' was exported to
 553    * protocol 'p' or was filtered by the export filter. We try to run the export
 554    * filter to know this to have a correct value in 'old' argument of rte_update
 555    * (and proper filter value).
 556    *
 557    * This is broken because 'configure soft' may change filters but keep routes.
 558    * Refeed cycle is expected to be called after change of the filters and with
 559    * old == new, therefore we do not even try to run the filter on an old route.
 560    * This may lead to 'spurious withdraws' but ensure that there are no 'missing
 561    * withdraws'.
 562    *
 563    * This is not completely safe as there is a window between reconfiguration
 564    * and the end of refeed - if a newly filtered route disappears during this
 565    * period, proper withdraw is not sent (because old would be also filtered)
 566    * and the route is not refeeded (because it disappeared before that).
 567    * Therefore, we also do not try to run the filter on old routes that are
 568    * older than the last filter change.
 569    */
 570
 571   if (new)
 572     new = export_filter(c, new, &new_free, 0);
 573
 574   if (old && !(refeed || (old->lastmod <= c->last_tx_filter_change)))
 575     old = export_filter(c, old, &old_free, 1);
 576
 577   if (!new && !old)
 578   {
 579     /*
 580      * As mentioned above, 'old' value may be incorrect in some race conditions.
 581      * We generally ignore it with the exception of withdraw to pipe protocol.
 582      * In that case we rather propagate unfiltered withdraws regardless of
 583      * export filters to ensure that when a protocol is flushed, its routes are
 584      * removed from all tables. Possible spurious unfiltered withdraws are not
 585      * problem here as they are ignored if there is no corresponding route at
 586      * the other end of the pipe. We directly call rt_notify() hook instead of
 587      * do_rt_notify() to avoid logging and stat counters.
 588      */
 589
 590 #ifdef CONFIG_PIPE
 591     if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
 592       p->rt_notify(p, c, net, NULL, old0);
 593 #endif
 594
 595     return;
 596   }
 597
 598   do_rt_notify(c, net, new, old, refeed);
 599
 600   /* Discard temporary rte's */
 601   if (new_free)
 602     rte_free(new_free);
 603   if (old_free)
 604     rte_free(old_free);
 605 }
 606
/*
 * Export a route change to channel @c in the way rte_announce() uses for
 * announcements of type RA_ACCEPTED: the channel is told about the first
 * route in the list of @net that is accepted by its export filter.
 *
 * @new_changed is the route that was added or changed (NULL for a withdraw),
 * @old_changed is the route it replaced and @before_old is the route that
 * preceded @old_changed in the list (NULL if @old_changed was the first).
 * @feed is zero for a regular update, nonzero for a feed and 2 for a refeed.
 *
 * The function determines new_best (first accepted route now) and old_best
 * (first accepted route before the change) and passes them to
 * do_rt_notify(), unless it finds that the change is not relevant.
 */
static void
rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
{
  // struct proto *p = c->proto;

  rte *r;
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;         /* Temporary rte behind new_best, if any */
  rte *old_free = NULL;         /* Temporary rte behind old_best, if any */

  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old has to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */

  if (new_changed)
    c->stats.exp_updates_received++;
  else
    c->stats.exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
  for (r=net->routes; rte_is_valid(r); r=r->next)
    {
      if (new_best = export_filter(c, r, &new_free, 0))
        break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
        old_meet = 1;
    }

  /*
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed.
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)    /* refeed */
        old_best = new_best ? new_best :
          (rte_is_valid(net->routes) ? net->routes : NULL);
      else
        old_best = NULL;

      if (!new_best && !old_best)
        return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   *
   * There is also a hack to ensure consistency in case of changed filters.
   * It does not find the proper old_best, just selects a non-NULL route.
   */

  /* Hack for changed filters */
  if (old_changed && (old_changed->lastmod <= c->last_tx_filter_change))
    {
      old_best = old_changed;
      goto found;
    }

  /* First case */
  if (old_meet)
    if (old_best = export_filter(c, old_changed, &old_free, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;

  /* Third case, we use r instead of new_best, because export_filter() could change it */
  if (r != new_changed)
    {
      /* Nothing is announced, so the temporary new_best has to be released here */
      if (new_free)
        rte_free(new_free);
      return;
    }

  /* Fourth case - continue the walk behind new_best, trying old_changed
     at the position it used to occupy (right after before_old) */
  for (r=r->next; rte_is_valid(r); r=r->next)
    {
      if (old_best = export_filter(c, r, &old_free, 1))
        goto found;

      if (r == before_old)
        if (old_best = export_filter(c, old_changed, &old_free, 1))
          goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(c, net, new_best, old_best, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}
 732
 733
 734 static struct nexthop *
 735 nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max)
 736 {
 737   return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool);
 738 }
 739
 740 rte *
 741 rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent)
 742 {
 743   // struct proto *p = c->proto;
 744   struct nexthop *nhs = NULL;
 745   rte *best0, *best, *rt0, *rt, *tmp;
 746
 747   best0 = net->routes;
 748   *rt_free = NULL;
 749
 750   if (!rte_is_valid(best0))
 751     return NULL;
 752
 753   best = export_filter_(c, best0, rt_free, pool, silent);
 754
 755   if (!best || !rte_is_reachable(best))
 756     return best;
 757
 758   for (rt0 = best0->next; rt0; rt0 = rt0->next)
 759   {
 760     if (!rte_mergable(best0, rt0))
 761       continue;
 762
 763     rt = export_filter_(c, rt0, &tmp, pool, 1);
 764
 765     if (!rt)
 766       continue;
 767
 768     if (rte_is_reachable(rt))
 769       nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit);
 770
 771     if (tmp)
 772       rte_free(tmp);
 773   }
 774
 775   if (nhs)
 776   {
 777     nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit);
 778
 779     if (nhs->next)
 780     {
 781       best = rte_cow_rta(best, pool);
 782       nexthop_link(best->attrs, nhs);
 783     }
 784   }
 785
 786   if (best != best0)
 787     *rt_free = best;
 788
 789   return best;
 790 }
 791
 792
 793 static void
 794 rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed,
 795                  rte *new_best, rte*old_best, int refeed)
 796 {
 797   // struct proto *p = c->proto;
 798
 799   rte *new_best_free = NULL;
 800   rte *old_best_free = NULL;
 801   rte *new_changed_free = NULL;
 802   rte *old_changed_free = NULL;
 803
 804   /* We assume that all rte arguments are either NULL or rte_is_valid() */
 805
 806   /* This check should be done by the caller */
 807   if (!new_best && !old_best)
 808     return;
 809
 810   /* Check whether the change is relevant to the merged route */
 811   if ((new_best == old_best) && !refeed)
 812   {
 813     new_changed = rte_mergable(new_best, new_changed) ?
 814       export_filter(c, new_changed, &new_changed_free, 1) : NULL;
 815
 816     old_changed = rte_mergable(old_best, old_changed) ?
 817       export_filter(c, old_changed, &old_changed_free, 1) : NULL;
 818
 819     if (!new_changed && !old_changed)
 820       return;
 821   }
 822
 823   if (new_best)
 824     c->stats.exp_updates_received++;
 825   else
 826     c->stats.exp_withdraws_received++;
 827
 828   /* Prepare new merged route */
 829   if (new_best)
 830     new_best = rt_export_merged(c, net, &new_best_free, rte_update_pool, 0);
 831
 832   /* Prepare old merged route (without proper merged next hops) */
 833   /* There are some issues with running filter on old route - see rt_notify_basic() */
 834   if (old_best && !refeed)
 835     old_best = export_filter(c, old_best, &old_best_free, 1);
 836
 837   if (new_best || old_best)
 838     do_rt_notify(c, net, new_best, old_best, refeed);
 839
 840   /* Discard temporary rte's */
 841   if (new_best_free)
 842     rte_free(new_best_free);
 843   if (old_best_free)
 844     rte_free(old_best_free);
 845   if (new_changed_free)
 846     rte_free(new_changed_free);
 847   if (old_changed_free)
 848     rte_free(old_changed_free);
 849 }
 850
 851
 852 /**
 853  * rte_announce - announce a routing table change
 854  * @tab: table the route has been added to
 855  * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
 856  * @net: network in question
 857  * @new: the new route to be announced
 858  * @old: the previous route for the same network
 859  * @new_best: the new best route for the same network
 860  * @old_best: the previous best route for the same network
 861  * @before_old: The previous route before @old for the same network.
 862  *              If @before_old is NULL @old was the first.
 863  *
 864  * This function gets a routing table update and announces it
 865  * to all protocols that acccepts given type of route announcement
 866  * and are connected to the same table by their announcement hooks.
 867  *
 868  * Route announcement of type %RA_OPTIMAL si generated when optimal
 869  * route (in routing table @tab) changes. In that case @old stores the
 870  * old optimal route.
 871  *
 872  * Route announcement of type %RA_ANY si generated when any route (in
 873  * routing table @tab) changes In that case @old stores the old route
 874  * from the same protocol.
 875  *
 876  * For each appropriate protocol, we first call its preexport()
 877  * hook which performs basic checks on the route (each protocol has a
 878  * right to veto or force accept of the route before any filter is
 879  * asked) and adds default values of attributes specific to the new
 880  * protocol (metrics, tags etc.).  Then it consults the protocol's
 881  * export filter and if it accepts the route, the rt_notify() hook of
 882  * the protocol gets called.
 883  */
 884 static void
 885 rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
 886              rte *new_best, rte *old_best, rte *before_old)
 887 {
 888   if (!rte_is_valid(new))
 889     new = NULL;
 890
 891   if (!rte_is_valid(old))
 892     old = before_old = NULL;
 893
 894   if (!rte_is_valid(new_best))
 895     new_best = NULL;
 896
 897   if (!rte_is_valid(old_best))
 898     old_best = NULL;
 899
 900   if (!old && !new)
 901     return;
 902
 903   if ((type == RA_OPTIMAL) && tab->hostcache)
 904     rt_notify_hostcache(tab, net);
 905
 906   struct channel *c; node *n;
 907   WALK_LIST2(c, n, tab->channels, table_node)
 908     {
 909       if (c->export_state == ES_DOWN)
 910         continue;
 911
 912       if (c->ra_mode == type)
 913         if (type == RA_ACCEPTED)
 914           rt_notify_accepted(c, net, new, old, before_old, 0);
 915         else if (type == RA_MERGED)
 916           rt_notify_merged(c, net, new, old, new_best, old_best, 0);
 917         else
 918           rt_notify_basic(c, net, new, old, 0);
 919     }
 920 }
 921
 922 static inline int
 923 rte_validate(rte *e)
 924 {
 925   int c;
 926   net *n = e->net;
 927
 928   if (!net_validate(n->n.addr))
 929   {
 930     log(L_WARN "Ignoring bogus prefix %N received via %s",
 931         n->n.addr, e->sender->proto->name);
 932     return 0;
 933   }
 934
 935   /* FIXME: better handling different nettypes */
 936   c = !net_is_flow(n->n.addr) ?
 937     net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE);
 938   if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
 939   {
 940     log(L_WARN "Ignoring bogus route %N received via %s",
 941         n->n.addr, e->sender->proto->name);
 942     return 0;
 943   }
 944
 945   if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest)
 946   {
 947     log(L_WARN "Ignoring route %N with invalid dest %d received via %s",
 948         n->n.addr, e->attrs->dest, e->sender->proto->name);
 949     return 0;
 950   }
 951
 952   if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh)))
 953   {
 954     log(L_WARN "Ignoring unsorted multipath route %N received via %s",
 955         n->n.addr, e->sender->proto->name);
 956     return 0;
 957   }
 958
 959   return 1;
 960 }
 961
 962 /**
 963  * rte_free - delete a &rte
 964  * @e: &rte to be deleted
 965  *
 966  * rte_free() deletes the given &rte from the routing table it's linked to.
 967  */
 968 void
 969 rte_free(rte *e)
 970 {
 971   if (rta_is_cached(e->attrs))
 972     rta_free(e->attrs);
 973   sl_free(rte_slab, e);
 974 }
 975
 976 static inline void
 977 rte_free_quick(rte *e)
 978 {
 979   rta_free(e->attrs);
 980   sl_free(rte_slab, e);
 981 }
 982
 983 static int
 984 rte_same(rte *x, rte *y)
 985 {
 986   return
 987     x->attrs == y->attrs &&
 988     x->flags == y->flags &&
 989     x->pflags == y->pflags &&
 990     x->pref == y->pref &&
 991     (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
 992 }
 993
 994 static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }
 995
/*
 * rte_recalculate - replace the route from @src in @net by @new
 * @c: channel the update arrives through
 * @net: network node whose route list is updated
 * @new: the new route, or NULL for a withdraw
 * @src: route source; the existing route with attrs->src == @src is replaced
 *
 * The function finds and unlinks the old route from @src (if any), applies
 * the receive and import limits of @c, updates the import statistics, links
 * @new into the route list of @net keeping the best route first, announces
 * the change and finally frees the old route. When the update is ignored
 * (pipe collision, unchanged route, blocked receive limit), @new is freed
 * here and nothing is announced.
 */
static void
rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct rtable *table = c->table;
  struct proto_stats *stats = &c->stats;
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
  rte *before_old = NULL;               /* Route preceding the old one in the list */
  rte *old_best = net->routes;          /* Head of the list before any change */
  rte *old = NULL;
  rte **k;

  k = &net->routes;                     /* Find and remove original route from the same protocol */
  while (old = *k)
    {
      if (old->attrs->src == src)
        {
          /* If there is the same route in the routing table but from
           * a different sender, then there are two paths from the
           * source protocol to this routing table through transparent
           * pipes, which is not allowed.
           *
           * We log that and ignore the route. If it is withdraw, we
           * ignore it completely (there might be 'spurious withdraws',
           * see FIXME in do_rte_announce())
           */
          if (old->sender->proto != p)
            {
              if (new)
                {
                  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s",
                      net->n.addr, table->name);
                  rte_free_quick(new);
                }
              return;
            }

          if (new && rte_same(old, new))
            {
              /* No changes, ignore the new route */

              if (!rte_is_filtered(new))
                {
                  stats->imp_updates_ignored++;
                  rte_trace_in(D_ROUTES, p, new, "ignored");
                }

              rte_free_quick(new);
              return;
            }
          /* Unlink the old route; it is freed at the very end of this function */
          *k = old->next;
          table->rt_count--;
          break;
        }
      k = &old->next;
      before_old = old;
    }

  /* Without an old route, before_old would just point to the list tail */
  if (!old)
    before_old = NULL;

  if (!old && !new)
    {
      stats->imp_withdraws_ignored++;
      return;
    }

  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  /* Receive limit: counts both imported and filtered routes of the channel */
  struct channel_limit *l = &c->rx_limit;
  if (l->action && !old && new && !c->in_table)
    {
      u32 all_routes = stats->imp_routes + stats->filt_routes;

      if (all_routes >= l->limit)
        channel_notify_limit(c, l, PLD_RX, all_routes);

      if (l->state == PLS_BLOCKED)
        {
          /* In receive limit the situation is simple, old is NULL so
             we just free new and exit like nothing happened */

          stats->imp_updates_ignored++;
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
          rte_free_quick(new);
          return;
        }
    }

  /* Import limit: counts only imported (non-filtered) routes */
  l = &c->in_limit;
  if (l->action && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
        channel_notify_limit(c, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
        {
          /* In import limit the situation is more complicated. We
             shouldn't just drop the route, we should handle it like
             it was filtered. We also have to continue the route
             processing if old or new is non-NULL, but we should exit
             if both are NULL as this case is probably assumed to be
             already handled. */

          stats->imp_updates_ignored++;
          rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

          if (c->in_keep_filtered)
            new->flags |= REF_FILTERED;
          else
            { rte_free_quick(new); new = NULL; }

          /* Note that old && !new could be possible when
             c->in_keep_filtered changed in the recent past. */

          if (!old && !new)
            return;

          new_ok = 0;
          goto skip_stats1;
        }
    }

  if (new_ok)
    stats->imp_updates_accepted++;
  else if (old_ok)
    stats->imp_withdraws_accepted++;
  else
    stats->imp_withdraws_ignored++;

 skip_stats1:

  /* Keep the per-channel route counters in sync with the table contents */
  if (new)
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
  if (old)
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;

  if (table->config->sorted)
    {
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
        {
          /* Start the search after before_old when new is not better than it,
             otherwise start from the list head */
          if (before_old && !rte_better(new, before_old))
            k = &before_old->next;
          else
            k = &net->routes;

          for (; *k; k=&(*k)->next)
            if (rte_better(new, *k))
              break;

          new->next = *k;
          *k = new;
          table->rt_count++;
        }
    }
  else
    {
      /* If routes are not sorted, find the best route and move it on
         the first position. There are several optimized cases. */

      /* A nonzero result of the protocol hook forces the full recalculation */
      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
        goto do_recalculate;

      if (new && rte_better(new, old_best))
        {
          /* The first case - the new route is clearly optimal,
             we link it at the first position */

          new->next = net->routes;
          net->routes = new;
          table->rt_count++;
        }
      else if (old == old_best)
        {
          /* The second case - the old best route disappeared, we add the
             new route (if we have any) to the list (we don't care about
             position) and then we elect the new optimal route and relink
             that route at the first position and announce it. New optimal
             route might be NULL if there is no more routes */

        do_recalculate:
          /* Add the new route to the list */
          if (new)
            {
              new->next = net->routes;
              net->routes = new;
              table->rt_count++;
            }

          /* Find a new optimal route (if there is any) */
          if (net->routes)
            {
              rte **bp = &net->routes;
              for (k=&(*bp)->next; *k; k=&(*k)->next)
                if (rte_better(*k, *bp))
                  bp = k;

              /* And relink it */
              rte *best = *bp;
              *bp = best->next;
              best->next = net->routes;
              net->routes = best;
            }
        }
      else if (new)
        {
          /* The third case - the new route is not better than the old
             best route (therefore old_best != NULL) and the old best
             route was not removed (therefore old_best == net->routes).
             We just link the new route after the old best route. */

          ASSERT(net->routes != NULL);
          new->next = net->routes->next;
          net->routes->next = new;
          table->rt_count++;
        }
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
    }

  if (new)
    new->lastmod = current_time();

  /* Log the route change */
  if (p->debug & D_ROUTES)
    {
      if (new_ok)
        rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
        {
          if (old != old_best)
            rte_trace(p, old, '>', "removed");
          else if (rte_is_ok(net->routes))
            rte_trace(p, old, '>', "removed [replaced]");
          else
            rte_trace(p, old, '>', "removed [sole]");
        }
    }

  /* Propagate the route change */
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
  if (net->routes != old_best)
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
  if (table->config->sorted)
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);

  /* The net became empty: schedule a prune once enough such operations
     happened (gc_max_ops) and enough time passed since gc_time (gc_min_time) */
  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= current_time()))
    rt_schedule_prune(table);

  /* Protocol hooks are called only for non-filtered routes */
  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

  if (old)
    rte_free_quick(old);
}
1257
static int rte_update_nest_cnt;         /* Depth of currently open (possibly nested) update sections */

/* Open an update section. Sections may nest, so only the depth is bumped. */
static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt += 1;
}
1265
1266 static inline void
1267 rte_update_unlock(void)
1268 {
1269   if (!--rte_update_nest_cnt)
1270     lp_flush(rte_update_pool);
1271 }
1272
1273 static inline void
1274 rte_hide_dummy_routes(net *net, rte **dummy)
1275 {
1276   if (net->routes && net->routes->attrs->source == RTS_DUMMY)
1277   {
1278     *dummy = net->routes;
1279     net->routes = (*dummy)->next;
1280   }
1281 }
1282
1283 static inline void
1284 rte_unhide_dummy_routes(net *net, rte **dummy)
1285 {
1286   if (*dummy)
1287   {
1288     (*dummy)->next = net->routes;
1289     net->routes = *dummy;
1290   }
1291 }
1292
/**
 * rte_update - enter a new update to a routing table
 * @c: channel doing the update; the updated table is @c->table
 * @n: address of the network the update is for
 * @new: a &rte representing the new route or %NULL for route removal.
 * @src: source of the route, used to find the old route to be replaced
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
 * sequence is to build route attributes first (either un-cached with @aflags set
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
 * @src specifies the source that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src has to be the
 * same value as @new->attrs->src. @c specifies the channel the update comes
 * through; it is stored in @new->sender and its preference is used when
 * @new->pref is zero.
 *
 * When rte_update() gets any route, it automatically validates it (checks,
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * channel to see if the route should be accepted and/or its attributes modified,
 * stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
 * automatically find any old route defined by the source @src
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
 * the change (if any) to all routing protocols by calling rte_announce().
 *
 * A route that is invalid or rejected by the filter (and not kept as filtered)
 * is freed and the update is then processed like a withdraw.
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when the outermost
 * update section is left (see rte_update_unlock()).
 */

void
rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
{
  struct proto *p = c->proto;
  struct proto_stats *stats = &c->stats;
  struct filter *filter = c->in_filter;
  rte *dummy = NULL;
  net *nn;

  ASSERT(c->channel_state == CS_UP);

  rte_update_lock();
  if (new)
    {
      nn = net_get(c->table, n);

      new->net = nn;
      new->sender = c;

      /* Zero preference means "use the channel default" */
      if (!new->pref)
        new->pref = c->preference;

      stats->imp_updates_received++;
      if (!rte_validate(new))
        {
          rte_trace_in(D_FILTERS, p, new, "invalid");
          stats->imp_updates_invalid++;
          goto drop;
        }

      if (filter == FILTER_REJECT)
        {
          stats->imp_updates_filtered++;
          rte_trace_in(D_FILTERS, p, new, "filtered out");

          if (! c->in_keep_filtered)
            goto drop;

          /* new is a private copy, so it can be modified */
          new->flags |= REF_FILTERED;
        }
      else
        {
          rte_make_tmp_attrs(&new, rte_update_pool);
          /* NOTE(review): the FILTER_REJECT test is redundant in this branch */
          if (filter && (filter != FILTER_REJECT))
            {
              ea_list *oldea = new->attrs->eattrs;
              int fr = f_run(filter, &new, rte_update_pool, 0);
              if (fr > F_ACCEPT)
                {
                  stats->imp_updates_filtered++;
                  rte_trace_in(D_FILTERS, p, new, "filtered out");

                  if (! c->in_keep_filtered)
                    goto drop;

                  new->flags |= REF_FILTERED;
                }
              /* The filter replaced the extended attribute list, let the
                 source protocol store the temporary attributes back */
              if (new->attrs->eattrs != oldea && src->proto->store_tmp_attrs)
                src->proto->store_tmp_attrs(new);
            }
        }
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
        new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
  else
    {
      stats->imp_withdraws_received++;

      /* Withdraw for an unknown network or without a source: nothing to remove */
      if (!(nn = net_find(c->table, n)) || !src)
        {
          stats->imp_withdraws_ignored++;
          rte_update_unlock();
          return;
        }
    }

 recalc:
  /* A dummy route at the list head is taken aside during the recalculation */
  rte_hide_dummy_routes(nn, &dummy);
  rte_recalculate(c, nn, new, src);
  rte_unhide_dummy_routes(nn, &dummy);
  rte_update_unlock();
  return;

 drop:
  /* The rejected route is freed and the update continues as a withdraw */
  rte_free(new);
  new = NULL;
  goto recalc;
}
1425
/* Independent call to rte_announce(), used from next hop
   recalculation, outside of rte_update(). new must be non-NULL.
   The call is wrapped in its own update section (rte_update_lock() /
   rte_update_unlock()). */
static inline void
rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
               rte *new_best, rte *old_best)
{
  rte_update_lock();
  rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
  rte_update_unlock();
}
1436
/* Remove the route @old from its table by running rte_recalculate() with no
   new route, on behalf of the route's own sender channel and source. */
static inline void
rte_discard(rte *old)   /* Non-filtered route deletion, used during garbage collection */
{
  rte_update_lock();
  rte_recalculate(old->sender, old->net, NULL, old->attrs->src);
  rte_update_unlock();
}
1444
1445 /* Modify existing route by protocol hook, used for long-lived graceful restart */
1446 static inline void
1447 rte_modify(rte *old)
1448 {
1449   rte_update_lock();
1450
1451   rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
1452   if (new != old)
1453   {
1454     if (new)
1455     {
1456       if (!rta_is_cached(new->attrs))
1457         new->attrs = rta_lookup(new->attrs);
1458       new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
1459     }
1460
1461     rte_recalculate(old->sender, old->net, new, old->attrs->src);
1462   }
1463
1464   rte_update_unlock();
1465 }
1466
1467 /* Check rtable for best route to given net whether it would be exported do p */
1468 int
1469 rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter)
1470 {
1471   net *n = net_find(t, a);
1472   rte *rt = n ? n->routes : NULL;
1473
1474   if (!rte_is_valid(rt))
1475     return 0;
1476
1477   rte_update_lock();
1478
1479   /* Rest is stripped down export_filter() */
1480   int v = p->preexport ? p->preexport(p, &rt, rte_update_pool) : 0;
1481   if (v == RIC_PROCESS)
1482   {
1483     rte_make_tmp_attrs(&rt, rte_update_pool);
1484     v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT);
1485   }
1486
1487   /* Discard temporary rte */
1488   if (rt != n->routes)
1489     rte_free(rt);
1490
1491   rte_update_unlock();
1492
1493   return v > 0;
1494 }
1495
1496
1497 /**
1498  * rt_refresh_begin - start a refresh cycle
1499  * @t: related routing table
1500  * @c related channel
1501  *
1502  * This function starts a refresh cycle for given routing table and announce
1503  * hook. The refresh cycle is a sequence where the protocol sends all its valid
1504  * routes to the routing table (by rte_update()). After that, all protocol
1505  * routes (more precisely routes with @c as @sender) not sent during the
1506  * refresh cycle but still in the table from the past are pruned. This is
1507  * implemented by marking all related routes as stale by REF_STALE flag in
1508  * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
1509  * flag in rt_refresh_end() and then removing such routes in the prune loop.
1510  */
1511 void
1512 rt_refresh_begin(rtable *t, struct channel *c)
1513 {
1514   FIB_WALK(&t->fib, net, n)
1515     {
1516       rte *e;
1517       for (e = n->routes; e; e = e->next)
1518         if (e->sender == c)
1519           e->flags |= REF_STALE;
1520     }
1521   FIB_WALK_END;
1522 }
1523
1524 /**
1525  * rt_refresh_end - end a refresh cycle
1526  * @t: related routing table
1527  * @c: related channel
1528  *
1529  * This function ends a refresh cycle for given routing table and announce
1530  * hook. See rt_refresh_begin() for description of refresh cycles.
1531  */
1532 void
1533 rt_refresh_end(rtable *t, struct channel *c)
1534 {
1535   int prune = 0;
1536
1537   FIB_WALK(&t->fib, net, n)
1538     {
1539       rte *e;
1540       for (e = n->routes; e; e = e->next)
1541         if ((e->sender == c) && (e->flags & REF_STALE))
1542           {
1543             e->flags |= REF_DISCARD;
1544             prune = 1;
1545           }
1546     }
1547   FIB_WALK_END;
1548
1549   if (prune)
1550     rt_schedule_prune(t);
1551 }
1552
1553 void
1554 rt_modify_stale(rtable *t, struct channel *c)
1555 {
1556   int prune = 0;
1557
1558   FIB_WALK(&t->fib, net, n)
1559     {
1560       rte *e;
1561       for (e = n->routes; e; e = e->next)
1562         if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED))
1563           {
1564             e->flags |= REF_MODIFY;
1565             prune = 1;
1566           }
1567     }
1568   FIB_WALK_END;
1569
1570   if (prune)
1571     rt_schedule_prune(t);
1572 }
1573
1574 /**
1575  * rte_dump - dump a route
1576  * @e: &rte to be dumped
1577  *
1578  * This functions dumps contents of a &rte to debug output.
1579  */
1580 void
1581 rte_dump(rte *e)
1582 {
1583   net *n = e->net;
1584   debug("%-1N ", n->n.addr);
1585   debug("KF=%02x PF=%02x pref=%d ", n->n.flags, e->pflags, e->pref);
1586   rta_dump(e->attrs);
1587   if (e->attrs->src->proto->proto->dump_attrs)
1588     e->attrs->src->proto->proto->dump_attrs(e);
1589   debug("\n");
1590 }
1591
1592 /**
1593  * rt_dump - dump a routing table
1594  * @t: routing table to be dumped
1595  *
1596  * This function dumps contents of a given routing table to debug output.
1597  */
1598 void
1599 rt_dump(rtable *t)
1600 {
1601   debug("Dump of routing table <%s>\n", t->name);
1602 #ifdef DEBUGGING
1603   fib_check(&t->fib);
1604 #endif
1605   FIB_WALK(&t->fib, net, n)
1606     {
1607       rte *e;
1608       for(e=n->routes; e; e=e->next)
1609         rte_dump(e);
1610     }
1611   FIB_WALK_END;
1612   debug("\n");
1613 }
1614
1615 /**
1616  * rt_dump_all - dump all routing tables
1617  *
1618  * This function dumps contents of all routing tables to debug output.
1619  */
1620 void
1621 rt_dump_all(void)
1622 {
1623   rtable *t;
1624
1625   WALK_LIST(t, routing_tables)
1626     rt_dump(t);
1627 }
1628
1629 static inline void
1630 rt_schedule_hcu(rtable *tab)
1631 {
1632   if (tab->hcu_scheduled)
1633     return;
1634
1635   tab->hcu_scheduled = 1;
1636   ev_schedule(tab->rt_event);
1637 }
1638
1639 static inline void
1640 rt_schedule_nhu(rtable *tab)
1641 {
1642   if (tab->nhu_state == NHU_CLEAN)
1643     ev_schedule(tab->rt_event);
1644
1645   /* state change:
1646    *   NHU_CLEAN   -> NHU_SCHEDULED
1647    *   NHU_RUNNING -> NHU_DIRTY
1648    */
1649   tab->nhu_state |= NHU_SCHEDULED;
1650 }
1651
1652 void
1653 rt_schedule_prune(rtable *tab)
1654 {
1655   if (tab->prune_state == 0)
1656     ev_schedule(tab->rt_event);
1657
1658   /* state change 0->1, 2->3 */
1659   tab->prune_state |= 1;
1660 }
1661
1662
1663 static void
1664 rt_event(void *ptr)
1665 {
1666   rtable *tab = ptr;
1667
1668   rt_lock_table(tab);
1669
1670   if (tab->hcu_scheduled)
1671     rt_update_hostcache(tab);
1672
1673   if (tab->nhu_state)
1674     rt_next_hop_update(tab);
1675
1676   if (tab->prune_state)
1677     rt_prune_table(tab);
1678
1679   rt_unlock_table(tab);
1680 }
1681
1682 void
1683 rt_setup(pool *p, rtable *t, struct rtable_config *cf)
1684 {
1685   bzero(t, sizeof(*t));
1686   t->name = cf->name;
1687   t->config = cf;
1688   t->addr_type = cf->addr_type;
1689   fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL);
1690   init_list(&t->channels);
1691
1692   t->rt_event = ev_new_init(p, rt_event, t);
1693   t->gc_time = current_time();
1694 }
1695
/**
 * rt_init - initialize routing tables
 *
 * This function is called during BIRD startup. It initializes the
 * routing table module: the route attribute module (rta_init()), the
 * resource pool for routing tables, the linear pool for temporary data of
 * route updates, the slab for &rte structures and the list of tables.
 */
void
rt_init(void)
{
  rta_init();
  rt_table_pool = rp_new(&root_pool, "Routing tables");
  /* Temporary allocations; flushed by rte_update_unlock() */
  rte_update_pool = lp_new_default(rt_table_pool);
  rte_slab = sl_new(rt_table_pool, sizeof(rte));
  init_list(&routing_tables);
}
1711
1712
/**
 * rt_prune_table - prune a routing table
 * @tab: routing table to be pruned
 *
 * The prune loop scans routing tables and removes routes belonging to flushing
 * protocols, discarded routes and also stale network entries. It is called from
 * rt_event(). The event is rescheduled if the current iteration does not finish
 * the table. The pruning is directed by the prune state (@prune_state),
 * specifying whether the prune cycle is scheduled or running, and there
 * is also a persistent pruning iterator (@prune_fit).
 *
 * The prune loop is used also for channel flushing. For this purpose, the
 * channels to flush are marked before the iteration and notified after the
 * iteration.
 */
static void
rt_prune_table(rtable *tab)
{
  struct fib_iterator *fit = &tab->prune_fit;
  int limit = 512;              /* Max number of routes handled in one run */

  struct channel *c;
  node *n, *x;

  DBG("Pruning route table %s\n", tab->name);
#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  if (tab->prune_state == 0)
    return;

  /* Prune scheduled but not running yet: start a new cycle */
  if (tab->prune_state == 1)
  {
    /* Mark channels to flush */
    WALK_LIST2(c, n, tab->channels, table_node)
      if (c->channel_state == CS_FLUSHING)
        c->flush_active = 1;

    FIB_ITERATE_INIT(fit, &tab->fib);
    tab->prune_state = 2;
  }

again:
  FIB_ITERATE_START(&tab->fib, fit, net, n)
    {
      rte *e;

    rescan:
      /* The route list changes under our hands when a route is discarded or
         modified, therefore the scan of the net is restarted after each one */
      for (e=n->routes; e; e=e->next)
      {
        if (e->sender->flush_active || (e->flags & REF_DISCARD))
          {
            /* Budget exhausted: park the iterator and continue in the next run */
            if (limit <= 0)
              {
                FIB_ITERATE_PUT(fit);
                ev_schedule(tab->rt_event);
                return;
              }

            rte_discard(e);
            limit--;

            goto rescan;
          }

        if (e->flags & REF_MODIFY)
          {
            if (limit <= 0)
              {
                FIB_ITERATE_PUT(fit);
                ev_schedule(tab->rt_event);
                return;
              }

            rte_modify(e);
            limit--;

            goto rescan;
          }
      }

      if (!n->routes)           /* Orphaned FIB entry */
        {
          FIB_ITERATE_PUT(fit);
          fib_delete(&tab->fib, n);
          goto again;
        }
    }
  FIB_ITERATE_END;

#ifdef DEBUGGING
  fib_check(&tab->fib);
#endif

  /* The whole table was walked, restart the garbage collection accounting */
  tab->gc_counter = 0;
  tab->gc_time = current_time();

  /* state change 2->0, 3->1 */
  tab->prune_state &= 1;

  /* Another prune was requested while this one was running */
  if (tab->prune_state > 0)
    ev_schedule(tab->rt_event);

  /* FIXME: This should be handled in a better way */
  rt_prune_sources();

  /* Close flushed channels */
  WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node)
    if (c->flush_active)
      {
        c->flush_active = 0;
        channel_set_state(c, CS_DOWN);
      }

  return;
}
1829
/* Prepare the table list of configuration @c and define the tables
   "master4" (NET_IP4) and "master6" (NET_IP6) by rt_new_table(). */
void
rt_preconfig(struct config *c)
{
  init_list(&c->tables);

  rt_new_table(cf_get_symbol("master4"), NET_IP4);
  rt_new_table(cf_get_symbol("master6"), NET_IP6);
}
1838
1839
/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */
1844
1845 static inline int
1846 rta_next_hop_outdated(rta *a)
1847 {
1848   struct hostentry *he = a->hostentry;
1849
1850   if (!he)
1851     return 0;
1852
1853   if (!he->src)
1854     return a->dest != RTD_UNREACHABLE;
1855
1856   return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
1857     (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
1858 }
1859
/*
 * rta_apply_hostentry - fill next hop data of @a from a hostentry
 * @a: route attributes to be updated
 * @he: hostentry to take destination, IGP metric and next hops from
 * @mls: label stack to put below the hostentry labels, or NULL
 *
 * Sets @a->hostentry, @a->dest and @a->igp_metric from @he. For a unicast
 * destination, the next hops of @he->src are either copied as a whole (no
 * labels to add and the hostentry is marked nexthop_linkable) or rebuilt one
 * by one with the labels of @mls appended; additional next hops are allocated
 * from rte_update_pool. Next hops whose resulting label stack would exceed
 * MPLS_MAX_LABEL_STACK are skipped; if none remains, the route is set
 * unreachable.
 */
void
rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
{
  a->hostentry = he;
  a->dest = he->dest;
  a->igp_metric = he->igp_metric;

  if (a->dest != RTD_UNICAST)
  {
    /* No nexthop */
no_nexthop:
    /* Also entered by goto from the end when every next hop was skipped */
    a->nh = (struct nexthop) {};
    if (mls)
    { /* Store the label stack for later changes */
      a->nh.labels_orig = a->nh.labels = mls->len;
      memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32));
    }
    return;
  }

  if (((!mls) || (!mls->len)) && he->nexthop_linkable)
  { /* Just link the nexthop chain, no label append happens. */
    memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh)));
    return;
  }

  /* nhp is the slot being filled, nhr the last slot known to be kept */
  struct nexthop *nhp = NULL, *nhr = NULL;
  int skip_nexthop = 0;

  for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
  {
    /* After a skipped next hop, its slot is reused instead of taking a new one */
    if (skip_nexthop)
      skip_nexthop--;
    else
    {
      nhr = nhp;
      nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh));
    }

    nhp->iface = nh->iface;
    nhp->weight = nh->weight;
    /* NOTE(review): when mls is NULL, the labels of nh are not copied to nhp
       here -- confirm that dropping them is intended */
    if (mls)
    {
      nhp->labels = nh->labels + mls->len;
      nhp->labels_orig = mls->len;
      if (nhp->labels <= MPLS_MAX_LABEL_STACK)
      {
        memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */
        memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */
      }
      else
      {
        log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)",
            nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK);
        skip_nexthop++;
        continue;
      }
    }
    if (ipa_nonzero(nh->gw))
    {
      nhp->gw = nh->gw;                 /* Router nexthop */
      nhp->flags |= (nh->flags & RNF_ONLINK);
    }
    else if (ipa_nonzero(he->link))
      nhp->gw = he->link;               /* Device nexthop with link-local address known */
    else
      nhp->gw = he->addr;               /* Device nexthop with link-local address unknown */
  }

  /* The last next hop was skipped: cut its slot off the chain, or give up
     when there is no kept slot at all. The else below pairs with the inner
     if (nhr). NOTE(review): explicit braces would make that pairing obvious. */
  if (skip_nexthop)
    if (nhr)
      nhr->next = NULL;
    else
    {
      a->dest = RTD_UNREACHABLE;
      log(L_WARN "No valid nexthop remaining, setting route unreachable");
      goto no_nexthop;
    }
}
1939
1940 static inline rte *
1941 rt_next_hop_update_rte(rtable *tab UNUSED, rte *old)
1942 {
1943   rta *a = alloca(RTA_MAX_SIZE);
1944   memcpy(a, old->attrs, rta_size(old->attrs));
1945
1946   mpls_label_stack mls = { .len = a->nh.labels_orig };
1947   memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32));
1948
1949   rta_apply_hostentry(a, old->attrs->hostentry, &mls);
1950   a->aflags = 0;
1951
1952   rte *e = sl_alloc(rte_slab);
1953   memcpy(e, old, sizeof(rte));
1954   e->attrs = rta_lookup(a);
1955
1956   return e;
1957 }
1958
/*
 * Refresh next hop data of all routes of net @n in table @tab.
 *
 * Every route whose attributes are outdated with respect to its hostentry is
 * replaced in place by an updated copy and the replacement is announced. Then
 * the best route is re-elected, moved to the head of the list and announced.
 * Returns the number of replaced routes (0 when nothing changed).
 */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;

  old_best = n->routes;
  if (!old_best)
    return 0;

  for (k = &n->routes; e = *k; k = &e->next)
    if (rta_next_hop_outdated(e->attrs))
      {
        /* The updated copy takes the list position of the old route
           (its next pointer was copied from the old one) */
        new = rt_next_hop_update_rte(tab, e);
        *k = new;

        rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
        rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");

        /* Call a pre-comparison hook */
        /* Not really an efficient way to compute this */
        if (e->attrs->src->proto->rte_recalculate)
          e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL);

        if (e != old_best)
          rte_free_quick(e);
        else /* Freeing of the old best rte is postponed */
          free_old_best = 1;

        /* Continue the walk from the replacement */
        e = new;
        count++;
      }

  if (!count)
    return 0;

  /* Find the new best route */
  new_best = NULL;
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (!new_best || rte_better(e, *new_best))
        new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
      rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
    }

  /* FIXME: Better announcement of merged routes */
  rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);

  /* The old best route was still needed for the announcements above */
  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
2028
2029 static void
2030 rt_next_hop_update(rtable *tab)
2031 {
2032   struct fib_iterator *fit = &tab->nhu_fit;
2033   int max_feed = 32;
2034
2035   if (tab->nhu_state == NHU_CLEAN)
2036     return;
2037
2038   if (tab->nhu_state == NHU_SCHEDULED)
2039     {
2040       FIB_ITERATE_INIT(fit, &tab->fib);
2041       tab->nhu_state = NHU_RUNNING;
2042     }
2043
2044   FIB_ITERATE_START(&tab->fib, fit, net, n)
2045     {
2046       if (max_feed <= 0)
2047         {
2048           FIB_ITERATE_PUT(fit);
2049           ev_schedule(tab->rt_event);
2050           return;
2051         }
2052       max_feed -= rt_next_hop_update_net(tab, n);
2053     }
2054   FIB_ITERATE_END;
2055
2056   /* State change:
2057    *   NHU_DIRTY   -> NHU_SCHEDULED
2058    *   NHU_RUNNING -> NHU_CLEAN
2059    */
2060   tab->nhu_state &= 1;
2061
2062   if (tab->nhu_state != NHU_CLEAN)
2063     ev_schedule(tab->rt_event);
2064 }
2065
2066
2067 struct rtable_config *
2068 rt_new_table(struct symbol *s, uint addr_type)
2069 {
2070   /* Hack that allows to 'redefine' the master table */
2071   if ((s->class == SYM_TABLE) &&
2072       (s->def == new_config->def_tables[addr_type]) &&
2073       ((addr_type == NET_IP4) || (addr_type == NET_IP6)))
2074     return s->def;
2075
2076   struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config));
2077
2078   cf_define_symbol(s, SYM_TABLE, c);
2079   c->name = s->name;
2080   c->addr_type = addr_type;
2081   c->gc_max_ops = 1000;
2082   c->gc_min_time = 5;
2083
2084   add_tail(&new_config->tables, &c->n);
2085
2086   /* First table of each type is kept as default */
2087   if (! new_config->def_tables[addr_type])
2088     new_config->def_tables[addr_type] = c;
2089
2090   return c;
2091 }
2092
2093 /**
2094  * rt_lock_table - lock a routing table
2095  * @r: routing table to be locked
2096  *
2097  * Lock a routing table, because it's in use by a protocol,
2098  * preventing it from being freed when it gets undefined in a new
2099  * configuration.
2100  */
2101 void
2102 rt_lock_table(rtable *r)
2103 {
2104   r->use_count++;
2105 }
2106
2107 /**
2108  * rt_unlock_table - unlock a routing table
2109  * @r: routing table to be unlocked
2110  *
2111  * Unlock a routing table formerly locked by rt_lock_table(),
2112  * that is decrease its use count and delete it if it's scheduled
2113  * for deletion by configuration changes.
2114  */
2115 void
2116 rt_unlock_table(rtable *r)
2117 {
2118   if (!--r->use_count && r->deleted)
2119     {
2120       struct config *conf = r->deleted;
2121       DBG("Deleting routing table %s\n", r->name);
2122       r->config->table = NULL;
2123       if (r->hostcache)
2124         rt_free_hostcache(r);
2125       rem_node(&r->n);
2126       fib_free(&r->fib);
2127       rfree(r->rt_event);
2128       mb_free(r);
2129       config_del_obstacle(conf);
2130     }
2131 }
2132
2133 static struct rtable_config *
2134 rt_find_table_config(struct config *cf, char *name)
2135 {
2136   struct symbol *sym = cf_find_symbol(cf, name);
2137   return (sym && (sym->class == SYM_TABLE)) ? sym->def : NULL;
2138 }
2139
2140 /**
2141  * rt_commit - commit new routing table configuration
2142  * @new: new configuration
2143  * @old: original configuration or %NULL if it's boot time config
2144  *
2145  * Scan differences between @old and @new configuration and modify
2146  * the routing tables according to these changes. If @new defines a
2147  * previously unknown table, create it, if it omits a table existing
2148  * in @old, schedule it for deletion (it gets deleted when all protocols
2149  * disconnect from it by calling rt_unlock_table()), if it exists
2150  * in both configurations, leave it unchanged.
2151  */
2152 void
2153 rt_commit(struct config *new, struct config *old)
2154 {
2155   struct rtable_config *o, *r;
2156
2157   DBG("rt_commit:\n");
2158   if (old)
2159     {
2160       WALK_LIST(o, old->tables)
2161         {
2162           rtable *ot = o->table;
2163           if (!ot->deleted)
2164             {
2165               r = rt_find_table_config(new, o->name);
2166               if (r && (r->addr_type == o->addr_type) && !new->shutdown)
2167                 {
2168                   DBG("\t%s: same\n", o->name);
2169                   r->table = ot;
2170                   ot->name = r->name;
2171                   ot->config = r;
2172                   if (o->sorted != r->sorted)
2173                     log(L_WARN "Reconfiguration of rtable sorted flag not implemented");
2174                 }
2175               else
2176                 {
2177                   DBG("\t%s: deleted\n", o->name);
2178                   ot->deleted = old;
2179                   config_add_obstacle(old);
2180                   rt_lock_table(ot);
2181                   rt_unlock_table(ot);
2182                 }
2183             }
2184         }
2185     }
2186
2187   WALK_LIST(r, new->tables)
2188     if (!r->table)
2189       {
2190         rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable));
2191         DBG("\t%s: created\n", r->name);
2192         rt_setup(rt_table_pool, t, r);
2193         add_tail(&routing_tables, &t->n);
2194         r->table = t;
2195       }
2196   DBG("\tdone\n");
2197 }
2198
2199 static inline void
2200 do_feed_channel(struct channel *c, net *n, rte *e)
2201 {
2202   rte_update_lock();
2203   if (c->ra_mode == RA_ACCEPTED)
2204     rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1);
2205   else if (c->ra_mode == RA_MERGED)
2206     rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding);
2207   else /* RA_BASIC */
2208     rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding);
2209   rte_update_unlock();
2210 }
2211
2212 /**
2213  * rt_feed_channel - advertise all routes to a channel
2214  * @c: channel to be fed
2215  *
2216  * This function performs one pass of advertisement of routes to a channel that
2217  * is in the ES_FEEDING state. It is called by the protocol code as long as it
2218  * has something to do. (We avoid transferring all the routes in single pass in
2219  * order not to monopolize CPU time.)
2220  */
2221 int
2222 rt_feed_channel(struct channel *c)
2223 {
2224   struct fib_iterator *fit = &c->feed_fit;
2225   int max_feed = 256;
2226
2227   ASSERT(c->export_state == ES_FEEDING);
2228
2229   if (!c->feed_active)
2230     {
2231       FIB_ITERATE_INIT(fit, &c->table->fib);
2232       c->feed_active = 1;
2233     }
2234
2235   FIB_ITERATE_START(&c->table->fib, fit, net, n)
2236     {
2237       rte *e = n->routes;
2238       if (max_feed <= 0)
2239         {
2240           FIB_ITERATE_PUT(fit);
2241           return 0;
2242         }
2243
2244       /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */
2245
2246       if ((c->ra_mode == RA_OPTIMAL) ||
2247           (c->ra_mode == RA_ACCEPTED) ||
2248           (c->ra_mode == RA_MERGED))
2249         if (rte_is_valid(e))
2250           {
2251             /* In the meantime, the protocol may fell down */
2252             if (c->export_state != ES_FEEDING)
2253               goto done;
2254
2255             do_feed_channel(c, n, e);
2256             max_feed--;
2257           }
2258
2259       if (c->ra_mode == RA_ANY)
2260         for(e = n->routes; e; e = e->next)
2261           {
2262             /* In the meantime, the protocol may fell down */
2263             if (c->export_state != ES_FEEDING)
2264               goto done;
2265
2266             if (!rte_is_valid(e))
2267               continue;
2268
2269             do_feed_channel(c, n, e);
2270             max_feed--;
2271           }
2272     }
2273   FIB_ITERATE_END;
2274
2275 done:
2276   c->feed_active = 0;
2277   return 1;
2278 }
2279
2280 /**
2281  * rt_feed_baby_abort - abort protocol feeding
2282  * @c: channel
2283  *
2284  * This function is called by the protocol code when the protocol stops or
2285  * ceases to exist during the feeding.
2286  */
2287 void
2288 rt_feed_channel_abort(struct channel *c)
2289 {
2290   if (c->feed_active)
2291     {
2292       /* Unlink the iterator */
2293       fit_get(&c->table->fib, &c->feed_fit);
2294       c->feed_active = 0;
2295     }
2296 }
2297
2298
2299 int
2300 rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src)
2301 {
2302   struct rtable *tab = c->in_table;
2303   rte *old, **pos;
2304   net *net;
2305
2306   if (new)
2307   {
2308     net = net_get(tab, n);
2309
2310     if (!new->pref)
2311       new->pref = c->preference;
2312
2313     if (!rta_is_cached(new->attrs))
2314       new->attrs = rta_lookup(new->attrs);
2315   }
2316   else
2317   {
2318     net = net_find(tab, n);
2319
2320     if (!net)
2321       goto drop_withdraw;
2322   }
2323
2324   /* Find the old rte */
2325   for (pos = &net->routes; old = *pos; pos = &old->next)
2326     if (old->attrs->src == src)
2327     {
2328       if (new && rte_same(old, new))
2329         goto drop_update;
2330
2331       /* Remove the old rte */
2332       *pos = old->next;
2333       rte_free_quick(old);
2334       tab->rt_count--;
2335
2336       break;
2337     }
2338
2339   if (!new)
2340   {
2341     if (!old)
2342       goto drop_withdraw;
2343
2344     return 1;
2345   }
2346
2347   struct channel_limit *l = &c->rx_limit;
2348   if (l->action && !old)
2349   {
2350     if (tab->rt_count >= l->limit)
2351       channel_notify_limit(c, l, PLD_RX, tab->rt_count);
2352
2353     if (l->state == PLS_BLOCKED)
2354     {
2355       rte_trace_in(D_FILTERS, c->proto, new, "ignored [limit]");
2356       goto drop_update;
2357     }
2358   }
2359
2360   /* Insert the new rte */
2361   rte *e = rte_do_cow(new);
2362   e->flags |= REF_COW;
2363   e->net = net;
2364   e->sender = c;
2365   e->lastmod = current_time();
2366   e->next = *pos;
2367   *pos = e;
2368   tab->rt_count++;
2369   return 1;
2370
2371 drop_update:
2372   c->stats.imp_updates_received++;
2373   c->stats.imp_updates_ignored++;
2374   rte_free(new);
2375   return 0;
2376
2377 drop_withdraw:
2378   c->stats.imp_withdraws_received++;
2379   c->stats.imp_withdraws_ignored++;
2380   return 0;
2381 }
2382
2383 int
2384 rt_reload_channel(struct channel *c)
2385 {
2386   struct rtable *tab = c->in_table;
2387   struct fib_iterator *fit = &c->reload_fit;
2388   int max_feed = 64;
2389
2390   ASSERT(c->channel_state == CS_UP);
2391
2392   if (!c->reload_active)
2393   {
2394     FIB_ITERATE_INIT(fit, &tab->fib);
2395     c->reload_active = 1;
2396   }
2397
2398   FIB_ITERATE_START(&tab->fib, fit, net, n)
2399   {
2400     if (max_feed <= 0)
2401     {
2402       FIB_ITERATE_PUT(fit);
2403       return 0;
2404     }
2405
2406     for (rte *e = n->routes; e; e = e->next)
2407     {
2408       rte_update2(c, n->n.addr, rte_do_cow(e), e->attrs->src);
2409       max_feed--;
2410     }
2411   }
2412   FIB_ITERATE_END;
2413
2414   c->reload_active = 0;
2415   return 1;
2416 }
2417
2418 void
2419 rt_reload_channel_abort(struct channel *c)
2420 {
2421   if (c->reload_active)
2422   {
2423     /* Unlink the iterator */
2424     fit_get(&c->in_table->fib, &c->reload_fit);
2425     c->reload_active = 0;
2426   }
2427 }
2428
2429 void
2430 rt_prune_sync(rtable *t, int all)
2431 {
2432   FIB_WALK(&t->fib, net, n)
2433   {
2434     rte *e, **ee = &n->routes;
2435     while (e = *ee)
2436     {
2437       if (all || (e->flags & (REF_STALE | REF_DISCARD)))
2438       {
2439         *ee = e->next;
2440         rte_free_quick(e);
2441         t->rt_count--;
2442       }
2443       else
2444         ee = &e->next;
2445     }
2446   }
2447   FIB_WALK_END;
2448 }
2449
2450
2451 static inline u32
2452 hc_hash(ip_addr a, rtable *dep)
2453 {
2454   return ipa_hash(a) ^ ptr_hash(dep);
2455 }
2456
2457 static inline void
2458 hc_insert(struct hostcache *hc, struct hostentry *he)
2459 {
2460   uint k = he->hash_key >> hc->hash_shift;
2461   he->next = hc->hash_table[k];
2462   hc->hash_table[k] = he;
2463 }
2464
2465 static inline void
2466 hc_remove(struct hostcache *hc, struct hostentry *he)
2467 {
2468   struct hostentry **hep;
2469   uint k = he->hash_key >> hc->hash_shift;
2470
2471   for (hep = &hc->hash_table[k]; *hep != he; hep = &(*hep)->next);
2472   *hep = he->next;
2473 }
2474
2475 #define HC_DEF_ORDER 10
2476 #define HC_HI_MARK *4
2477 #define HC_HI_STEP 2
2478 #define HC_HI_ORDER 16                  /* Must be at most 16 */
2479 #define HC_LO_MARK /5
2480 #define HC_LO_STEP 2
2481 #define HC_LO_ORDER 10
2482
2483 static void
2484 hc_alloc_table(struct hostcache *hc, unsigned order)
2485 {
2486   uint hsize = 1 << order;
2487   hc->hash_order = order;
2488   hc->hash_shift = 32 - order;
2489   hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK);
2490   hc->hash_min = (order <= HC_LO_ORDER) ?  0U : (hsize HC_LO_MARK);
2491
2492   hc->hash_table = mb_allocz(rt_table_pool, hsize * sizeof(struct hostentry *));
2493 }
2494
2495 static void
2496 hc_resize(struct hostcache *hc, unsigned new_order)
2497 {
2498   struct hostentry **old_table = hc->hash_table;
2499   struct hostentry *he, *hen;
2500   uint old_size = 1 << hc->hash_order;
2501   uint i;
2502
2503   hc_alloc_table(hc, new_order);
2504   for (i = 0; i < old_size; i++)
2505     for (he = old_table[i]; he != NULL; he=hen)
2506       {
2507         hen = he->next;
2508         hc_insert(hc, he);
2509       }
2510   mb_free(old_table);
2511 }
2512
2513 static struct hostentry *
2514 hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsigned k)
2515 {
2516   struct hostentry *he = sl_alloc(hc->slab);
2517
2518   *he = (struct hostentry) {
2519     .addr = a,
2520     .link = ll,
2521     .tab = dep,
2522     .hash_key = k,
2523   };
2524
2525   add_tail(&hc->hostentries, &he->ln);
2526   hc_insert(hc, he);
2527
2528   hc->hash_items++;
2529   if (hc->hash_items > hc->hash_max)
2530     hc_resize(hc, hc->hash_order + HC_HI_STEP);
2531
2532   return he;
2533 }
2534
2535 static void
2536 hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
2537 {
2538   rta_free(he->src);
2539
2540   rem_node(&he->ln);
2541   hc_remove(hc, he);
2542   sl_free(hc->slab, he);
2543
2544   hc->hash_items--;
2545   if (hc->hash_items < hc->hash_min)
2546     hc_resize(hc, hc->hash_order - HC_LO_STEP);
2547 }
2548
2549 static void
2550 rt_init_hostcache(rtable *tab)
2551 {
2552   struct hostcache *hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
2553   init_list(&hc->hostentries);
2554
2555   hc->hash_items = 0;
2556   hc_alloc_table(hc, HC_DEF_ORDER);
2557   hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry));
2558
2559   hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024));
2560   hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2561
2562   tab->hostcache = hc;
2563 }
2564
2565 static void
2566 rt_free_hostcache(rtable *tab)
2567 {
2568   struct hostcache *hc = tab->hostcache;
2569
2570   node *n;
2571   WALK_LIST(n, hc->hostentries)
2572     {
2573       struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
2574       rta_free(he->src);
2575
2576       if (he->uc)
2577         log(L_ERR "Hostcache is not empty in table %s", tab->name);
2578     }
2579
2580   rfree(hc->slab);
2581   rfree(hc->lp);
2582   mb_free(hc->hash_table);
2583   mb_free(hc);
2584 }
2585
2586 static void
2587 rt_notify_hostcache(rtable *tab, net *net)
2588 {
2589   if (tab->hcu_scheduled)
2590     return;
2591
2592   if (trie_match_net(tab->hostcache->trie, net->n.addr))
2593     rt_schedule_hcu(tab);
2594 }
2595
2596 static int
2597 if_local_addr(ip_addr a, struct iface *i)
2598 {
2599   struct ifa *b;
2600
2601   WALK_LIST(b, i->addrs)
2602     if (ipa_equal(a, b->ip))
2603       return 1;
2604
2605   return 0;
2606 }
2607
2608 static u32
2609 rt_get_igp_metric(rte *rt)
2610 {
2611   eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
2612
2613   if (ea)
2614     return ea->u.data;
2615
2616   rta *a = rt->attrs;
2617
2618 #ifdef CONFIG_OSPF
2619   if ((a->source == RTS_OSPF) ||
2620       (a->source == RTS_OSPF_IA) ||
2621       (a->source == RTS_OSPF_EXT1))
2622     return rt->u.ospf.metric1;
2623 #endif
2624
2625 #ifdef CONFIG_RIP
2626   if (a->source == RTS_RIP)
2627     return rt->u.rip.metric;
2628 #endif
2629
2630   if (a->source == RTS_DEVICE)
2631     return 0;
2632
2633   return IGP_METRIC_UNKNOWN;
2634 }
2635
2636 static int
2637 rt_update_hostentry(rtable *tab, struct hostentry *he)
2638 {
2639   rta *old_src = he->src;
2640   int direct = 0;
2641   int pxlen = 0;
2642
2643   /* Reset the hostentry */
2644   he->src = NULL;
2645   he->dest = RTD_UNREACHABLE;
2646   he->nexthop_linkable = 0;
2647   he->igp_metric = 0;
2648
2649   net_addr he_addr;
2650   net_fill_ip_host(&he_addr, he->addr);
2651   net *n = net_route(tab, &he_addr);
2652   if (n)
2653     {
2654       rte *e = n->routes;
2655       rta *a = e->attrs;
2656       pxlen = n->n.addr->pxlen;
2657
2658       if (a->hostentry)
2659         {
2660           /* Recursive route should not depend on another recursive route */
2661           log(L_WARN "Next hop address %I resolvable through recursive route for %N",
2662               he->addr, n->n.addr);
2663           goto done;
2664         }
2665
2666       if (a->dest == RTD_UNICAST)
2667         {
2668           for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
2669             if (ipa_zero(nh->gw))
2670               {
2671                 if (if_local_addr(he->addr, nh->iface))
2672                   {
2673                     /* The host address is a local address, this is not valid */
2674                     log(L_WARN "Next hop address %I is a local address of iface %s",
2675                         he->addr, nh->iface->name);
2676                     goto done;
2677                   }
2678
2679                 direct++;
2680               }
2681         }
2682
2683       he->src = rta_clone(a);
2684       he->dest = a->dest;
2685       he->nexthop_linkable = !direct;
2686       he->igp_metric = rt_get_igp_metric(e);
2687     }
2688
2689 done:
2690   /* Add a prefix range to the trie */
2691   trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen);
2692
2693   rta_free(old_src);
2694   return old_src != he->src;
2695 }
2696
2697 static void
2698 rt_update_hostcache(rtable *tab)
2699 {
2700   struct hostcache *hc = tab->hostcache;
2701   struct hostentry *he;
2702   node *n, *x;
2703
2704   /* Reset the trie */
2705   lp_flush(hc->lp);
2706   hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node));
2707
2708   WALK_LIST_DELSAFE(n, x, hc->hostentries)
2709     {
2710       he = SKIP_BACK(struct hostentry, ln, n);
2711       if (!he->uc)
2712         {
2713           hc_delete_hostentry(hc, he);
2714           continue;
2715         }
2716
2717       if (rt_update_hostentry(tab, he))
2718         rt_schedule_nhu(he->tab);
2719     }
2720
2721   tab->hcu_scheduled = 0;
2722 }
2723
2724 struct hostentry *
2725 rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
2726 {
2727   struct hostentry *he;
2728
2729   if (!tab->hostcache)
2730     rt_init_hostcache(tab);
2731
2732   u32 k = hc_hash(a, dep);
2733   struct hostcache *hc = tab->hostcache;
2734   for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next)
2735     if (ipa_equal(he->addr, a) && (he->tab == dep))
2736       return he;
2737
2738   he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
2739   rt_update_hostentry(tab, he);
2740   return he;
2741 }
2742
2743
2744 /*
2745  *  Documentation for functions declared inline in route.h
2746  */
2747 #if 0
2748
2749 /**
2750  * net_find - find a network entry
2751  * @tab: a routing table
2752  * @addr: address of the network
2753  *
2754  * net_find() looks up the given network in routing table @tab and
2755  * returns a pointer to its &net entry or %NULL if no such network
2756  * exists.
2757  */
2758 static inline net *net_find(rtable *tab, net_addr *addr)
2759 { DUMMY; }
2760
2761 /**
2762  * net_get - obtain a network entry
2763  * @tab: a routing table
2764  * @addr: address of the network
2765  *
2766  * net_get() looks up the given network in routing table @tab and
2767  * returns a pointer to its &net entry. If no such entry exists, it's
2768  * created.
2769  */
2770 static inline net *net_get(rtable *tab, net_addr *addr)
2771 { DUMMY; }
2772
2773 /**
2774  * rte_cow - copy a route for writing
2775  * @r: a route entry to be copied
2776  *
2777  * rte_cow() takes a &rte and prepares it for modification. The exact action
2778  * taken depends on the flags of the &rte -- if it's a temporary entry, it's
2779  * just returned unchanged, else a new temporary entry with the same contents
2780  * is created.
2781  *
2782  * The primary use of this function is inside the filter machinery -- when
2783  * a filter wants to modify &rte contents (to change the preference or to
2784  * attach another set of attributes), it must ensure that the &rte is not
2785  * shared with anyone else (and especially that it isn't stored in any routing
2786  * table).
2787  *
2788  * Result: a pointer to the new writable &rte.
2789  */
2790 static inline rte * rte_cow(rte *r)
2791 { DUMMY; }
2792
2793 #endif