nest/rt-attr.c

   1 /*
   2  *      BIRD -- Route Attribute Cache
   3  *
   4  *      (c) 1998--2000 Martin Mares <mj@ucw.cz>
   5  *
   6  *      Can be freely distributed and used under the terms of the GNU GPL.
   7  */
   8
   9 /**
  10  * DOC: Route attribute cache
  11  *
  12  * Each route entry carries a set of route attributes. Several of them
  13  * vary from route to route, but most attributes are usually common
  14  * for a large number of routes. To conserve memory, we've decided to
  15  * store only the varying ones directly in the &rte and hold the rest
  16  * in a special structure called &rta which is shared among all the
  17  * &rte's with these attributes.
  18  *
  19  * Each &rta contains all the static attributes of the route (i.e.,
  20  * those which are always present) as structure members and a list of
  21  * dynamic attributes represented by a linked list of &ea_list
  22  * structures, each of them consisting of an array of &eattr's containing
  23  * the individual attributes. An attribute can be specified more than once
  24  * in the &ea_list chain and in such case the first occurrence overrides
  25  * the others. This semantics is used especially when someone (for example
  26  * a filter) wishes to alter values of several dynamic attributes, but
  27  * it wants to preserve the original attribute lists maintained by
  28  * another module.
  29  *
  30  * Each &eattr contains an attribute identifier (split to protocol ID and
  31  * per-protocol attribute ID), protocol dependent flags, a type code (consisting
  32  * of several bit fields describing attribute characteristics) and either an
  33  * embedded 32-bit value or a pointer to a &adata structure holding attribute
  34  * contents.
  35  *
  36  * There exist two variants of &rta's -- cached and un-cached ones. Un-cached
  37  * &rta's can have arbitrarily complex structure of &ea_list's and they
  38  * can be modified by any module in the route processing chain. Cached
  39  * &rta's have their attribute lists normalized (that means at most one
  40  * &ea_list is present and its values are sorted in order to speed up
  41  * searching), they are stored in a hash table to make fast lookup possible
  42  * and they are provided with a use count to allow sharing.
  43  *
  44  * Routing tables always contain only cached &rta's.
  45  */
  46
  47 #include "nest/bird.h"
  48 #include "nest/route.h"
  49 #include "nest/protocol.h"
  50 #include "nest/iface.h"
  51 #include "nest/cli.h"
  52 #include "nest/attrs.h"
  53 #include "lib/alloca.h"
  54 #include "lib/hash.h"
  55 #include "lib/idm.h"
  56 #include "lib/resource.h"
  57 #include "lib/string.h"
  58
  59 #include <stddef.h>
  60
/* Printable names of route destination types, indexed by the RTD_* value */
const char * rta_dest_names[RTD_MAX] = {
  [RTD_NONE]            = "",
  [RTD_UNICAST]         = "unicast",
  [RTD_BLACKHOLE]       = "blackhole",
  [RTD_UNREACHABLE]     = "unreachable",
  [RTD_PROHIBIT]        = "prohibited",
};
  68
/* Resource pool from which this file allocates slabs, hash tables and cached attribute data */
pool *rta_pool;

/* Slabs for &rta and &nexthop, one per label-count class (see rta_slab() and nexthop_slab()) */
static slab *rta_slab_[4];
static slab *nexthop_slab_[4];
/* Slab for &rte_src structures */
static slab *rte_src_slab;

/* Allocator of global route source IDs */
static struct idm src_ids;
#define SRC_ID_INIT_SIZE 4

/* rte source hash */

/* A source is keyed by the (protocol, private ID) pair */
#define RSH_KEY(n)              n->proto, n->private_id
#define RSH_NEXT(n)             n->next
#define RSH_EQ(p1,n1,p2,n2)     p1 == p2 && n1 == n2
#define RSH_FN(p,n)             p->hash_key ^ u32_hash(n)

/* Resize parameters consumed by the generic HASH_* macros from lib/hash.h */
#define RSH_REHASH              rte_src_rehash
#define RSH_PARAMS              /2, *2, 1, 1, 8, 20
#define RSH_INIT_ORDER          6

static HASH(struct rte_src) src_hash;

/* Protocol owning each attribute class; indexed by EA_PROTO() of an attribute ID (see ea_show()) */
struct protocol *attr_class_to_protocol[EAP_MAX];
  92
  93
  94 static void
  95 rte_src_init(void)
  96 {
  97   rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
  98
  99   idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
 100
 101   HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
 102 }
 103
 104
/* Instantiate the rehash function for the rte source hash (presumably named by RSH_REHASH -- see lib/hash.h) */
HASH_DEFINE_REHASH_FN(RSH, struct rte_src)
 106
 107 struct rte_src *
 108 rt_find_source(struct proto *p, u32 id)
 109 {
 110   return HASH_FIND(src_hash, RSH, p, id);
 111 }
 112
 113 struct rte_src *
 114 rt_get_source(struct proto *p, u32 id)
 115 {
 116   struct rte_src *src = rt_find_source(p, id);
 117
 118   if (src)
 119     return src;
 120
 121   src = sl_alloc(rte_src_slab);
 122   src->proto = p;
 123   src->private_id = id;
 124   src->global_id = idm_alloc(&src_ids);
 125   src->uc = 0;
 126
 127   HASH_INSERT2(src_hash, RSH, rta_pool, src);
 128
 129   return src;
 130 }
 131
 132 void
 133 rt_prune_sources(void)
 134 {
 135   HASH_WALK_FILTER(src_hash, next, src, sp)
 136   {
 137     if (src->uc == 0)
 138     {
 139       HASH_DO_REMOVE(src_hash, RSH, sp);
 140       idm_free(&src_ids, src->global_id);
 141       sl_free(rte_src_slab, src);
 142     }
 143   }
 144   HASH_WALK_FILTER_END;
 145
 146   HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
 147 }
 148
 149
 150 /*
 151  *      Multipath Next Hop
 152  */
 153
 154 static inline u32
 155 nexthop_hash(struct nexthop *x)
 156 {
 157   u32 h = 0;
 158   for (; x; x = x->next)
 159   {
 160     h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9);
 161
 162     for (int i = 0; i < x->labels; i++)
 163       h ^= x->label[i] ^ (h << 6) ^ (h >> 7);
 164   }
 165
 166   return h;
 167 }
 168
 169 int
 170 nexthop__same(struct nexthop *x, struct nexthop *y)
 171 {
 172   for (; x && y; x = x->next, y = y->next)
 173   {
 174     if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight) || (x->labels != y->labels))
 175       return 0;
 176
 177     for (int i = 0; i < x->labels; i++)
 178       if (x->label[i] != y->label[i])
 179         return 0;
 180   }
 181
 182   return x == y;
 183 }
 184
 185 static int
 186 nexthop_compare_node(struct nexthop *x, struct nexthop *y)
 187 {
 188   int r;
 189
 190   if (!x)
 191     return 1;
 192
 193   if (!y)
 194     return -1;
 195
 196   r = ((int) y->weight) - ((int) x->weight);
 197   if (r)
 198     return r;
 199
 200   r = ipa_compare(x->gw, y->gw);
 201   if (r)
 202     return r;
 203
 204   r = ((int) y->labels) - ((int) x->labels);
 205   if (r)
 206     return r;
 207
 208   for (int i = 0; i < y->labels; i++)
 209   {
 210     r = ((int) y->label[i]) - ((int) x->label[i]);
 211     if (r)
 212       return r;
 213   }
 214
 215   return ((int) x->iface->index) - ((int) y->iface->index);
 216 }
 217
 218 static inline struct nexthop *
 219 nexthop_copy_node(const struct nexthop *src, linpool *lp)
 220 {
 221   struct nexthop *n = lp_alloc(lp, nexthop_size(src));
 222
 223   memcpy(n, src, nexthop_size(src));
 224   n->next = NULL;
 225
 226   return n;
 227 }
 228
 229 /**
 230  * nexthop_merge - merge nexthop lists
 231  * @x: list 1
 232  * @y: list 2
 233  * @rx: reusability of list @x
 234  * @ry: reusability of list @y
 235  * @max: max number of nexthops
 236  * @lp: linpool for allocating nexthops
 237  *
 238  * The nexthop_merge() function takes two nexthop lists @x and @y and merges them,
 239  * eliminating possible duplicates. The input lists must be sorted and the
 240  * result is sorted too. The number of nexthops in result is limited by @max.
 241  * New nodes are allocated from linpool @lp.
 242  *
 243  * The arguments @rx and @ry specify whether corresponding input lists may be
 244  * consumed by the function (i.e. their nodes reused in the resulting list), in
 245  * that case the caller should not access these lists after that. To eliminate
 246  * issues with deallocation of these lists, the caller should use some form of
 247  * bulk deallocation (e.g. stack or linpool) to free these nodes when the
 248  * resulting list is no longer needed. When reusability is not set, the
 249  * corresponding lists are not modified nor linked from the resulting list.
 250  */
 251 struct nexthop *
 252 nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp)
 253 {
 254   struct nexthop *root = NULL;
 255   struct nexthop **n = &root;
 256
 257   while ((x || y) && max--)
 258   {
 259     int cmp = nexthop_compare_node(x, y);
 260     if (cmp < 0)
 261     {
 262       *n = rx ? x : nexthop_copy_node(x, lp);
 263       x = x->next;
 264     }
 265     else if (cmp > 0)
 266     {
 267       *n = ry ? y : nexthop_copy_node(y, lp);
 268       y = y->next;
 269     }
 270     else
 271     {
 272       *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp));
 273       x = x->next;
 274       y = y->next;
 275     }
 276     n = &((*n)->next);
 277   }
 278   *n = NULL;
 279
 280   return root;
 281 }
 282
 283 void
 284 nexthop_insert(struct nexthop **n, struct nexthop *x)
 285 {
 286   for (; *n; n = &((*n)->next))
 287   {
 288     int cmp = nexthop_compare_node(*n, x);
 289
 290     if (cmp < 0)
 291       continue;
 292     else if (cmp > 0)
 293       break;
 294     else
 295       return;
 296   }
 297
 298   x->next = *n;
 299   *n = x;
 300 }
 301
 302 int
 303 nexthop_is_sorted(struct nexthop *x)
 304 {
 305   for (; x && x->next; x = x->next)
 306     if (nexthop_compare_node(x, x->next) >= 0)
 307       return 0;
 308
 309   return 1;
 310 }
 311
 312 static inline slab *
 313 nexthop_slab(struct nexthop *nh)
 314 {
 315   return nexthop_slab_[MIN(nh->labels, 3)];
 316 }
 317
 318 static struct nexthop *
 319 nexthop_copy(struct nexthop *o)
 320 {
 321   struct nexthop *first = NULL;
 322   struct nexthop **last = &first;
 323
 324   for (; o; o = o->next)
 325     {
 326       struct nexthop *n = sl_alloc(nexthop_slab(o));
 327       n->gw = o->gw;
 328       n->iface = o->iface;
 329       n->next = NULL;
 330       n->weight = o->weight;
 331       n->labels = o->labels;
 332       for (int i=0; i<o->labels; i++)
 333         n->label[i] = o->label[i];
 334
 335       *last = n;
 336       last = &(n->next);
 337     }
 338
 339   return first;
 340 }
 341
 342 static void
 343 nexthop_free(struct nexthop *o)
 344 {
 345   struct nexthop *n;
 346
 347   while (o)
 348     {
 349       n = o->next;
 350       sl_free(nexthop_slab(o), o);
 351       o = n;
 352     }
 353 }
 354
 355
 356 /*
 357  *      Extended Attributes
 358  */
 359
 360 static inline eattr *
 361 ea__find(ea_list *e, unsigned id)
 362 {
 363   eattr *a;
 364   int l, r, m;
 365
 366   while (e)
 367     {
 368       if (e->flags & EALF_BISECT)
 369         {
 370           l = 0;
 371           r = e->count - 1;
 372           while (l <= r)
 373             {
 374               m = (l+r) / 2;
 375               a = &e->attrs[m];
 376               if (a->id == id)
 377                 return a;
 378               else if (a->id < id)
 379                 l = m+1;
 380               else
 381                 r = m-1;
 382             }
 383         }
 384       else
 385         for(m=0; m<e->count; m++)
 386           if (e->attrs[m].id == id)
 387             return &e->attrs[m];
 388       e = e->next;
 389     }
 390   return NULL;
 391 }
 392
 393 /**
 394  * ea_find - find an extended attribute
 395  * @e: attribute list to search in
 396  * @id: attribute ID to search for
 397  *
 398  * Given an extended attribute list, ea_find() searches for a first
 399  * occurrence of an attribute with specified ID, returning either a pointer
 400  * to its &eattr structure or %NULL if no such attribute exists.
 401  */
 402 eattr *
 403 ea_find(ea_list *e, unsigned id)
 404 {
 405   eattr *a = ea__find(e, id & EA_CODE_MASK);
 406
 407   if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF &&
 408       !(id & EA_ALLOW_UNDEF))
 409     return NULL;
 410   return a;
 411 }
 412
/**
 * ea_walk - walk through extended attributes
 * @s: walk state structure
 * @id: start of attribute ID interval
 * @max: length of attribute ID interval
 *
 * Given an extended attribute list, ea_walk() walks through the list looking
 * for first occurrences of attributes with ID in the specified interval from
 * @id to (@id + @max - 1), returning pointers to the found &eattr structures
 * and storing its walk state in @s for subsequent calls.
 *
 * The function ea_walk() is supposed to be called in a loop. Before the first
 * call, the walk state structure @s has to be zeroed and its attribute list
 * pointer set to the list to be walked. Each call returns one found attribute,
 * or %NULL when no other attribute exists. Neither the extended attribute list
 * nor the arguments should be modified between calls. The maximum value of
 * @max is 128.
 */
eattr *
ea_walk(struct ea_walk_state *s, uint id, uint max)
{
  ea_list *e = s->eattrs;
  eattr *a = s->ea;
  eattr *a_max;

  /* From now on, max is the exclusive upper bound of the ID interval */
  max = id + max;

  /* Resuming: continue inside the segment where the previous call stopped */
  if (a)
    goto step;

  for (; e; e = e->next)
  {
    if (e->flags & EALF_BISECT)
    {
      int l, r, m;

      /* Bisect to the first attribute whose ID is not below the interval start */
      l = 0;
      r = e->count - 1;
      while (l < r)
      {
        m = (l+r) / 2;
        if (e->attrs[m].id < id)
          l = m + 1;
        else
          r = m;
      }
      a = e->attrs + l;
    }
    else
      a = e->attrs;

  step:
    a_max = e->attrs + e->count;
    for (; a < a_max; a++)
      if ((a->id >= id) && (a->id < max))
      {
        int n = a->id - id;

        /* Already seen in this walk (including the attribute returned last time) */
        if (BIT32_TEST(s->visited, n))
          continue;

        BIT32_SET(s->visited, n);

        /* Undefined attributes are marked as visited, but never returned */
        if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
          continue;

        s->eattrs = e;
        s->ea = a;
        return a;
      }
      else if (e->flags & EALF_BISECT)
        /* Out of the interval in a bisectable segment: stop scanning this segment */
        break;
  }

  return NULL;
}
 488
 489 /**
 490  * ea_get_int - fetch an integer attribute
 491  * @e: attribute list
 492  * @id: attribute ID
 493  * @def: default value
 494  *
 495  * This function is a shortcut for retrieving a value of an integer attribute
 496  * by calling ea_find() to find the attribute, extracting its value or returning
 497  * a provided default if no such attribute is present.
 498  */
 499 int
 500 ea_get_int(ea_list *e, unsigned id, int def)
 501 {
 502   eattr *a = ea_find(e, id);
 503   if (!a)
 504     return def;
 505   return a->u.data;
 506 }
 507
/*
 * Sort the attributes of a single &ea_list segment by ID, in place.
 *
 * The algorithm is a natural merge sort: every pass scans the array for
 * pairs of adjacent ascending runs, copies both runs to the temporary
 * buffer @b and merges them back to their original position in @a.
 * Passes are repeated until the last run (pair) of a pass starts at
 * index 0, i.e., the whole array has become a single run.
 *
 * The temporary buffer is taken from the stack by alloca().
 */
static inline void
ea_do_sort(ea_list *e)
{
  unsigned n = e->count;
  eattr *a = e->attrs;
  eattr *b = alloca(n * sizeof(eattr));
  unsigned s, ss;

  /* We need to use a stable sorting algorithm, hence mergesort */
  do
    {
      s = ss = 0;
      while (s < n)
        {
          eattr *p, *q, *lo, *hi;
          p = b;
          ss = s;                       /* Start of the current run pair */
          /* Copy the first ascending run to the buffer */
          *p++ = a[s++];
          while (s < n && p[-1].id <= a[s].id)
            *p++ = a[s++];
          if (s < n)
            {
              /* Copy the second ascending run; q marks where it begins */
              q = p;
              *p++ = a[s++];
              while (s < n && p[-1].id <= a[s].id)
                *p++ = a[s++];
              /* Merge both runs back; ties are taken from the first run to keep stability */
              lo = b;
              hi = q;
              s = ss;
              while (lo < q && hi < p)
                if (lo->id <= hi->id)
                  a[s++] = *lo++;
                else
                  a[s++] = *hi++;
              while (lo < q)
                a[s++] = *lo++;
              while (hi < p)
                a[s++] = *hi++;
            }
          /* Otherwise the first run reached the end of the array and stays where it is */
        }
    }
  while (ss);                           /* ss == 0: the last pass saw a single run (pair) */
}
 551
 552 static inline void
 553 ea_do_prune(ea_list *e)
 554 {
 555   eattr *s, *d, *l, *s0;
 556   int i = 0;
 557
 558   /* Discard duplicates and undefs. Do you remember sorting was stable? */
 559   s = d = e->attrs;
 560   l = e->attrs + e->count;
 561   while (s < l)
 562     {
 563       s0 = s++;
 564       while (s < l && s->id == s[-1].id)
 565         s++;
 566       /* s0 is the most recent version, s[-1] the oldest one */
 567       if ((s0->type & EAF_TYPE_MASK) != EAF_TYPE_UNDEF)
 568         {
 569           *d = *s0;
 570           d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED);
 571           d++;
 572           i++;
 573         }
 574     }
 575   e->count = i;
 576 }
 577
 578 /**
 579  * ea_sort - sort an attribute list
 580  * @e: list to be sorted
 581  *
 582  * This function takes a &ea_list chain and sorts the attributes
 583  * within each of its entries.
 584  *
 585  * If an attribute occurs multiple times in a single &ea_list,
 586  * ea_sort() leaves only the first (the only significant) occurrence.
 587  */
 588 void
 589 ea_sort(ea_list *e)
 590 {
 591   while (e)
 592     {
 593       if (!(e->flags & EALF_SORTED))
 594         {
 595           ea_do_sort(e);
 596           ea_do_prune(e);
 597           e->flags |= EALF_SORTED;
 598         }
 599       if (e->count > 5)
 600         e->flags |= EALF_BISECT;
 601       e = e->next;
 602     }
 603 }
 604
 605 /**
 606  * ea_scan - estimate attribute list size
 607  * @e: attribute list
 608  *
 609  * This function calculates an upper bound of the size of
 610  * a given &ea_list after merging with ea_merge().
 611  */
 612 unsigned
 613 ea_scan(ea_list *e)
 614 {
 615   unsigned cnt = 0;
 616
 617   while (e)
 618     {
 619       cnt += e->count;
 620       e = e->next;
 621     }
 622   return sizeof(ea_list) + sizeof(eattr)*cnt;
 623 }
 624
 625 /**
 626  * ea_merge - merge segments of an attribute list
 627  * @e: attribute list
 628  * @t: buffer to store the result to
 629  *
 630  * This function takes a possibly multi-segment attribute list
 631  * and merges all of its segments to one.
 632  *
 633  * The primary use of this function is for &ea_list normalization:
 634  * first call ea_scan() to determine how much memory will the result
 635  * take, then allocate a buffer (usually using alloca()), merge the
 636  * segments with ea_merge() and finally sort and prune the result
 637  * by calling ea_sort().
 638  */
 639 void
 640 ea_merge(ea_list *e, ea_list *t)
 641 {
 642   eattr *d = t->attrs;
 643
 644   t->flags = 0;
 645   t->count = 0;
 646   t->next = NULL;
 647   while (e)
 648     {
 649       memcpy(d, e->attrs, sizeof(eattr)*e->count);
 650       t->count += e->count;
 651       d += e->count;
 652       e = e->next;
 653     }
 654 }
 655
 656 /**
 657  * ea_same - compare two &ea_list's
 658  * @x: attribute list
 659  * @y: attribute list
 660  *
 661  * ea_same() compares two normalized attribute lists @x and @y and returns
 662  * 1 if they contain the same attributes, 0 otherwise.
 663  */
 664 int
 665 ea_same(ea_list *x, ea_list *y)
 666 {
 667   int c;
 668
 669   if (!x || !y)
 670     return x == y;
 671   ASSERT(!x->next && !y->next);
 672   if (x->count != y->count)
 673     return 0;
 674   for(c=0; c<x->count; c++)
 675     {
 676       eattr *a = &x->attrs[c];
 677       eattr *b = &y->attrs[c];
 678
 679       if (a->id != b->id ||
 680           a->flags != b->flags ||
 681           a->type != b->type ||
 682           ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr)))
 683         return 0;
 684     }
 685   return 1;
 686 }
 687
 688 static inline ea_list *
 689 ea_list_copy(ea_list *o)
 690 {
 691   ea_list *n;
 692   unsigned i, len;
 693
 694   if (!o)
 695     return NULL;
 696   ASSERT(!o->next);
 697   len = sizeof(ea_list) + sizeof(eattr) * o->count;
 698   n = mb_alloc(rta_pool, len);
 699   memcpy(n, o, len);
 700   n->flags |= EALF_CACHED;
 701   for(i=0; i<o->count; i++)
 702     {
 703       eattr *a = &n->attrs[i];
 704       if (!(a->type & EAF_EMBEDDED))
 705         {
 706           unsigned size = sizeof(struct adata) + a->u.ptr->length;
 707           struct adata *d = mb_alloc(rta_pool, size);
 708           memcpy(d, a->u.ptr, size);
 709           a->u.ptr = d;
 710         }
 711     }
 712   return n;
 713 }
 714
 715 static inline void
 716 ea_free(ea_list *o)
 717 {
 718   int i;
 719
 720   if (o)
 721     {
 722       ASSERT(!o->next);
 723       for(i=0; i<o->count; i++)
 724         {
 725           eattr *a = &o->attrs[i];
 726           if (!(a->type & EAF_EMBEDDED))
 727             mb_free(a->u.ptr);
 728         }
 729       mb_free(o);
 730     }
 731 }
 732
 733 static int
 734 get_generic_attr(eattr *a, byte **buf, int buflen UNUSED)
 735 {
 736   if (a->id == EA_GEN_IGP_METRIC)
 737     {
 738       *buf += bsprintf(*buf, "igp_metric");
 739       return GA_NAME;
 740     }
 741
 742   return GA_UNKNOWN;
 743 }
 744
 745 void
 746 ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max)
 747 {
 748   byte *bound = buf + bufsize - 32;
 749   u32 data = a->u.data;
 750   int i;
 751
 752   for (i = min; i < max; i++)
 753     if ((data & (1u << i)) && names[i])
 754     {
 755       if (buf > bound)
 756       {
 757         strcpy(buf, " ...");
 758         return;
 759       }
 760
 761       buf += bsprintf(buf, " %s", names[i]);
 762       data &= ~(1u << i);
 763     }
 764
 765   if (data)
 766     bsprintf(buf, " %08x", data);
 767
 768   return;
 769 }
 770
 771 static inline void
 772 opaque_format(struct adata *ad, byte *buf, uint size)
 773 {
 774   byte *bound = buf + size - 10;
 775   uint i;
 776
 777   for(i = 0; i < ad->length; i++)
 778     {
 779       if (buf > bound)
 780         {
 781           strcpy(buf, " ...");
 782           return;
 783         }
 784       if (i)
 785         *buf++ = ' ';
 786
 787       buf += bsprintf(buf, "%02x", ad->data[i]);
 788     }
 789
 790   *buf = 0;
 791   return;
 792 }
 793
 794 static inline void
 795 ea_show_int_set(struct cli *c, struct adata *ad, int way, byte *pos, byte *buf, byte *end)
 796 {
 797   int i = int_set_format(ad, way, 0, pos, end - pos);
 798   cli_printf(c, -1012, "\t%s", buf);
 799   while (i)
 800     {
 801       i = int_set_format(ad, way, i, buf, end - buf - 1);
 802       cli_printf(c, -1012, "\t\t%s", buf);
 803     }
 804 }
 805
 806 static inline void
 807 ea_show_ec_set(struct cli *c, struct adata *ad, byte *pos, byte *buf, byte *end)
 808 {
 809   int i = ec_set_format(ad, 0, pos, end - pos);
 810   cli_printf(c, -1012, "\t%s", buf);
 811   while (i)
 812     {
 813       i = ec_set_format(ad, i, buf, end - buf - 1);
 814       cli_printf(c, -1012, "\t\t%s", buf);
 815     }
 816 }
 817
 818 static inline void
 819 ea_show_lc_set(struct cli *c, struct adata *ad, byte *pos, byte *buf, byte *end)
 820 {
 821   int i = lc_set_format(ad, 0, pos, end - pos);
 822   cli_printf(c, -1012, "\t%s", buf);
 823   while (i)
 824     {
 825       i = lc_set_format(ad, i, buf, end - buf - 1);
 826       cli_printf(c, -1012, "\t\t%s", buf);
 827     }
 828 }
 829
 830 /**
 831  * ea_show - print an &eattr to CLI
 832  * @c: destination CLI
 833  * @e: attribute to be printed
 834  *
 835  * This function takes an extended attribute represented by its &eattr
 836  * structure and prints it to the CLI according to the type information.
 837  *
 838  * If the protocol defining the attribute provides its own
 839  * get_attr() hook, it's consulted first.
 840  */
 841 void
 842 ea_show(struct cli *c, eattr *e)
 843 {
 844   struct protocol *p;
 845   int status = GA_UNKNOWN;
 846   struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr;
 847   byte buf[CLI_MSG_SIZE];
 848   byte *pos = buf, *end = buf + sizeof(buf);
 849
 850   if (p = attr_class_to_protocol[EA_PROTO(e->id)])
 851     {
 852       pos += bsprintf(pos, "%s.", p->name);
 853       if (p->get_attr)
 854         status = p->get_attr(e, pos, end - pos);
 855       pos += strlen(pos);
 856     }
 857   else if (EA_PROTO(e->id))
 858     pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
 859   else
 860     status = get_generic_attr(e, &pos, end - pos);
 861
 862   if (status < GA_NAME)
 863     pos += bsprintf(pos, "%02x", EA_ID(e->id));
 864   if (status < GA_FULL)
 865     {
 866       *pos++ = ':';
 867       *pos++ = ' ';
 868       switch (e->type & EAF_TYPE_MASK)
 869         {
 870         case EAF_TYPE_INT:
 871           bsprintf(pos, "%u", e->u.data);
 872           break;
 873         case EAF_TYPE_OPAQUE:
 874           opaque_format(ad, pos, end - pos);
 875           break;
 876         case EAF_TYPE_IP_ADDRESS:
 877           bsprintf(pos, "%I", *(ip_addr *) ad->data);
 878           break;
 879         case EAF_TYPE_ROUTER_ID:
 880           bsprintf(pos, "%R", e->u.data);
 881           break;
 882         case EAF_TYPE_AS_PATH:
 883           as_path_format(ad, pos, end - pos);
 884           break;
 885         case EAF_TYPE_BITFIELD:
 886           bsprintf(pos, "%08x", e->u.data);
 887           break;
 888         case EAF_TYPE_INT_SET:
 889           ea_show_int_set(c, ad, 1, pos, buf, end);
 890           return;
 891         case EAF_TYPE_EC_SET:
 892           ea_show_ec_set(c, ad, pos, buf, end);
 893           return;
 894         case EAF_TYPE_LC_SET:
 895           ea_show_lc_set(c, ad, pos, buf, end);
 896           return;
 897         case EAF_TYPE_UNDEF:
 898         default:
 899           bsprintf(pos, "<type %02x>", e->type);
 900         }
 901     }
 902   cli_printf(c, -1012, "\t%s", buf);
 903 }
 904
 905 /**
 906  * ea_dump - dump an extended attribute
 907  * @e: attribute to be dumped
 908  *
 909  * ea_dump() dumps contents of the extended attribute given to
 910  * the debug output.
 911  */
 912 void
 913 ea_dump(ea_list *e)
 914 {
 915   int i;
 916
 917   if (!e)
 918     {
 919       debug("NONE");
 920       return;
 921     }
 922   while (e)
 923     {
 924       debug("[%c%c%c]",
 925             (e->flags & EALF_SORTED) ? 'S' : 's',
 926             (e->flags & EALF_BISECT) ? 'B' : 'b',
 927             (e->flags & EALF_CACHED) ? 'C' : 'c');
 928       for(i=0; i<e->count; i++)
 929         {
 930           eattr *a = &e->attrs[i];
 931           debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags);
 932           if (a->type & EAF_TEMP)
 933             debug("T");
 934           debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]);
 935           if (a->type & EAF_ORIGINATED)
 936             debug("o");
 937           if (a->type & EAF_EMBEDDED)
 938             debug(":%08x", a->u.data);
 939           else
 940             {
 941               int j, len = a->u.ptr->length;
 942               debug("[%d]:", len);
 943               for(j=0; j<len; j++)
 944                 debug("%02x", a->u.ptr->data[j]);
 945             }
 946         }
 947       if (e = e->next)
 948         debug(" | ");
 949     }
 950 }
 951
 952 /**
 953  * ea_hash - calculate an &ea_list hash key
 954  * @e: attribute list
 955  *
 956  * ea_hash() takes an extended attribute list and calculated a hopefully
 957  * uniformly distributed hash value from its contents.
 958  */
 959 inline uint
 960 ea_hash(ea_list *e)
 961 {
 962   const u64 mul = 0x68576150f3d6847;
 963   u64 h = 0xafcef24eda8b29;
 964   int i;
 965
 966   if (e)                        /* Assuming chain of length 1 */
 967     {
 968       for(i=0; i<e->count; i++)
 969         {
 970           struct eattr *a = &e->attrs[i];
 971           h ^= a->id; h *= mul;
 972           if (a->type & EAF_EMBEDDED)
 973             h ^= a->u.data;
 974           else
 975             {
 976               struct adata *d = a->u.ptr;
 977               h ^= mem_hash(d->data, d->length);
 978             }
 979           h *= mul;
 980         }
 981     }
 982   return (h >> 32) ^ (h & 0xffffffff);
 983 }
 984
 985 /**
 986  * ea_append - concatenate &ea_list's
 987  * @to: destination list (can be %NULL)
 988  * @what: list to be appended (can be %NULL)
 989  *
 990  * This function appends the &ea_list @what at the end of
 991  * &ea_list @to and returns a pointer to the resulting list.
 992  */
 993 ea_list *
 994 ea_append(ea_list *to, ea_list *what)
 995 {
 996   ea_list *res;
 997
 998   if (!to)
 999     return what;
1000   res = to;
1001   while (to->next)
1002     to = to->next;
1003   to->next = what;
1004   return res;
1005 }
1006
1007 /*
1008  *      rta's
1009  */
1010
/* Number of &rta's currently held in the cache */
static uint rta_cache_count;
/* Number of buckets of the hash table */
static uint rta_cache_size = 32;
/* Entry count above which rta_lookup() triggers rta_rehash() */
static uint rta_cache_limit;
/* Mask turning a hash key into a bucket index (rta_cache_size - 1) */
static uint rta_cache_mask;
/* The hash table itself: an array of bucket list heads */
static rta **rta_hash_table;
1016
1017 static void
1018 rta_alloc_hash(void)
1019 {
1020   rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size);
1021   if (rta_cache_size < 32768)
1022     rta_cache_limit = rta_cache_size * 2;
1023   else
1024     rta_cache_limit = ~0;
1025   rta_cache_mask = rta_cache_size - 1;
1026 }
1027
1028 static inline uint
1029 rta_hash(rta *a)
1030 {
1031   u64 h;
1032   mem_hash_init(&h);
1033 #define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f));
1034   MIX(src);
1035   MIX(hostentry);
1036   MIX(from);
1037   MIX(igp_metric);
1038   MIX(source);
1039   MIX(scope);
1040   MIX(dest);
1041 #undef MIX
1042
1043   return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs);
1044 }
1045
1046 static inline int
1047 rta_same(rta *x, rta *y)
1048 {
1049   return (x->src == y->src &&
1050           x->source == y->source &&
1051           x->scope == y->scope &&
1052           x->dest == y->dest &&
1053           x->igp_metric == y->igp_metric &&
1054           ipa_equal(x->from, y->from) &&
1055           x->hostentry == y->hostentry &&
1056           nexthop_same(&(x->nh), &(y->nh)) &&
1057           ea_same(x->eattrs, y->eattrs));
1058 }
1059
1060 static inline slab *
1061 rta_slab(rta *a)
1062 {
1063   return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels];
1064 }
1065
1066 static rta *
1067 rta_copy(rta *o)
1068 {
1069   rta *r = sl_alloc(rta_slab(o));
1070
1071   memcpy(r, o, rta_size(o));
1072   r->uc = 1;
1073   r->nh.next = nexthop_copy(o->nh.next);
1074   r->eattrs = ea_list_copy(o->eattrs);
1075   return r;
1076 }
1077
1078 static inline void
1079 rta_insert(rta *r)
1080 {
1081   uint h = r->hash_key & rta_cache_mask;
1082   r->next = rta_hash_table[h];
1083   if (r->next)
1084     r->next->pprev = &r->next;
1085   r->pprev = &rta_hash_table[h];
1086   rta_hash_table[h] = r;
1087 }
1088
1089 static void
1090 rta_rehash(void)
1091 {
1092   uint ohs = rta_cache_size;
1093   uint h;
1094   rta *r, *n;
1095   rta **oht = rta_hash_table;
1096
1097   rta_cache_size = 2*rta_cache_size;
1098   DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size);
1099   rta_alloc_hash();
1100   for(h=0; h<ohs; h++)
1101     for(r=oht[h]; r; r=n)
1102       {
1103         n = r->next;
1104         rta_insert(r);
1105       }
1106   mb_free(oht);
1107 }
1108
1109 /**
1110  * rta_lookup - look up a &rta in attribute cache
1111  * @o: a un-cached &rta
1112  *
1113  * rta_lookup() gets an un-cached &rta structure and returns its cached
1114  * counterpart. It starts with examining the attribute cache to see whether
1115  * there exists a matching entry. If such an entry exists, it's returned and
1116  * its use count is incremented, else a new entry is created with use count
1117  * set to 1.
1118  *
1119  * The extended attribute lists attached to the &rta are automatically
1120  * converted to the normalized form.
1121  */
1122 rta *
1123 rta_lookup(rta *o)
1124 {
1125   rta *r;
1126   uint h;
1127
1128   ASSERT(!(o->aflags & RTAF_CACHED));
1129   if (o->eattrs)
1130     {
1131       if (o->eattrs->next)      /* Multiple ea_list's, need to merge them */
1132         {
1133           ea_list *ml = alloca(ea_scan(o->eattrs));
1134           ea_merge(o->eattrs, ml);
1135           o->eattrs = ml;
1136         }
1137       ea_sort(o->eattrs);
1138     }
1139
1140   h = rta_hash(o);
1141   for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
1142     if (r->hash_key == h && rta_same(r, o))
1143       return rta_clone(r);
1144
1145   r = rta_copy(o);
1146   r->hash_key = h;
1147   r->aflags = RTAF_CACHED;
1148   rt_lock_source(r->src);
1149   rt_lock_hostentry(r->hostentry);
1150   rta_insert(r);
1151
1152   if (++rta_cache_count > rta_cache_limit)
1153     rta_rehash();
1154
1155   return r;
1156 }
1157
1158 void
1159 rta__free(rta *a)
1160 {
1161   ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED));
1162   rta_cache_count--;
1163   *a->pprev = a->next;
1164   if (a->next)
1165     a->next->pprev = a->pprev;
1166   rt_unlock_hostentry(a->hostentry);
1167   rt_unlock_source(a->src);
1168   if (a->nh.next)
1169     nexthop_free(a->nh.next);
1170   ea_free(a->eattrs);
1171   a->aflags = 0;                /* Poison the entry */
1172   sl_free(rta_slab(a), a);
1173 }
1174
1175 rta *
1176 rta_do_cow(rta *o, linpool *lp)
1177 {
1178   rta *r = lp_alloc(lp, rta_size(o));
1179   memcpy(r, o, rta_size(o));
1180   for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next)
1181     {
1182       *nhn = lp_alloc(lp, nexthop_size(nho));
1183       memcpy(*nhn, nho, nexthop_size(nho));
1184       nhn = &((*nhn)->next);
1185     }
1186   r->aflags = 0;
1187   r->uc = 0;
1188   return r;
1189 }
1190
1191 /**
1192  * rta_dump - dump route attributes
1193  * @a: attribute structure to dump
1194  *
1195  * This function takes a &rta and dumps its contents to the debug output.
1196  */
1197 void
1198 rta_dump(rta *a)
1199 {
1200   static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
1201                          "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP",
1202                          "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1",
1203                          "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" };
1204   static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };
1205
1206   debug("p=%s uc=%d %s %s%s h=%04x",
1207         a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope),
1208         rtd[a->dest], a->hash_key);
1209   if (!(a->aflags & RTAF_CACHED))
1210     debug(" !CACHED");
1211   debug(" <-%I", a->from);
1212   if (a->dest == RTD_UNICAST)
1213     for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
1214       {
1215         if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw);
1216         if (nh->labels) debug(" L %d", nh->label[0]);
1217         for (int i=1; i<nh->labels; i++)
1218           debug("/%d", nh->label[i]);
1219         debug(" [%s]", nh->iface ? nh->iface->name : "???");
1220       }
1221   if (a->eattrs)
1222     {
1223       debug(" EA: ");
1224       ea_dump(a->eattrs);
1225     }
1226 }
1227
1228 /**
1229  * rta_dump_all - dump attribute cache
1230  *
1231  * This function dumps the whole contents of route attribute cache
1232  * to the debug output.
1233  */
1234 void
1235 rta_dump_all(void)
1236 {
1237   rta *a;
1238   uint h;
1239
1240   debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
1241   for(h=0; h<rta_cache_size; h++)
1242     for(a=rta_hash_table[h]; a; a=a->next)
1243       {
1244         debug("%p ", a);
1245         rta_dump(a);
1246         debug("\n");
1247       }
1248   debug("\n");
1249 }
1250
1251 void
1252 rta_show(struct cli *c, rta *a, ea_list *eal)
1253 {
1254   static char *src_names[] = { "dummy", "static", "inherit", "device", "static-device", "redirect",
1255                                "RIP", "OSPF", "OSPF-IA", "OSPF-E1", "OSPF-E2", "BGP", "pipe" };
1256   int i;
1257
1258   cli_printf(c, -1008, "\tType: %s %s", src_names[a->source], ip_scope_text(a->scope));
1259   if (!eal)
1260     eal = a->eattrs;
1261   for(; eal; eal=eal->next)
1262     for(i=0; i<eal->count; i++)
1263       ea_show(c, &eal->attrs[i]);
1264 }
1265
1266 /**
1267  * rta_init - initialize route attribute cache
1268  *
1269  * This function is called during initialization of the routing
1270  * table module to set up the internals of the attribute cache.
1271  */
1272 void
1273 rta_init(void)
1274 {
1275   rta_pool = rp_new(&root_pool, "Attributes");
1276
1277   rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
1278   rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
1279   rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2);
1280   rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
1281
1282   nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop));
1283   nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32));
1284   nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
1285   nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);
1286
1287   rta_alloc_hash();
1288   rte_src_init();
1289 }
1290
1291 /*
1292  *  Documentation for functions declared inline in route.h
1293  */
1294 #if 0
1295
1296 /**
1297  * rta_clone - clone route attributes
1298  * @r: a &rta to be cloned
1299  *
1300  * rta_clone() takes a cached &rta and returns its identical cached
1301  * copy. Currently it works by just returning the original &rta with
1302  * its use count incremented.
1303  */
1304 static inline rta *rta_clone(rta *r)
1305 { DUMMY; }
1306
1307 /**
1308  * rta_free - free route attributes
1309  * @r: a &rta to be freed
1310  *
 * If you stop using a &rta (for example when deleting a route which uses
 * it), you need to call rta_free() to notify the attribute cache that the
 * attribute is no longer in use, so that it can be freed if you were the
 * last user (which rta_free() tests by inspecting the use count).
1315  */
1316 static inline void rta_free(rta *r)
1317 { DUMMY; }
1318
1319 #endif