/*
 *	BIRD -- Route Attribute Cache
 *
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

/**
 * DOC: Route attribute cache
 *
 * Each route entry carries a set of route attributes. Several of them
 * vary from route to route, but most attributes are usually common
 * for a large number of routes. To conserve memory, we've decided to
 * store only the varying ones directly in the &rte and hold the rest
 * in a special structure called &rta which is shared among all the
 * &rte's with these attributes.
 *
 * Each &rta contains all the static attributes of the route (i.e.,
 * those which are always present) as structure members and a list of
 * dynamic attributes represented by a linked list of &ea_list
 * structures, each of them consisting of an array of &eattr's containing
 * the individual attributes. An attribute can be specified more than once
 * in the &ea_list chain and in such a case the first occurrence overrides
 * the others. These semantics are used especially when someone (for example
 * a filter) wishes to alter values of several dynamic attributes, but
 * wants to preserve the original attribute lists maintained by
 * another module.
 *
 * Each &eattr contains an attribute identifier (split into a protocol ID and
 * a per-protocol attribute ID), protocol-dependent flags, a type code (consisting
 * of several bit fields describing attribute characteristics) and either an
 * embedded 32-bit value or a pointer to an &adata structure holding the
 * attribute contents.
 *
 * There exist two variants of &rta's -- cached and un-cached ones. Un-cached
 * &rta's can have an arbitrarily complex structure of &ea_list's and they
 * can be modified by any module in the route processing chain. Cached
 * &rta's have their attribute lists normalized (that means at most one
 * &ea_list is present and its values are sorted in order to speed up
 * searching), they are stored in a hash table to make fast lookup possible
 * and they are provided with a use count to allow sharing.
 *
 * Routing tables always contain only cached &rta's.
 */
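
/*
 * A minimal sketch of the override semantics described above (illustration
 * only, not compiled): a module such as a filter can prepend its own
 * one-entry &ea_list to an existing chain; since the first occurrence of an
 * attribute ID wins, the original lists stay untouched. The linpool @lp and
 * the new metric value are assumed to be supplied by the caller.
 */
#if 0
static ea_list *
override_igp_metric(ea_list *orig, linpool *lp, u32 new_metric)
{
  ea_list *l = lp_alloc(lp, sizeof(ea_list) + sizeof(eattr));

  l->next = orig;			/* Keep the original chain intact */
  l->flags = EALF_SORTED;		/* A single entry is trivially sorted */
  l->count = 1;
  l->attrs[0].id = EA_GEN_IGP_METRIC;
  l->attrs[0].flags = 0;
  l->attrs[0].type = EAF_TYPE_INT;
  l->attrs[0].u.data = new_metric;

  return l;				/* ea_find() now sees the new value first */
}
#endif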

#include "nest/bird.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/cli.h"
#include "nest/attrs.h"
#include "lib/alloca.h"
#include "lib/hash.h"
#include "lib/idm.h"
#include "lib/resource.h"
#include "lib/string.h"

#include <stddef.h>

pool *rta_pool;

static slab *rta_slab_[4];
static slab *nexthop_slab_[4];
static slab *rte_src_slab;

static struct idm src_ids;
#define SRC_ID_INIT_SIZE 4

/* rte source hash */

#define RSH_KEY(n)		n->proto, n->private_id
#define RSH_NEXT(n)		n->next
#define RSH_EQ(p1,n1,p2,n2)	p1 == p2 && n1 == n2
#define RSH_FN(p,n)		p->hash_key ^ u32_hash(n)

#define RSH_REHASH		rte_src_rehash
#define RSH_PARAMS		/2, *2, 1, 1, 8, 20
#define RSH_INIT_ORDER		6

static HASH(struct rte_src) src_hash;

struct protocol *attr_class_to_protocol[EAP_MAX];


static void
rte_src_init(void)
{
  rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));

  idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);

  HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER);
}


HASH_DEFINE_REHASH_FN(RSH, struct rte_src)

struct rte_src *
rt_find_source(struct proto *p, u32 id)
{
  return HASH_FIND(src_hash, RSH, p, id);
}

struct rte_src *
rt_get_source(struct proto *p, u32 id)
{
  struct rte_src *src = rt_find_source(p, id);

  if (src)
    return src;

  src = sl_alloc(rte_src_slab);
  src->proto = p;
  src->private_id = id;
  src->global_id = idm_alloc(&src_ids);
  src->uc = 0;

  HASH_INSERT2(src_hash, RSH, rta_pool, src);

  return src;
}
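
/*
 * A hedged usage sketch (not compiled): protocols obtain per-source handles
 * with rt_get_source() and manage the use count via rt_lock_source() /
 * rt_unlock_source() from route.h; sources with a zero use count are
 * reclaimed by rt_prune_sources(). The private ID 42 is illustrative.
 */
#if 0
struct rte_src *s = rt_get_source(p, 42);
rt_lock_source(s);		/* Hold the source while routes reference it */
/* ... announce routes with src == s ... */
rt_unlock_source(s);
rt_prune_sources();		/* Now the unused source may be freed */
#endif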

void
rt_prune_sources(void)
{
  HASH_WALK_FILTER(src_hash, next, src, sp)
  {
    if (src->uc == 0)
    {
      HASH_DO_REMOVE(src_hash, RSH, sp);
      idm_free(&src_ids, src->global_id);
      sl_free(rte_src_slab, src);
    }
  }
  HASH_WALK_FILTER_END;

  HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool);
}


/*
 *	Multipath Next Hop
 */

static inline u32
nexthop_hash(struct nexthop *x)
{
  u32 h = 0;
  for (; x; x = x->next)
  {
    h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9);

    for (int i = 0; i < x->labels; i++)
      h ^= x->label[i] ^ (h << 6) ^ (h >> 7);
  }

  return h;
}

int
nexthop__same(struct nexthop *x, struct nexthop *y)
{
  for (; x && y; x = x->next, y = y->next)
  {
    if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight) || (x->labels != y->labels))
      return 0;

    for (int i = 0; i < x->labels; i++)
      if (x->label[i] != y->label[i])
	return 0;
  }

  return x == y;
}

static int
nexthop_compare_node(struct nexthop *x, struct nexthop *y)
{
  int r;

  if (!x)
    return 1;

  if (!y)
    return -1;

  r = ((int) y->weight) - ((int) x->weight);
  if (r)
    return r;

  r = ipa_compare(x->gw, y->gw);
  if (r)
    return r;

  r = ((int) y->labels) - ((int) x->labels);
  if (r)
    return r;

  for (int i = 0; i < y->labels; i++)
  {
    r = ((int) y->label[i]) - ((int) x->label[i]);
    if (r)
      return r;
  }

  return ((int) x->iface->index) - ((int) y->iface->index);
}

static inline struct nexthop *
nexthop_copy_node(const struct nexthop *src, linpool *lp)
{
  struct nexthop *n = lp_alloc(lp, nexthop_size(src));

  memcpy(n, src, nexthop_size(src));
  n->next = NULL;

  return n;
}

/**
 * nexthop_merge - merge nexthop lists
 * @x: list 1
 * @y: list 2
 * @rx: reusability of list @x
 * @ry: reusability of list @y
 * @max: max number of nexthops
 * @lp: linpool for allocating nexthops
 *
 * The nexthop_merge() function takes two nexthop lists @x and @y and merges them,
 * eliminating possible duplicates. The input lists must be sorted and the
 * result is sorted too. The number of nexthops in the result is limited by @max.
 * New nodes are allocated from linpool @lp.
 *
 * The arguments @rx and @ry specify whether the corresponding input lists may be
 * consumed by the function (i.e. their nodes reused in the resulting list); in
 * that case, the caller should not access these lists afterwards. To avoid
 * issues with deallocation of these lists, the caller should use some form of
 * bulk deallocation (e.g. stack or linpool) to free these nodes when the
 * resulting list is no longer needed. When reusability is not set, the
 * corresponding lists are neither modified nor linked from the resulting list.
 */
struct nexthop *
nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp)
{
  struct nexthop *root = NULL;
  struct nexthop **n = &root;

  while ((x || y) && max--)
  {
    int cmp = nexthop_compare_node(x, y);
    if (cmp < 0)
    {
      *n = rx ? x : nexthop_copy_node(x, lp);
      x = x->next;
    }
    else if (cmp > 0)
    {
      *n = ry ? y : nexthop_copy_node(y, lp);
      y = y->next;
    }
    else
    {
      *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp));
      x = x->next;
      y = y->next;
    }
    n = &((*n)->next);
  }
  *n = NULL;

  return root;
}
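
/*
 * A hedged usage sketch (not compiled): merge two sorted lists without
 * consuming either of them, capping the result at 16 next hops. The lists
 * `a` and `b` and the linpool `lp` are assumed.
 */
#if 0
struct nexthop *res = nexthop_merge(a, b, 0, 0, 16, lp);
/* `a` and `b` remain valid; `res` is built entirely from `lp` */
#endif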

void
nexthop_insert(struct nexthop **n, struct nexthop *x)
{
  for (; *n; n = &((*n)->next))
  {
    int cmp = nexthop_compare_node(*n, x);

    if (cmp < 0)
      continue;
    else if (cmp > 0)
      break;
    else
      return;
  }

  x->next = *n;
  *n = x;
}
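
/*
 * Sketch (not compiled): building a sorted nexthop list one node at a time;
 * nexthop_insert() keeps the order required by nexthop_merge() and silently
 * drops exact duplicates. The nodes nh1 and nh2 are assumed.
 */
#if 0
struct nexthop *list = NULL;
nexthop_insert(&list, nh1);
nexthop_insert(&list, nh2);
ASSERT(nexthop_is_sorted(list));
#endif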

int
nexthop_is_sorted(struct nexthop *x)
{
  for (; x && x->next; x = x->next)
    if (nexthop_compare_node(x, x->next) >= 0)
      return 0;

  return 1;
}

static inline slab *
nexthop_slab(struct nexthop *nh)
{
  return nexthop_slab_[MIN(nh->labels, 3)];
}

static struct nexthop *
nexthop_copy(struct nexthop *o)
{
  struct nexthop *first = NULL;
  struct nexthop **last = &first;

  for (; o; o = o->next)
  {
    struct nexthop *n = sl_alloc(nexthop_slab(o));
    n->gw = o->gw;
    n->iface = o->iface;
    n->next = NULL;
    n->weight = o->weight;
    n->labels = o->labels;
    for (int i=0; i<o->labels; i++)
      n->label[i] = o->label[i];

    *last = n;
    last = &(n->next);
  }

  return first;
}

static void
nexthop_free(struct nexthop *o)
{
  struct nexthop *n;

  while (o)
  {
    n = o->next;
    sl_free(nexthop_slab(o), o);
    o = n;
  }
}


/*
 *	Extended Attributes
 */

static inline eattr *
ea__find(ea_list *e, unsigned id)
{
  eattr *a;
  int l, r, m;

  while (e)
  {
    if (e->flags & EALF_BISECT)
    {
      l = 0;
      r = e->count - 1;
      while (l <= r)
      {
	m = (l+r) / 2;
	a = &e->attrs[m];
	if (a->id == id)
	  return a;
	else if (a->id < id)
	  l = m+1;
	else
	  r = m-1;
      }
    }
    else
      for(m=0; m<e->count; m++)
	if (e->attrs[m].id == id)
	  return &e->attrs[m];
    e = e->next;
  }
  return NULL;
}

/**
 * ea_find - find an extended attribute
 * @e: attribute list to search in
 * @id: attribute ID to search for
 *
 * Given an extended attribute list, ea_find() searches for the first
 * occurrence of an attribute with the specified ID, returning either a pointer
 * to its &eattr structure or %NULL if no such attribute exists.
 */
eattr *
ea_find(ea_list *e, unsigned id)
{
  eattr *a = ea__find(e, id & EA_CODE_MASK);

  if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF &&
      !(id & EA_ALLOW_UNDEF))
    return NULL;
  return a;
}
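
/*
 * Sketch (not compiled): a lookup of the generic IGP metric attribute;
 * the source list `e` is assumed. Passing `id | EA_ALLOW_UNDEF` would also
 * return attributes of type %EAF_TYPE_UNDEF instead of hiding them.
 */
#if 0
eattr *a = ea_find(e, EA_GEN_IGP_METRIC);
if (a)
{
  u32 metric = a->u.data;	/* Embedded 32-bit value */
  /* ... */
}
#endif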

/**
 * ea_walk - walk through extended attributes
 * @s: walk state structure
 * @id: start of attribute ID interval
 * @max: length of attribute ID interval
 *
 * Given an extended attribute list, ea_walk() walks through the list looking
 * for first occurrences of attributes with ID in the specified interval from @id to
 * (@id + @max - 1), returning pointers to the found &eattr structures and storing its
 * walk state in @s for subsequent calls.
 *
 * The function ea_walk() is supposed to be called in a loop, with an initially
 * zeroed walk state structure @s whose extended attribute list field has been
 * filled in, returning one found attribute per call or %NULL when no further
 * attribute exists. The extended attribute list and the arguments should not be
 * modified between calls. The maximum value of @max is 128.
 */
eattr *
ea_walk(struct ea_walk_state *s, uint id, uint max)
{
  ea_list *e = s->eattrs;
  eattr *a = s->ea;
  eattr *a_max;

  max = id + max;

  if (a)
    goto step;

  for (; e; e = e->next)
  {
    if (e->flags & EALF_BISECT)
    {
      int l, r, m;

      l = 0;
      r = e->count - 1;
      while (l < r)
      {
	m = (l+r) / 2;
	if (e->attrs[m].id < id)
	  l = m + 1;
	else
	  r = m;
      }
      a = e->attrs + l;
    }
    else
      a = e->attrs;

  step:
    a_max = e->attrs + e->count;
    for (; a < a_max; a++)
      if ((a->id >= id) && (a->id < max))
      {
	int n = a->id - id;

	if (BIT32_TEST(s->visited, n))
	  continue;

	BIT32_SET(s->visited, n);

	if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
	  continue;

	s->eattrs = e;
	s->ea = a;
	return a;
      }
      else if (e->flags & EALF_BISECT)
	break;
  }

  return NULL;
}
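
/*
 * A hedged usage sketch (not compiled): iterate over the first occurrences
 * of all attributes in the interval [id, id + 128) of a list `l`; both `l`
 * and the starting ID `id` are assumed.
 */
#if 0
struct ea_walk_state ws = { .eattrs = l };	/* Zeroed, list filled in */
eattr *a;
while (a = ea_walk(&ws, id, 128))
  /* process *a; the state kept in ws resumes the walk on the next call */;
#endif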

/**
 * ea_get_int - fetch an integer attribute
 * @e: attribute list
 * @id: attribute ID
 * @def: default value
 *
 * This function is a shortcut for retrieving the value of an integer
 * attribute: it calls ea_find() to locate the attribute and extracts its
 * value, or returns the provided default @def if no such attribute is present.
 */
int
ea_get_int(ea_list *e, unsigned id, int def)
{
  eattr *a = ea_find(e, id);
  if (!a)
    return def;
  return a->u.data;
}
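
/*
 * Sketch (not compiled): the one-line equivalent of the ea_find() example
 * above, falling back to 0 when the attribute is missing; `e` is assumed.
 */
#if 0
u32 metric = ea_get_int(e, EA_GEN_IGP_METRIC, 0);
#endif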

static inline void
ea_do_sort(ea_list *e)
{
  unsigned n = e->count;
  eattr *a = e->attrs;
  eattr *b = alloca(n * sizeof(eattr));
  unsigned s, ss;

  /* We need to use a stable sorting algorithm, hence mergesort */
  do
  {
    s = ss = 0;
    while (s < n)
    {
      eattr *p, *q, *lo, *hi;
      p = b;
      ss = s;
      *p++ = a[s++];
      while (s < n && p[-1].id <= a[s].id)
	*p++ = a[s++];
      if (s < n)
      {
	q = p;
	*p++ = a[s++];
	while (s < n && p[-1].id <= a[s].id)
	  *p++ = a[s++];
	lo = b;
	hi = q;
	s = ss;
	while (lo < q && hi < p)
	  if (lo->id <= hi->id)
	    a[s++] = *lo++;
	  else
	    a[s++] = *hi++;
	while (lo < q)
	  a[s++] = *lo++;
	while (hi < p)
	  a[s++] = *hi++;
      }
    }
  }
  while (ss);
}

static inline void
ea_do_prune(ea_list *e)
{
  eattr *s, *d, *l, *s0;
  int i = 0;

  /* Discard duplicates and undefs. Do you remember sorting was stable? */
  s = d = e->attrs;
  l = e->attrs + e->count;
  while (s < l)
  {
    s0 = s++;
    while (s < l && s->id == s[-1].id)
      s++;
    /* s0 is the most recent version, s[-1] the oldest one */
    if ((s0->type & EAF_TYPE_MASK) != EAF_TYPE_UNDEF)
    {
      *d = *s0;
      d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED);
      d++;
      i++;
    }
  }
  e->count = i;
}

/**
 * ea_sort - sort an attribute list
 * @e: list to be sorted
 *
 * This function takes a &ea_list chain and sorts the attributes
 * within each of its entries.
 *
 * If an attribute occurs multiple times in a single &ea_list,
 * ea_sort() leaves only the first (the only significant) occurrence.
 */
void
ea_sort(ea_list *e)
{
  while (e)
  {
    if (!(e->flags & EALF_SORTED))
    {
      ea_do_sort(e);
      ea_do_prune(e);
      e->flags |= EALF_SORTED;
    }
    if (e->count > 5)
      e->flags |= EALF_BISECT;
    e = e->next;
  }
}

/**
 * ea_scan - estimate attribute list size
 * @e: attribute list
 *
 * This function calculates an upper bound of the size of
 * a given &ea_list after merging with ea_merge().
 */
unsigned
ea_scan(ea_list *e)
{
  unsigned cnt = 0;

  while (e)
  {
    cnt += e->count;
    e = e->next;
  }
  return sizeof(ea_list) + sizeof(eattr)*cnt;
}

/**
 * ea_merge - merge segments of an attribute list
 * @e: attribute list
 * @t: buffer to store the result to
 *
 * This function takes a possibly multi-segment attribute list
 * and merges all of its segments into one.
 *
 * The primary use of this function is for &ea_list normalization:
 * first call ea_scan() to determine how much memory the result
 * will take, then allocate a buffer (usually using alloca()), merge the
 * segments with ea_merge() and finally sort and prune the result
 * by calling ea_sort().
 */
void
ea_merge(ea_list *e, ea_list *t)
{
  eattr *d = t->attrs;

  t->flags = 0;
  t->count = 0;
  t->next = NULL;
  while (e)
  {
    memcpy(d, e->attrs, sizeof(eattr)*e->count);
    t->count += e->count;
    d += e->count;
    e = e->next;
  }
}
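
/*
 * The normalization recipe from the comment above, spelled out; this is
 * essentially what rta_lookup() below does with o->eattrs (sketch only,
 * not compiled).
 */
#if 0
if (e->next)			/* Multiple segments, merge them first */
{
  ea_list *t = alloca(ea_scan(e));
  ea_merge(e, t);
  e = t;
}
ea_sort(e);			/* Sort and prune; sets EALF_SORTED */
#endif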

/**
 * ea_same - compare two &ea_list's
 * @x: attribute list
 * @y: attribute list
 *
 * ea_same() compares two normalized attribute lists @x and @y and returns
 * 1 if they contain the same attributes, 0 otherwise.
 */
int
ea_same(ea_list *x, ea_list *y)
{
  int c;

  if (!x || !y)
    return x == y;
  ASSERT(!x->next && !y->next);
  if (x->count != y->count)
    return 0;
  for(c=0; c<x->count; c++)
  {
    eattr *a = &x->attrs[c];
    eattr *b = &y->attrs[c];

    if (a->id != b->id ||
	a->flags != b->flags ||
	a->type != b->type ||
	((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr)))
      return 0;
  }
  return 1;
}

static inline ea_list *
ea_list_copy(ea_list *o)
{
  ea_list *n;
  unsigned i, len;

  if (!o)
    return NULL;
  ASSERT(!o->next);
  len = sizeof(ea_list) + sizeof(eattr) * o->count;
  n = mb_alloc(rta_pool, len);
  memcpy(n, o, len);
  n->flags |= EALF_CACHED;
  for(i=0; i<o->count; i++)
  {
    eattr *a = &n->attrs[i];
    if (!(a->type & EAF_EMBEDDED))
    {
      unsigned size = sizeof(struct adata) + a->u.ptr->length;
      struct adata *d = mb_alloc(rta_pool, size);
      memcpy(d, a->u.ptr, size);
      a->u.ptr = d;
    }
  }
  return n;
}

static inline void
ea_free(ea_list *o)
{
  int i;

  if (o)
  {
    ASSERT(!o->next);
    for(i=0; i<o->count; i++)
    {
      eattr *a = &o->attrs[i];
      if (!(a->type & EAF_EMBEDDED))
	mb_free(a->u.ptr);
    }
    mb_free(o);
  }
}

static int
get_generic_attr(eattr *a, byte **buf, int buflen UNUSED)
{
  if (a->id == EA_GEN_IGP_METRIC)
  {
    *buf += bsprintf(*buf, "igp_metric");
    return GA_NAME;
  }

  return GA_UNKNOWN;
}

void
ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max)
{
  byte *bound = buf + bufsize - 32;
  u32 data = a->u.data;
  int i;

  for (i = min; i < max; i++)
    if ((data & (1u << i)) && names[i])
    {
      if (buf > bound)
      {
	strcpy(buf, " ...");
	return;
      }

      buf += bsprintf(buf, " %s", names[i]);
      data &= ~(1u << i);
    }

  if (data)
    bsprintf(buf, " %08x", data);

  return;
}

static inline void
opaque_format(struct adata *ad, byte *buf, uint size)
{
  byte *bound = buf + size - 10;
  uint i;

  for(i = 0; i < ad->length; i++)
  {
    if (buf > bound)
    {
      strcpy(buf, " ...");
      return;
    }
    if (i)
      *buf++ = ' ';

    buf += bsprintf(buf, "%02x", ad->data[i]);
  }

  *buf = 0;
  return;
}

static inline void
ea_show_int_set(struct cli *c, struct adata *ad, int way, byte *pos, byte *buf, byte *end)
{
  int i = int_set_format(ad, way, 0, pos, end - pos);
  cli_printf(c, -1012, "\t%s", buf);
  while (i)
  {
    i = int_set_format(ad, way, i, buf, end - buf - 1);
    cli_printf(c, -1012, "\t\t%s", buf);
  }
}

static inline void
ea_show_ec_set(struct cli *c, struct adata *ad, byte *pos, byte *buf, byte *end)
{
  int i = ec_set_format(ad, 0, pos, end - pos);
  cli_printf(c, -1012, "\t%s", buf);
  while (i)
  {
    i = ec_set_format(ad, i, buf, end - buf - 1);
    cli_printf(c, -1012, "\t\t%s", buf);
  }
}

static inline void
ea_show_lc_set(struct cli *c, struct adata *ad, byte *pos, byte *buf, byte *end)
{
  int i = lc_set_format(ad, 0, pos, end - pos);
  cli_printf(c, -1012, "\t%s", buf);
  while (i)
  {
    i = lc_set_format(ad, i, buf, end - buf - 1);
    cli_printf(c, -1012, "\t\t%s", buf);
  }
}

/**
 * ea_show - print an &eattr to CLI
 * @c: destination CLI
 * @e: attribute to be printed
 *
 * This function takes an extended attribute represented by its &eattr
 * structure and prints it to the CLI according to the type information.
 *
 * If the protocol defining the attribute provides its own
 * get_attr() hook, it's consulted first.
 */
void
ea_show(struct cli *c, eattr *e)
{
  struct protocol *p;
  int status = GA_UNKNOWN;
  struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr;
  byte buf[CLI_MSG_SIZE];
  byte *pos = buf, *end = buf + sizeof(buf);

  if (p = attr_class_to_protocol[EA_PROTO(e->id)])
  {
    pos += bsprintf(pos, "%s.", p->name);
    if (p->get_attr)
      status = p->get_attr(e, pos, end - pos);
    pos += strlen(pos);
  }
  else if (EA_PROTO(e->id))
    pos += bsprintf(pos, "%02x.", EA_PROTO(e->id));
  else
    status = get_generic_attr(e, &pos, end - pos);

  if (status < GA_NAME)
    pos += bsprintf(pos, "%02x", EA_ID(e->id));
  if (status < GA_FULL)
  {
    *pos++ = ':';
    *pos++ = ' ';
    switch (e->type & EAF_TYPE_MASK)
    {
    case EAF_TYPE_INT:
      bsprintf(pos, "%u", e->u.data);
      break;
    case EAF_TYPE_OPAQUE:
      opaque_format(ad, pos, end - pos);
      break;
    case EAF_TYPE_IP_ADDRESS:
      bsprintf(pos, "%I", *(ip_addr *) ad->data);
      break;
    case EAF_TYPE_ROUTER_ID:
      bsprintf(pos, "%R", e->u.data);
      break;
    case EAF_TYPE_AS_PATH:
      as_path_format(ad, pos, end - pos);
      break;
    case EAF_TYPE_BITFIELD:
      bsprintf(pos, "%08x", e->u.data);
      break;
    case EAF_TYPE_INT_SET:
      ea_show_int_set(c, ad, 1, pos, buf, end);
      return;
    case EAF_TYPE_EC_SET:
      ea_show_ec_set(c, ad, pos, buf, end);
      return;
    case EAF_TYPE_LC_SET:
      ea_show_lc_set(c, ad, pos, buf, end);
      return;
    case EAF_TYPE_UNDEF:
    default:
      bsprintf(pos, "<type %02x>", e->type);
    }
  }
  cli_printf(c, -1012, "\t%s", buf);
}

/**
 * ea_dump - dump an extended attribute
 * @e: attribute to be dumped
 *
 * ea_dump() dumps contents of the extended attribute given to
 * the debug output.
 */
void
ea_dump(ea_list *e)
{
  int i;

  if (!e)
  {
    debug("NONE");
    return;
  }
  while (e)
  {
    debug("[%c%c%c]",
	  (e->flags & EALF_SORTED) ? 'S' : 's',
	  (e->flags & EALF_BISECT) ? 'B' : 'b',
	  (e->flags & EALF_CACHED) ? 'C' : 'c');
    for(i=0; i<e->count; i++)
    {
      eattr *a = &e->attrs[i];
      debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags);
      if (a->type & EAF_TEMP)
	debug("T");
      debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]);
      if (a->type & EAF_ORIGINATED)
	debug("o");
      if (a->type & EAF_EMBEDDED)
	debug(":%08x", a->u.data);
      else
      {
	int j, len = a->u.ptr->length;
	debug("[%d]:", len);
	for(j=0; j<len; j++)
	  debug("%02x", a->u.ptr->data[j]);
      }
    }
    if (e = e->next)
      debug(" | ");
  }
}

/**
 * ea_hash - calculate an &ea_list hash key
 * @e: attribute list
 *
 * ea_hash() takes an extended attribute list and calculates a hopefully
 * uniformly distributed hash value from its contents.
 */
inline uint
ea_hash(ea_list *e)
{
  const u64 mul = 0x68576150f3d6847;
  u64 h = 0xafcef24eda8b29;
  int i;

  if (e) /* Assuming chain of length 1 */
  {
    for(i=0; i<e->count; i++)
    {
      struct eattr *a = &e->attrs[i];
      h ^= a->id; h *= mul;
      if (a->type & EAF_EMBEDDED)
	h ^= a->u.data;
      else
      {
	struct adata *d = a->u.ptr;
	h ^= mem_hash(d->data, d->length);
      }
      h *= mul;
    }
  }
  return (h >> 32) ^ (h & 0xffffffff);
}

/**
 * ea_append - concatenate &ea_list's
 * @to: destination list (can be %NULL)
 * @what: list to be appended (can be %NULL)
 *
 * This function appends the &ea_list @what at the end of
 * &ea_list @to and returns a pointer to the resulting list.
 */
ea_list *
ea_append(ea_list *to, ea_list *what)
{
  ea_list *res;

  if (!to)
    return what;
  res = to;
  while (to->next)
    to = to->next;
  to->next = what;
  return res;
}

/*
 *	rta's
 */

static uint rta_cache_count;
static uint rta_cache_size = 32;
static uint rta_cache_limit;
static uint rta_cache_mask;
static rta **rta_hash_table;

static void
rta_alloc_hash(void)
{
  rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size);
  if (rta_cache_size < 32768)
    rta_cache_limit = rta_cache_size * 2;
  else
    rta_cache_limit = ~0;
  rta_cache_mask = rta_cache_size - 1;
}

static inline uint
rta_hash(rta *a)
{
  u64 h;
  mem_hash_init(&h);
#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f));
  MIX(src);
  MIX(hostentry);
  MIX(from);
  MIX(igp_metric);
  MIX(source);
  MIX(scope);
  MIX(dest);
#undef MIX

  return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs);
}

static inline int
rta_same(rta *x, rta *y)
{
  return (x->src == y->src &&
	  x->source == y->source &&
	  x->scope == y->scope &&
	  x->dest == y->dest &&
	  x->igp_metric == y->igp_metric &&
	  ipa_equal(x->from, y->from) &&
	  x->hostentry == y->hostentry &&
	  nexthop_same(&(x->nh), &(y->nh)) &&
	  ea_same(x->eattrs, y->eattrs));
}

static inline slab *
rta_slab(rta *a)
{
  return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels];
}

static rta *
rta_copy(rta *o)
{
  rta *r = sl_alloc(rta_slab(o));

  memcpy(r, o, rta_size(o));
  r->uc = 1;
  r->nh.next = nexthop_copy(o->nh.next);
  r->eattrs = ea_list_copy(o->eattrs);
  return r;
}

static inline void
rta_insert(rta *r)
{
  uint h = r->hash_key & rta_cache_mask;
  r->next = rta_hash_table[h];
  if (r->next)
    r->next->pprev = &r->next;
  r->pprev = &rta_hash_table[h];
  rta_hash_table[h] = r;
}

static void
rta_rehash(void)
{
  uint ohs = rta_cache_size;
  uint h;
  rta *r, *n;
  rta **oht = rta_hash_table;

  rta_cache_size = 2*rta_cache_size;
  DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size);
  rta_alloc_hash();
  for(h=0; h<ohs; h++)
    for(r=oht[h]; r; r=n)
    {
      n = r->next;
      rta_insert(r);
    }
  mb_free(oht);
}

/**
 * rta_lookup - look up a &rta in the attribute cache
 * @o: an un-cached &rta
 *
 * rta_lookup() gets an un-cached &rta structure and returns its cached
 * counterpart. It starts with examining the attribute cache to see whether
 * there exists a matching entry. If such an entry exists, it's returned and
 * its use count is incremented, else a new entry is created with use count
 * set to 1.
 *
 * The extended attribute lists attached to the &rta are automatically
 * converted to the normalized form.
 */
rta *
rta_lookup(rta *o)
{
  rta *r;
  uint h;

  ASSERT(!(o->aflags & RTAF_CACHED));
  if (o->eattrs)
  {
    if (o->eattrs->next)	/* Multiple ea_list's, need to merge them */
    {
      ea_list *ml = alloca(ea_scan(o->eattrs));
      ea_merge(o->eattrs, ml);
      o->eattrs = ml;
    }
    ea_sort(o->eattrs);
  }

  h = rta_hash(o);
  for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
    if (r->hash_key == h && rta_same(r, o))
      return rta_clone(r);

  r = rta_copy(o);
  r->hash_key = h;
  r->aflags = RTAF_CACHED;
  rt_lock_source(r->src);
  rt_lock_hostentry(r->hostentry);
  rta_insert(r);

  if (++rta_cache_count > rta_cache_limit)
    rta_rehash();

  return r;
}
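
/*
 * A hedged usage sketch (not compiled): a protocol typically fills a &rta
 * on the stack and exchanges it for a cached copy; the field values here
 * are illustrative.
 */
#if 0
rta a0 = {
  .src = p->main_source,
  .source = RTS_STATIC,
  .scope = SCOPE_UNIVERSE,
  .dest = RTD_UNREACHABLE,
};
rta *a = rta_lookup(&a0);	/* Cached; use count held for us */
/* ... attach to routes ... */
rta_free(a);			/* Drop the reference when done */
#endif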

void
rta__free(rta *a)
{
  ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED));
  rta_cache_count--;
  *a->pprev = a->next;
  if (a->next)
    a->next->pprev = a->pprev;
  rt_unlock_hostentry(a->hostentry);
  rt_unlock_source(a->src);
  if (a->nh.next)
    nexthop_free(a->nh.next);
  ea_free(a->eattrs);
  a->aflags = 0;		/* Poison the entry */
  sl_free(rta_slab(a), a);
}

rta *
rta_do_cow(rta *o, linpool *lp)
{
  rta *r = lp_alloc(lp, rta_size(o));
  memcpy(r, o, rta_size(o));
  for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next)
  {
    *nhn = lp_alloc(lp, nexthop_size(nho));
    memcpy(*nhn, nho, nexthop_size(nho));
    nhn = &((*nhn)->next);
  }
  r->aflags = 0;
  r->uc = 0;
  return r;
}

/**
 * rta_dump - dump route attributes
 * @a: attribute structure to dump
 *
 * This function takes a &rta and dumps its contents to the debug output.
 */
void
rta_dump(rta *a)
{
  static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE",
			 "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP",
			 "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1",
			 "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" };
  static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };

  debug("p=%s uc=%d %s %s%s h=%04x",
	a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope),
	rtd[a->dest], a->hash_key);
  if (!(a->aflags & RTAF_CACHED))
    debug(" !CACHED");
  debug(" <-%I", a->from);
  if (a->dest == RTD_UNICAST)
    for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
    {
      if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw);
      if (nh->labels) debug(" L %d", nh->label[0]);
      for (int i=1; i<nh->labels; i++)
	debug("/%d", nh->label[i]);
      debug(" [%s]", nh->iface ? nh->iface->name : "???");
    }
  if (a->eattrs)
  {
    debug(" EA: ");
    ea_dump(a->eattrs);
  }
}

/**
 * rta_dump_all - dump attribute cache
 *
 * This function dumps the whole contents of the route attribute cache
 * to the debug output.
 */
void
rta_dump_all(void)
{
  rta *a;
  uint h;

  debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit);
  for(h=0; h<rta_cache_size; h++)
    for(a=rta_hash_table[h]; a; a=a->next)
    {
      debug("%p ", a);
      rta_dump(a);
      debug("\n");
    }
  debug("\n");
}

void
rta_show(struct cli *c, rta *a, ea_list *eal)
{
  static char *src_names[] = { "dummy", "static", "inherit", "device", "static-device", "redirect",
			       "RIP", "OSPF", "OSPF-IA", "OSPF-E1", "OSPF-E2", "BGP", "pipe" };
  int i;

  cli_printf(c, -1008, "\tType: %s %s", src_names[a->source], ip_scope_text(a->scope));
  if (!eal)
    eal = a->eattrs;
  for(; eal; eal=eal->next)
    for(i=0; i<eal->count; i++)
      ea_show(c, &eal->attrs[i]);
}

/**
 * rta_init - initialize route attribute cache
 *
 * This function is called during initialization of the routing
 * table module to set up the internals of the attribute cache.
 */
void
rta_init(void)
{
  rta_pool = rp_new(&root_pool, "Attributes");

  rta_slab_[0] = sl_new(rta_pool, sizeof(rta));
  rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32));
  rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2);
  rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK);

  nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop));
  nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32));
  nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2);
  nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK);

  rta_alloc_hash();
  rte_src_init();
}

/*
 *  Documentation for functions declared inline in route.h
 */
#if 0

/**
 * rta_clone - clone route attributes
 * @r: a &rta to be cloned
 *
 * rta_clone() takes a cached &rta and returns its identical cached
 * copy. Currently it works by just returning the original &rta with
 * its use count incremented.
 */
static inline rta *rta_clone(rta *r)
{ DUMMY; }

/**
 * rta_free - free route attributes
 * @r: a &rta to be freed
 *
 * If you stop using a &rta (for example when deleting a route which uses
 * it), you need to call rta_free() to notify the attribute cache that the
 * attribute is no longer in use and can be freed if you were the last
 * user (which rta_free() tests by inspecting the use count).
 */
static inline void rta_free(rta *r)
{ DUMMY; }

#endif