]>
Commit | Line | Data |
---|---|---|
62aa008a MM |
1 | /* |
2 | * BIRD -- Forwarding Information Base -- Data Structures | |
3 | * | |
6998bb9e | 4 | * (c) 1998--2000 Martin Mares <mj@ucw.cz> |
62aa008a MM |
5 | * |
6 | * Can be freely distributed and used under the terms of the GNU GPL. | |
7 | */ | |
8 | ||
ce4aca09 MM |
9 | /** |
10 | * DOC: Forwarding Information Base | |
11 | * | |
12 | * FIB is a data structure designed for storage of routes indexed by their | |
13 | * network prefixes. It supports insertion, deletion, searching by prefix, | |
14 | * `routing' (in CIDR sense, that is searching for a longest prefix matching | |
15 | * a given IP address) and (which makes the structure very tricky to implement) | |
16 | * asynchronous reading, that is enumerating the contents of a FIB while other | |
17 | * modules add, modify or remove entries. | |
18 | * | |
19 | * Internally, each FIB is represented as a collection of nodes of type &fib_node | |
20 | * indexed using a sophisticated hashing mechanism. | |
21 | * We use two-stage hashing where we calculate a 16-bit primary hash key independent | |
22 | * on hash table size and then we just divide the primary keys modulo table size | |
23 | * to get a real hash key used for determining the bucket containing the node. | |
58f7d004 | 24 | * The lists of nodes in each bucket are sorted according to the primary hash |
ce4aca09 MM |
25 | * key, hence if we keep the total number of buckets to be a power of two, |
26 | * re-hashing of the structure keeps the relative order of the nodes. | |
27 | * | |
28 | * To get the asynchronous reading consistent over node deletions, we need to | |
29 | * keep a list of readers for each node. When a node gets deleted, its readers | |
30 | * are automatically moved to the next node in the table. | |
31 | * | |
32 | * Basic FIB operations are performed by functions defined by this module, | |
33 | * enumerating of FIB contents is accomplished by using the FIB_WALK() macro | |
34 | * or FIB_ITERATE_START() if you want to do it asynchronously. | |
35 | */ | |
36 | ||
6b9fa320 | 37 | #undef LOCAL_DEBUG |
62aa008a | 38 | |
62aa008a MM |
39 | #include "nest/bird.h" |
40 | #include "nest/route.h" | |
221135d6 | 41 | #include "lib/string.h" |
62aa008a | 42 | |
3ab001b9 MM |
43 | #define HASH_DEF_ORDER 10 |
44 | #define HASH_HI_MARK *4 | |
45 | #define HASH_HI_STEP 2 | |
46 | #define HASH_HI_MAX 16 /* Must be at most 16 */ | |
47 | #define HASH_LO_MARK /5 | |
48 | #define HASH_LO_STEP 2 | |
49 | #define HASH_LO_MIN 10 | |
62aa008a MM |
50 | |
51 | static void | |
52 | fib_ht_alloc(struct fib *f) | |
53 | { | |
3ab001b9 MM |
54 | f->hash_size = 1 << f->hash_order; |
55 | f->hash_shift = 16 - f->hash_order; | |
56 | if (f->hash_order > HASH_HI_MAX - HASH_HI_STEP) | |
57 | f->entries_max = ~0; | |
58 | else | |
59 | f->entries_max = f->hash_size HASH_HI_MARK; | |
60 | if (f->hash_order < HASH_LO_MIN + HASH_LO_STEP) | |
62aa008a | 61 | f->entries_min = 0; |
3ab001b9 MM |
62 | else |
63 | f->entries_min = f->hash_size HASH_LO_MARK; | |
64 | DBG("Allocating FIB hash of order %d: %d entries, %d low, %d high\n", | |
65 | f->hash_order, f->hash_size, f->entries_min, f->entries_max); | |
62aa008a | 66 | f->hash_table = mb_alloc(f->fib_pool, f->hash_size * sizeof(struct fib_node *)); |
62aa008a MM |
67 | } |
68 | ||
69 | static inline void | |
70 | fib_ht_free(struct fib_node **h) | |
71 | { | |
72 | mb_free(h); | |
73 | } | |
74 | ||
75 | static inline unsigned | |
76 | fib_hash(struct fib *f, ip_addr *a) | |
77 | { | |
3ab001b9 | 78 | return ipa_hash(*a) >> f->hash_shift; |
62aa008a MM |
79 | } |
80 | ||
1d7c44b7 | 81 | static void |
7c103b1e | 82 | fib_dummy_init(struct fib_node *dummy UNUSED) |
ce45fc12 PM |
83 | { |
84 | } | |
85 | ||
ce4aca09 MM |
86 | /** |
87 | * fib_init - initialize a new FIB | |
88 | * @f: the FIB to be initialized (the structure itself being allocated by the caller) | |
89 | * @p: pool to allocate the nodes in | |
90 | * @node_size: node size to be used (each node consists of a standard header &fib_node | |
91 | * followed by user data) | |
92 | * @hash_order: initial hash order (a binary logarithm of hash table size), 0 to use default order | |
93 | * (recommended) | |
94 | * @init: pointer a function to be called to initialize a newly created node | |
95 | * | |
96 | * This function initializes a newly allocated FIB and prepares it for use. | |
97 | */ | |
62aa008a | 98 | void |
ce4aca09 | 99 | fib_init(struct fib *f, pool *p, unsigned node_size, unsigned hash_order, fib_init_func init) |
62aa008a | 100 | { |
3ab001b9 MM |
101 | if (!hash_order) |
102 | hash_order = HASH_DEF_ORDER; | |
62aa008a MM |
103 | f->fib_pool = p; |
104 | f->fib_slab = sl_new(p, node_size); | |
3ab001b9 | 105 | f->hash_order = hash_order; |
62aa008a | 106 | fib_ht_alloc(f); |
3ab001b9 | 107 | bzero(f->hash_table, f->hash_size * sizeof(struct fib_node *)); |
62aa008a MM |
108 | f->entries = 0; |
109 | f->entries_min = 0; | |
ce45fc12 | 110 | f->init = init ? : fib_dummy_init; |
62aa008a MM |
111 | } |
112 | ||
113 | static void | |
3ab001b9 | 114 | fib_rehash(struct fib *f, int step) |
62aa008a | 115 | { |
3ab001b9 | 116 | unsigned old, new, oldn, newn, ni, nh; |
62aa008a MM |
117 | struct fib_node **n, *e, *x, **t, **m, **h; |
118 | ||
3ab001b9 MM |
119 | old = f->hash_order; |
120 | oldn = f->hash_size; | |
121 | new = old + step; | |
62aa008a | 122 | m = h = f->hash_table; |
3ab001b9 MM |
123 | DBG("Re-hashing FIB from order %d to %d\n", old, new); |
124 | f->hash_order = new; | |
62aa008a | 125 | fib_ht_alloc(f); |
3ab001b9 MM |
126 | t = n = f->hash_table; |
127 | newn = f->hash_size; | |
128 | ni = 0; | |
129 | ||
6998bb9e | 130 | while (oldn--) |
62aa008a MM |
131 | { |
132 | x = *h++; | |
133 | while (e = x) | |
134 | { | |
135 | x = e->next; | |
3ab001b9 MM |
136 | nh = fib_hash(f, &e->prefix); |
137 | while (nh > ni) | |
138 | { | |
139 | *t = NULL; | |
140 | ni++; | |
141 | t = ++n; | |
142 | } | |
62aa008a | 143 | *t = e; |
3ab001b9 | 144 | t = &e->next; |
62aa008a MM |
145 | } |
146 | } | |
3ab001b9 MM |
147 | while (ni < newn) |
148 | { | |
149 | *t = NULL; | |
150 | ni++; | |
151 | t = ++n; | |
152 | } | |
62aa008a MM |
153 | fib_ht_free(m); |
154 | } | |
155 | ||
ce4aca09 MM |
156 | /** |
157 | * fib_find - search for FIB node by prefix | |
158 | * @f: FIB to search in | |
159 | * @a: pointer to IP address of the prefix | |
160 | * @len: prefix length | |
161 | * | |
162 | * Search for a FIB node corresponding to the given prefix, return | |
163 | * a pointer to it or %NULL if no such node exists. | |
164 | */ | |
62aa008a MM |
165 | void * |
166 | fib_find(struct fib *f, ip_addr *a, int len) | |
167 | { | |
168 | struct fib_node *e = f->hash_table[fib_hash(f, a)]; | |
169 | ||
170 | while (e && (e->pxlen != len || !ipa_equal(*a, e->prefix))) | |
171 | e = e->next; | |
172 | return e; | |
173 | } | |
174 | ||
d82fc18d OZ |
175 | /* |
176 | int | |
177 | fib_histogram(struct fib *f) | |
178 | { | |
179 | log(L_WARN "Histogram dump start %d %d", f->hash_size, f->entries); | |
180 | ||
181 | int i, j; | |
182 | struct fib_node *e; | |
183 | ||
184 | for (i = 0; i < f->hash_size; i++) | |
185 | { | |
186 | j = 0; | |
187 | for (e = f->hash_table[i]; e != NULL; e = e->next) | |
188 | j++; | |
189 | if (j > 0) | |
190 | log(L_WARN "Histogram line %d: %d", i, j); | |
191 | } | |
192 | ||
193 | log(L_WARN "Histogram dump end"); | |
194 | } | |
195 | */ | |
196 | ||
ce4aca09 MM |
197 | /** |
198 | * fib_get - find or create a FIB node | |
199 | * @f: FIB to work with | |
200 | * @a: pointer to IP address of the prefix | |
201 | * @len: prefix length | |
202 | * | |
203 | * Search for a FIB node corresponding to the given prefix and | |
204 | * return a pointer to it. If no such node exists, create it. | |
205 | */ | |
62aa008a MM |
206 | void * |
207 | fib_get(struct fib *f, ip_addr *a, int len) | |
208 | { | |
3ab001b9 MM |
209 | unsigned int h = ipa_hash(*a); |
210 | struct fib_node **ee = f->hash_table + (h >> f->hash_shift); | |
211 | struct fib_node *g, *e = *ee; | |
d82fc18d | 212 | u32 uid = h << 16; |
62aa008a MM |
213 | |
214 | while (e && (e->pxlen != len || !ipa_equal(*a, e->prefix))) | |
215 | e = e->next; | |
216 | if (e) | |
217 | return e; | |
0cf86f0f | 218 | #ifdef DEBUGGING |
62aa008a | 219 | if (len < 0 || len > BITS_PER_IP_ADDRESS || !ip_is_prefix(*a,len)) |
08c69a77 | 220 | bug("fib_get() called for invalid address"); |
62aa008a | 221 | #endif |
d82fc18d OZ |
222 | |
223 | while ((g = *ee) && g->uid < uid) | |
224 | ee = &g->next; | |
225 | while ((g = *ee) && g->uid == uid) | |
226 | { | |
227 | ee = &g->next; | |
228 | uid++; | |
229 | } | |
230 | ||
231 | if ((uid >> 16) != h) | |
232 | log(L_ERR "FIB hash table chains are too long"); | |
233 | ||
234 | // log (L_WARN "FIB_GET %I %x %x", *a, h, uid); | |
235 | ||
62aa008a MM |
236 | e = sl_alloc(f->fib_slab); |
237 | e->prefix = *a; | |
238 | e->pxlen = len; | |
62aa008a | 239 | e->next = *ee; |
d82fc18d | 240 | e->uid = uid; |
62aa008a | 241 | *ee = e; |
3ab001b9 | 242 | e->readers = NULL; |
62aa008a MM |
243 | f->init(e); |
244 | if (f->entries++ > f->entries_max) | |
3ab001b9 | 245 | fib_rehash(f, HASH_HI_STEP); |
d82fc18d | 246 | |
62aa008a MM |
247 | return e; |
248 | } | |
249 | ||
ce4aca09 MM |
250 | /** |
251 | * fib_route - CIDR routing lookup | |
252 | * @f: FIB to search in | |
253 | * @a: pointer to IP address of the prefix | |
254 | * @len: prefix length | |
255 | * | |
256 | * Search for a FIB node with longest prefix matching the given | |
257 | * network, that is a node which a CIDR router would use for routing | |
258 | * that network. | |
259 | */ | |
56d6c530 MM |
260 | void * |
261 | fib_route(struct fib *f, ip_addr a, int len) | |
262 | { | |
263 | ip_addr a0; | |
264 | void *t; | |
265 | ||
266 | while (len >= 0) | |
267 | { | |
268 | a0 = ipa_and(a, ipa_mkmask(len)); | |
269 | t = fib_find(f, &a0, len); | |
270 | if (t) | |
271 | return t; | |
272 | len--; | |
273 | } | |
274 | return NULL; | |
275 | } | |
276 | ||
3ab001b9 MM |
277 | static inline void |
278 | fib_merge_readers(struct fib_iterator *i, struct fib_node *to) | |
279 | { | |
280 | if (to) | |
281 | { | |
282 | struct fib_iterator *j = to->readers; | |
283 | if (!j) | |
284 | { | |
285 | /* Fast path */ | |
286 | to->readers = i; | |
287 | i->prev = (struct fib_iterator *) to; | |
3ab001b9 | 288 | } |
8abbde02 MM |
289 | else |
290 | { | |
291 | /* Really merging */ | |
292 | while (j->next) | |
293 | j = j->next; | |
294 | j->next = i; | |
295 | i->prev = j; | |
296 | } | |
297 | while (i && i->node) | |
298 | { | |
299 | i->node = NULL; | |
300 | i = i->next; | |
301 | } | |
3ab001b9 MM |
302 | } |
303 | else /* No more nodes */ | |
304 | while (i) | |
305 | { | |
306 | i->prev = NULL; | |
307 | i = i->next; | |
308 | } | |
309 | } | |
310 | ||
ce4aca09 MM |
311 | /** |
312 | * fib_delete - delete a FIB node | |
313 | * @f: FIB to delete from | |
314 | * @E: entry to delete | |
315 | * | |
316 | * This function removes the given entry from the FIB, | |
317 | * taking care of all the asynchronous readers by shifting | |
318 | * them to the next node in the canonical reading order. | |
319 | */ | |
62aa008a MM |
320 | void |
321 | fib_delete(struct fib *f, void *E) | |
322 | { | |
323 | struct fib_node *e = E; | |
3ab001b9 MM |
324 | unsigned int h = fib_hash(f, &e->prefix); |
325 | struct fib_node **ee = f->hash_table + h; | |
326 | struct fib_iterator *it; | |
62aa008a MM |
327 | |
328 | while (*ee) | |
329 | { | |
330 | if (*ee == e) | |
331 | { | |
332 | *ee = e->next; | |
3ab001b9 MM |
333 | if (it = e->readers) |
334 | { | |
335 | struct fib_node *l = e->next; | |
336 | while (!l) | |
337 | { | |
338 | h++; | |
339 | if (h >= f->hash_size) | |
340 | break; | |
341 | else | |
342 | l = f->hash_table[h]; | |
343 | } | |
344 | fib_merge_readers(it, l); | |
345 | } | |
62aa008a MM |
346 | sl_free(f->fib_slab, e); |
347 | if (f->entries-- < f->entries_min) | |
3ab001b9 | 348 | fib_rehash(f, -HASH_LO_STEP); |
62aa008a MM |
349 | return; |
350 | } | |
351 | ee = &((*ee)->next); | |
352 | } | |
08c69a77 | 353 | bug("fib_delete() called for invalid node"); |
62aa008a MM |
354 | } |
355 | ||
ce4aca09 MM |
356 | /** |
357 | * fib_free - delete a FIB | |
358 | * @f: FIB to be deleted | |
359 | * | |
360 | * This function deletes a FIB -- it frees all memory associated | |
361 | * with it and all its entries. | |
362 | */ | |
62aa008a MM |
363 | void |
364 | fib_free(struct fib *f) | |
365 | { | |
366 | fib_ht_free(f->hash_table); | |
367 | rfree(f->fib_slab); | |
368 | } | |
3ab001b9 MM |
369 | |
370 | void | |
371 | fit_init(struct fib_iterator *i, struct fib *f) | |
372 | { | |
373 | unsigned h; | |
374 | struct fib_node *n; | |
375 | ||
376 | i->efef = 0xff; | |
377 | for(h=0; h<f->hash_size; h++) | |
378 | if (n = f->hash_table[h]) | |
379 | { | |
380 | i->prev = (struct fib_iterator *) n; | |
381 | if (i->next = n->readers) | |
382 | i->next->prev = i; | |
383 | n->readers = i; | |
384 | i->node = n; | |
385 | return; | |
386 | } | |
387 | /* The fib is empty, nothing to do */ | |
388 | i->prev = i->next = NULL; | |
389 | i->node = NULL; | |
390 | } | |
391 | ||
392 | struct fib_node * | |
393 | fit_get(struct fib *f, struct fib_iterator *i) | |
394 | { | |
395 | struct fib_node *n; | |
396 | struct fib_iterator *j, *k; | |
397 | ||
398 | if (!i->prev) | |
399 | { | |
400 | /* We are at the end */ | |
8abbde02 | 401 | i->hash = ~0 - 1; |
3ab001b9 MM |
402 | return NULL; |
403 | } | |
404 | if (!(n = i->node)) | |
405 | { | |
406 | /* No node info available, we are a victim of merging. Try harder. */ | |
407 | j = i; | |
408 | while (j->efef == 0xff) | |
409 | j = j->prev; | |
410 | n = (struct fib_node *) j; | |
411 | } | |
412 | j = i->prev; | |
413 | if (k = i->next) | |
414 | k->prev = j; | |
415 | j->next = k; | |
416 | i->hash = fib_hash(f, &n->prefix); | |
417 | return n; | |
418 | } | |
419 | ||
420 | void | |
421 | fit_put(struct fib_iterator *i, struct fib_node *n) | |
422 | { | |
423 | struct fib_iterator *j; | |
424 | ||
425 | i->node = n; | |
426 | if (j = n->readers) | |
427 | j->prev = i; | |
428 | i->next = j; | |
429 | n->readers = i; | |
430 | i->prev = (struct fib_iterator *) n; | |
431 | } | |
432 | ||
433 | #ifdef DEBUGGING | |
434 | ||
ce4aca09 MM |
435 | /** |
436 | * fib_check - audit a FIB | |
437 | * @f: FIB to be checked | |
438 | * | |
439 | * This debugging function audits a FIB by checking its internal consistency. | |
58f7d004 | 440 | * Use when you suspect somebody of corrupting innocent data structures. |
ce4aca09 | 441 | */ |
3ab001b9 MM |
442 | void |
443 | fib_check(struct fib *f) | |
444 | { | |
445 | unsigned int i, ec, lo, nulls; | |
446 | ||
447 | ec = 0; | |
448 | for(i=0; i<f->hash_size; i++) | |
449 | { | |
450 | struct fib_node *n; | |
451 | lo = 0; | |
452 | for(n=f->hash_table[i]; n; n=n->next) | |
453 | { | |
454 | struct fib_iterator *j, *j0; | |
455 | unsigned int h0 = ipa_hash(n->prefix); | |
456 | if (h0 < lo) | |
08c69a77 | 457 | bug("fib_check: discord in hash chains"); |
3ab001b9 MM |
458 | lo = h0; |
459 | if ((h0 >> f->hash_shift) != i) | |
08c69a77 | 460 | bug("fib_check: mishashed %x->%x (order %d)", h0, i, f->hash_order); |
3ab001b9 MM |
461 | j0 = (struct fib_iterator *) n; |
462 | nulls = 0; | |
463 | for(j=n->readers; j; j=j->next) | |
464 | { | |
465 | if (j->prev != j0) | |
08c69a77 | 466 | bug("fib_check: iterator->prev mismatch"); |
3ab001b9 MM |
467 | j0 = j; |
468 | if (!j->node) | |
469 | nulls++; | |
470 | else if (nulls) | |
08c69a77 | 471 | bug("fib_check: iterator nullified"); |
3ab001b9 | 472 | else if (j->node != n) |
08c69a77 | 473 | bug("fib_check: iterator->node mismatch"); |
3ab001b9 MM |
474 | } |
475 | ec++; | |
476 | } | |
477 | } | |
478 | if (ec != f->entries) | |
08c69a77 | 479 | bug("fib_check: invalid entry count (%d != %d)", ec, f->entries); |
3ab001b9 MM |
480 | } |
481 | ||
482 | #endif | |
483 | ||
484 | #ifdef TEST | |
485 | ||
486 | #include "lib/resource.h" | |
487 | ||
488 | struct fib f; | |
489 | ||
490 | void dump(char *m) | |
491 | { | |
492 | unsigned int i; | |
493 | ||
494 | debug("%s ... order=%d, size=%d, entries=%d\n", m, f.hash_order, f.hash_size, f.hash_size); | |
495 | for(i=0; i<f.hash_size; i++) | |
496 | { | |
497 | struct fib_node *n; | |
498 | struct fib_iterator *j; | |
499 | for(n=f.hash_table[i]; n; n=n->next) | |
500 | { | |
501 | debug("%04x %04x %p %I/%2d", i, ipa_hash(n->prefix), n, n->prefix, n->pxlen); | |
502 | for(j=n->readers; j; j=j->next) | |
503 | debug(" %p[%p]", j, j->node); | |
504 | debug("\n"); | |
505 | } | |
506 | } | |
507 | fib_check(&f); | |
508 | debug("-----\n"); | |
509 | } | |
510 | ||
511 | void init(struct fib_node *n) | |
512 | { | |
513 | } | |
514 | ||
515 | int main(void) | |
516 | { | |
517 | struct fib_node *n; | |
518 | struct fib_iterator i, j; | |
519 | ip_addr a; | |
520 | int c; | |
521 | ||
522 | log_init_debug(NULL); | |
523 | resource_init(); | |
524 | fib_init(&f, &root_pool, sizeof(struct fib_node), 4, init); | |
525 | dump("init"); | |
526 | ||
527 | a = ipa_from_u32(0x01020304); n = fib_get(&f, &a, 32); | |
528 | a = ipa_from_u32(0x02030405); n = fib_get(&f, &a, 32); | |
529 | a = ipa_from_u32(0x03040506); n = fib_get(&f, &a, 32); | |
530 | a = ipa_from_u32(0x00000000); n = fib_get(&f, &a, 32); | |
531 | a = ipa_from_u32(0x00000c01); n = fib_get(&f, &a, 32); | |
532 | a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32); | |
533 | dump("fill"); | |
534 | ||
535 | fit_init(&i, &f); | |
536 | dump("iter init"); | |
537 | ||
538 | fib_rehash(&f, 1); | |
539 | dump("rehash up"); | |
540 | ||
541 | fib_rehash(&f, -1); | |
542 | dump("rehash down"); | |
543 | ||
544 | next: | |
545 | c = 0; | |
546 | FIB_ITERATE_START(&f, &i, z) | |
547 | { | |
548 | if (c) | |
549 | { | |
550 | FIB_ITERATE_PUT(&i, z); | |
551 | dump("iter"); | |
552 | goto next; | |
553 | } | |
554 | c = 1; | |
555 | debug("got %p\n", z); | |
556 | } | |
6264aad1 | 557 | FIB_ITERATE_END(z); |
3ab001b9 MM |
558 | dump("iter end"); |
559 | ||
560 | fit_init(&i, &f); | |
561 | fit_init(&j, &f); | |
562 | dump("iter init 2"); | |
563 | ||
564 | n = fit_get(&f, &i); | |
565 | dump("iter step 2"); | |
566 | ||
567 | fit_put(&i, n->next); | |
568 | dump("iter step 3"); | |
569 | ||
570 | a = ipa_from_u32(0xffffffff); n = fib_get(&f, &a, 32); | |
571 | fib_delete(&f, n); | |
572 | dump("iter step 3"); | |
573 | ||
574 | return 0; | |
575 | } | |
576 | ||
577 | #endif |