]>
Commit | Line | Data |
---|---|---|
959ef981 | 1 | // SPDX-License-Identifier: GPL-2.0 |
379397bf BN |
2 | /* |
3 | * Copyright (c) 2007, Silicon Graphics, Inc. Barry Naujok <bnaujok@sgi.com> | |
4 | * All Rights Reserved. | |
379397bf BN |
5 | */ |
6 | ||
6b803e5a | 7 | #include "libxfs.h" |
379397bf BN |
8 | #include "btree.h" |
9 | ||
10 | /* | |
11 | * Maximum number of keys per node. Must be greater than 2 for the code | |
12 | * to work. | |
13 | */ | |
14 | #define BTREE_KEY_MAX 7 | |
15 | #define BTREE_KEY_MIN (BTREE_KEY_MAX / 2) | |
16 | ||
17 | #define BTREE_PTR_MAX (BTREE_KEY_MAX + 1) | |
18 | ||
19 | struct btree_node { | |
20 | unsigned long num_keys; | |
21 | unsigned long keys[BTREE_KEY_MAX]; | |
22 | struct btree_node *ptrs[BTREE_PTR_MAX]; | |
23 | }; | |
24 | ||
25 | struct btree_cursor { | |
26 | struct btree_node *node; | |
27 | int index; | |
28 | }; | |
29 | ||
30 | struct btree_root { | |
31 | struct btree_node *root_node; | |
32 | struct btree_cursor *cursor; /* track path to end leaf */ | |
33 | int height; | |
34 | /* lookup cache */ | |
35 | int keys_valid; /* set if the cache is valid */ | |
36 | unsigned long cur_key; | |
37 | unsigned long next_key; | |
38 | void *next_value; | |
39 | unsigned long prev_key; | |
40 | void *prev_value; | |
41 | #ifdef BTREE_STATS | |
42 | struct btree_stats { | |
43 | unsigned long num_items; | |
44 | unsigned long max_items; | |
45 | int alloced; | |
46 | int cache_hits; | |
47 | int cache_misses; | |
48 | int lookup; | |
49 | int find; | |
50 | int key_update; | |
51 | int value_update; | |
52 | int insert; | |
53 | int delete; | |
54 | int inc_height; | |
55 | int dec_height; | |
56 | int shift_prev; | |
57 | int shift_next; | |
58 | int split; | |
59 | int merge_prev; | |
60 | int merge_next; | |
61 | int balance_prev; | |
62 | int balance_next; | |
63 | } stats; | |
64 | #endif | |
65 | }; | |
66 | ||
67 | ||
68 | static struct btree_node * | |
69 | btree_node_alloc(void) | |
70 | { | |
71 | return calloc(1, sizeof(struct btree_node)); | |
72 | } | |
73 | ||
74 | static void | |
75 | btree_node_free( | |
76 | struct btree_node *node) | |
77 | { | |
78 | free(node); | |
79 | } | |
80 | ||
81 | static void | |
82 | btree_free_nodes( | |
83 | struct btree_node *node, | |
84 | int level) | |
85 | { | |
86 | int i; | |
87 | ||
88 | if (level) | |
89 | for (i = 0; i <= node->num_keys; i++) | |
90 | btree_free_nodes(node->ptrs[i], level - 1); | |
91 | btree_node_free(node); | |
92 | } | |
93 | ||
94 | static void | |
95 | __btree_init( | |
96 | struct btree_root *root) | |
97 | { | |
98 | memset(root, 0, sizeof(struct btree_root)); | |
99 | root->height = 1; | |
100 | root->cursor = calloc(1, sizeof(struct btree_cursor)); | |
101 | root->root_node = btree_node_alloc(); | |
102 | ASSERT(root->root_node); | |
103 | #ifdef BTREE_STATS | |
104 | root->stats.max_items = 1; | |
105 | root->stats.alloced += 1; | |
106 | #endif | |
107 | } | |
108 | ||
109 | static void | |
110 | __btree_free( | |
111 | struct btree_root *root) | |
112 | { | |
113 | btree_free_nodes(root->root_node, root->height - 1); | |
114 | free(root->cursor); | |
115 | root->height = 0; | |
116 | root->cursor = NULL; | |
117 | root->root_node = NULL; | |
118 | } | |
119 | ||
120 | void | |
121 | btree_init( | |
122 | struct btree_root **root) | |
123 | { | |
124 | *root = calloc(1, sizeof(struct btree_root)); | |
125 | __btree_init(*root); | |
126 | } | |
127 | ||
128 | void | |
129 | btree_clear( | |
130 | struct btree_root *root) | |
131 | { | |
132 | __btree_free(root); | |
133 | __btree_init(root); | |
134 | } | |
135 | ||
136 | void | |
137 | btree_destroy( | |
138 | struct btree_root *root) | |
139 | { | |
140 | __btree_free(root); | |
141 | free(root); | |
142 | } | |
143 | ||
144 | int | |
145 | btree_is_empty( | |
146 | struct btree_root *root) | |
147 | { | |
148 | return root->root_node->num_keys == 0; | |
149 | } | |
150 | ||
151 | static inline void | |
152 | btree_invalidate_cursor( | |
153 | struct btree_root *root) | |
154 | { | |
155 | root->cursor[0].node = NULL; | |
156 | root->keys_valid = 0; | |
157 | } | |
158 | ||
159 | static inline unsigned long | |
160 | btree_key_of_cursor( | |
161 | struct btree_cursor *cursor, | |
162 | int height) | |
163 | { | |
164 | while (cursor->node->num_keys == cursor->index && --height > 0) | |
165 | cursor++; | |
166 | return cursor->node->keys[cursor->index]; | |
167 | } | |
168 | ||
169 | static void * | |
170 | btree_get_prev( | |
171 | struct btree_root *root, | |
172 | unsigned long *key) | |
173 | { | |
174 | struct btree_cursor *cur = root->cursor; | |
175 | int level = 0; | |
176 | struct btree_node *node; | |
177 | ||
178 | if (cur->index > 0) { | |
179 | if (key) | |
180 | *key = cur->node->keys[cur->index - 1]; | |
181 | return cur->node->ptrs[cur->index - 1]; | |
182 | } | |
183 | ||
184 | /* else need to go up and back down the tree to find the previous */ | |
98566312 DC |
185 | do { |
186 | if (cur->index) | |
187 | break; | |
379397bf | 188 | cur++; |
98566312 DC |
189 | } while (++level < root->height); |
190 | ||
191 | if (level == root->height) | |
192 | return NULL; | |
379397bf BN |
193 | |
194 | /* the key is in the current level */ | |
195 | if (key) | |
196 | *key = cur->node->keys[cur->index - 1]; | |
197 | ||
198 | /* descend back down the right side to get the pointer */ | |
199 | node = cur->node->ptrs[cur->index - 1]; | |
200 | while (level--) | |
201 | node = node->ptrs[node->num_keys]; | |
202 | return node; | |
203 | } | |
204 | ||
205 | static void * | |
206 | btree_get_next( | |
207 | struct btree_root *root, | |
208 | unsigned long *key) | |
209 | { | |
210 | struct btree_cursor *cur = root->cursor; | |
211 | int level = 0; | |
212 | struct btree_node *node; | |
213 | ||
214 | while (cur->index == cur->node->num_keys) { | |
215 | if (++level == root->height) | |
216 | return NULL; | |
217 | cur++; | |
218 | } | |
219 | if (level == 0) { | |
220 | if (key) { | |
221 | cur->index++; | |
222 | *key = btree_key_of_cursor(cur, root->height); | |
223 | cur->index--; | |
224 | } | |
225 | return cur->node->ptrs[cur->index + 1]; | |
226 | } | |
227 | ||
228 | node = cur->node->ptrs[cur->index + 1]; | |
229 | while (--level > 0) | |
230 | node = node->ptrs[0]; | |
231 | if (key) | |
232 | *key = node->keys[0]; | |
233 | return node->ptrs[0]; | |
234 | } | |
235 | ||
236 | /* | |
237 | * Lookup/Search functions | |
238 | */ | |
239 | ||
240 | static int | |
241 | btree_do_search( | |
242 | struct btree_root *root, | |
243 | unsigned long key) | |
244 | { | |
245 | unsigned long k = 0; | |
246 | struct btree_cursor *cur = root->cursor + root->height; | |
247 | struct btree_node *node = root->root_node; | |
248 | int height = root->height; | |
249 | int key_found = 0; | |
250 | int i; | |
251 | ||
252 | while (--height >= 0) { | |
253 | cur--; | |
254 | for (i = 0; i < node->num_keys; i++) | |
255 | if (node->keys[i] >= key) { | |
256 | k = node->keys[i]; | |
257 | key_found = 1; | |
258 | break; | |
259 | } | |
260 | cur->node = node; | |
261 | cur->index = i; | |
262 | node = node->ptrs[i]; | |
263 | } | |
264 | root->keys_valid = key_found; | |
265 | if (!key_found) | |
266 | return 0; | |
267 | ||
268 | root->cur_key = k; | |
269 | root->next_value = NULL; /* do on-demand next value lookup */ | |
270 | root->prev_value = btree_get_prev(root, &root->prev_key); | |
271 | return 1; | |
272 | } | |
273 | ||
274 | static int | |
275 | btree_search( | |
276 | struct btree_root *root, | |
277 | unsigned long key) | |
278 | { | |
279 | if (root->keys_valid && key <= root->cur_key && | |
280 | (!root->prev_value || key > root->prev_key)) { | |
281 | #ifdef BTREE_STATS | |
282 | root->stats.cache_hits++; | |
283 | #endif | |
284 | return 1; | |
285 | } | |
286 | #ifdef BTREE_STATS | |
287 | root->stats.cache_misses++; | |
288 | #endif | |
289 | return btree_do_search(root, key); | |
290 | } | |
291 | ||
292 | void * | |
293 | btree_find( | |
294 | struct btree_root *root, | |
295 | unsigned long key, | |
296 | unsigned long *actual_key) | |
297 | { | |
298 | #ifdef BTREE_STATS | |
299 | root->stats.find += 1; | |
300 | #endif | |
301 | if (!btree_search(root, key)) | |
302 | return NULL; | |
303 | ||
304 | if (actual_key) | |
305 | *actual_key = root->cur_key; | |
306 | return root->cursor->node->ptrs[root->cursor->index]; | |
307 | } | |
308 | ||
309 | void * | |
310 | btree_lookup( | |
311 | struct btree_root *root, | |
312 | unsigned long key) | |
313 | { | |
314 | #ifdef BTREE_STATS | |
315 | root->stats.lookup += 1; | |
316 | #endif | |
317 | if (!btree_search(root, key) || root->cur_key != key) | |
318 | return NULL; | |
319 | return root->cursor->node->ptrs[root->cursor->index]; | |
320 | } | |
321 | ||
322 | void * | |
323 | btree_peek_prev( | |
324 | struct btree_root *root, | |
325 | unsigned long *key) | |
326 | { | |
327 | if (!root->keys_valid) | |
328 | return NULL; | |
329 | if (key) | |
330 | *key = root->prev_key; | |
331 | return root->prev_value; | |
332 | } | |
333 | ||
334 | void * | |
335 | btree_peek_next( | |
336 | struct btree_root *root, | |
337 | unsigned long *key) | |
338 | { | |
339 | if (!root->keys_valid) | |
340 | return NULL; | |
341 | if (!root->next_value) | |
342 | root->next_value = btree_get_next(root, &root->next_key); | |
343 | if (key) | |
344 | *key = root->next_key; | |
345 | return root->next_value; | |
346 | } | |
347 | ||
348 | static void * | |
349 | btree_move_cursor_to_next( | |
350 | struct btree_root *root, | |
351 | unsigned long *key) | |
352 | { | |
353 | struct btree_cursor *cur = root->cursor; | |
354 | int level = 0; | |
355 | ||
356 | while (cur->index == cur->node->num_keys) { | |
357 | if (++level == root->height) | |
358 | return NULL; | |
359 | cur++; | |
360 | } | |
361 | cur->index++; | |
362 | if (level == 0) { | |
363 | if (key) | |
364 | *key = btree_key_of_cursor(cur, root->height); | |
365 | return cur->node->ptrs[cur->index]; | |
366 | } | |
367 | ||
368 | while (--level >= 0) { | |
369 | root->cursor[level].node = cur->node->ptrs[cur->index]; | |
370 | root->cursor[level].index = 0; | |
371 | cur--; | |
372 | } | |
373 | if (key) | |
374 | *key = cur->node->keys[0]; | |
375 | return cur->node->ptrs[0]; | |
376 | } | |
377 | ||
378 | void * | |
379 | btree_lookup_next( | |
380 | struct btree_root *root, | |
381 | unsigned long *key) | |
382 | { | |
383 | void *value; | |
384 | ||
385 | if (!root->keys_valid) | |
386 | return NULL; | |
387 | ||
388 | root->prev_key = root->cur_key; | |
389 | root->prev_value = root->cursor->node->ptrs[root->cursor->index]; | |
390 | ||
391 | value = btree_move_cursor_to_next(root, &root->cur_key); | |
392 | if (!value) { | |
393 | btree_invalidate_cursor(root); | |
394 | return NULL; | |
395 | } | |
396 | root->next_value = NULL; /* on-demand next value fetch */ | |
397 | if (key) | |
398 | *key = root->cur_key; | |
399 | return value; | |
400 | } | |
401 | ||
402 | static void * | |
403 | btree_move_cursor_to_prev( | |
404 | struct btree_root *root, | |
405 | unsigned long *key) | |
406 | { | |
407 | struct btree_cursor *cur = root->cursor; | |
408 | int level = 0; | |
409 | ||
410 | while (cur->index == 0) { | |
411 | if (++level == root->height) | |
412 | return NULL; | |
413 | cur++; | |
414 | } | |
415 | cur->index--; | |
416 | if (key) /* the key is in the current level */ | |
417 | *key = cur->node->keys[cur->index]; | |
418 | while (level > 0) { | |
419 | level--; | |
420 | root->cursor[level].node = cur->node->ptrs[cur->index]; | |
421 | root->cursor[level].index = root->cursor[level].node->num_keys; | |
422 | cur--; | |
423 | } | |
424 | return cur->node->ptrs[cur->index]; | |
425 | } | |
426 | ||
427 | void * | |
428 | btree_lookup_prev( | |
429 | struct btree_root *root, | |
430 | unsigned long *key) | |
431 | { | |
432 | void *value; | |
433 | ||
434 | if (!root->keys_valid) | |
435 | return NULL; | |
436 | ||
437 | value = btree_move_cursor_to_prev(root, &root->cur_key); | |
438 | if (!value) | |
439 | return NULL; | |
440 | root->prev_value = btree_get_prev(root, &root->prev_key); | |
441 | root->next_value = NULL; /* on-demand next value fetch */ | |
442 | if (key) | |
443 | *key = root->cur_key; | |
444 | return value; | |
445 | } | |
446 | ||
447 | void * | |
448 | btree_uncached_lookup( | |
449 | struct btree_root *root, | |
450 | unsigned long key) | |
451 | { | |
452 | /* cursor-less (ie. uncached) lookup */ | |
453 | int height = root->height - 1; | |
454 | struct btree_node *node = root->root_node; | |
455 | int i; | |
456 | int key_found = 0; | |
457 | ||
458 | while (height >= 0) { | |
459 | for (i = 0; i < node->num_keys; i++) | |
460 | if (node->keys[i] >= key) { | |
461 | key_found = node->keys[i] == key; | |
462 | break; | |
463 | } | |
464 | node = node->ptrs[i]; | |
465 | height--; | |
466 | } | |
467 | return key_found ? node : NULL; | |
468 | } | |
469 | ||
470 | /* Update functions */ | |
471 | ||
472 | static inline void | |
473 | btree_update_node_key( | |
474 | struct btree_root *root, | |
475 | struct btree_cursor *cursor, | |
476 | int level, | |
477 | unsigned long new_key) | |
478 | { | |
479 | int i; | |
480 | ||
481 | #ifdef BTREE_STATS | |
482 | root->stats.key_update += 1; | |
483 | #endif | |
484 | ||
485 | cursor += level; | |
486 | for (i = level; i < root->height; i++) { | |
487 | if (cursor->index < cursor->node->num_keys) { | |
488 | cursor->node->keys[cursor->index] = new_key; | |
489 | break; | |
490 | } | |
491 | cursor++; | |
492 | } | |
493 | } | |
494 | ||
495 | int | |
496 | btree_update_key( | |
497 | struct btree_root *root, | |
498 | unsigned long old_key, | |
499 | unsigned long new_key) | |
500 | { | |
501 | if (!btree_search(root, old_key) || root->cur_key != old_key) | |
502 | return ENOENT; | |
503 | ||
504 | if (root->next_value && new_key >= root->next_key) | |
505 | return EINVAL; | |
506 | ||
507 | if (root->prev_value && new_key <= root->prev_key) | |
508 | return EINVAL; | |
509 | ||
510 | btree_update_node_key(root, root->cursor, 0, new_key); | |
69fb0e22 | 511 | root->cur_key = new_key; |
379397bf BN |
512 | |
513 | return 0; | |
514 | } | |
515 | ||
516 | int | |
517 | btree_update_value( | |
518 | struct btree_root *root, | |
519 | unsigned long key, | |
520 | void *new_value) | |
521 | { | |
522 | if (!new_value) | |
523 | return EINVAL; | |
524 | ||
525 | if (!btree_search(root, key) || root->cur_key != key) | |
526 | return ENOENT; | |
527 | ||
528 | #ifdef BTREE_STATS | |
529 | root->stats.value_update += 1; | |
530 | #endif | |
531 | root->cursor->node->ptrs[root->cursor->index] = new_value; | |
532 | ||
533 | return 0; | |
534 | } | |
535 | ||
536 | /* | |
537 | * Cursor modification functions - used for inserting and deleting | |
538 | */ | |
539 | ||
540 | static struct btree_cursor * | |
541 | btree_copy_cursor_prev( | |
542 | struct btree_root *root, | |
543 | struct btree_cursor *dest_cursor, | |
544 | int level) | |
545 | { | |
546 | struct btree_cursor *src_cur = root->cursor + level; | |
547 | struct btree_cursor *dst_cur; | |
548 | int l = level; | |
549 | int i; | |
550 | ||
551 | if (level >= root->height) | |
552 | return NULL; | |
553 | ||
554 | while (src_cur->index == 0) { | |
555 | if (++l >= root->height) | |
556 | return NULL; | |
557 | src_cur++; | |
558 | } | |
559 | for (i = l; i < root->height; i++) | |
560 | dest_cursor[i] = *src_cur++; | |
561 | ||
562 | dst_cur = dest_cursor + l; | |
563 | dst_cur->index--; | |
564 | while (l-- >= level) { | |
565 | dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index]; | |
566 | dest_cursor[l].index = dest_cursor[l].node->num_keys; | |
567 | dst_cur--; | |
568 | } | |
569 | return dest_cursor; | |
570 | } | |
571 | ||
572 | static struct btree_cursor * | |
573 | btree_copy_cursor_next( | |
574 | struct btree_root *root, | |
575 | struct btree_cursor *dest_cursor, | |
576 | int level) | |
577 | { | |
578 | struct btree_cursor *src_cur = root->cursor + level; | |
579 | struct btree_cursor *dst_cur; | |
580 | int l = level; | |
581 | int i; | |
582 | ||
583 | if (level >= root->height) | |
584 | return NULL; | |
585 | ||
586 | while (src_cur->index == src_cur->node->num_keys) { | |
587 | if (++l >= root->height) | |
588 | return NULL; | |
589 | src_cur++; | |
590 | } | |
591 | for (i = l; i < root->height; i++) | |
592 | dest_cursor[i] = *src_cur++; | |
593 | ||
594 | dst_cur = dest_cursor + l; | |
595 | dst_cur->index++; | |
596 | while (l-- >= level) { | |
597 | dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index]; | |
598 | dest_cursor[l].index = 0; | |
599 | dst_cur--; | |
600 | } | |
601 | return dest_cursor; | |
602 | } | |
603 | ||
604 | /* | |
605 | * Shift functions | |
606 | * | |
607 | * Tries to move items in the current leaf to its sibling if it has space. | |
608 | * Used in both insert and delete functions. | |
609 | * Returns the number of items shifted. | |
610 | */ | |
611 | ||
612 | static int | |
613 | btree_shift_to_prev( | |
614 | struct btree_root *root, | |
615 | int level, | |
616 | struct btree_cursor *prev_cursor, | |
617 | int num_children) | |
618 | { | |
619 | struct btree_node *node; | |
620 | struct btree_node *prev_node; | |
621 | int num_remain; /* # of keys left in "node" */ | |
622 | unsigned long key; | |
623 | int i; | |
624 | ||
625 | if (!prev_cursor || !num_children) | |
626 | return 0; | |
627 | ||
628 | prev_node = prev_cursor[level].node; | |
629 | node = root->cursor[level].node; | |
630 | ||
631 | ASSERT(num_children > 0 && num_children <= node->num_keys + 1); | |
632 | ||
633 | if ((prev_node->num_keys + num_children) > BTREE_KEY_MAX) | |
634 | return 0; | |
635 | ||
636 | #ifdef BTREE_STATS | |
637 | root->stats.shift_prev += 1; | |
638 | #endif | |
639 | ||
640 | num_remain = node->num_keys - num_children; | |
641 | ASSERT(num_remain == -1 || num_remain >= BTREE_KEY_MIN); | |
642 | ||
643 | /* shift parent keys around */ | |
644 | level++; | |
645 | if (num_remain > 0) | |
646 | key = node->keys[num_children - 1]; | |
647 | else | |
648 | key = btree_key_of_cursor(root->cursor + level, | |
649 | root->height - level); | |
650 | while (prev_cursor[level].index == prev_cursor[level].node->num_keys) { | |
651 | level++; | |
652 | ASSERT(level < root->height); | |
653 | } | |
654 | prev_node->keys[prev_node->num_keys] = | |
655 | prev_cursor[level].node->keys[prev_cursor[level].index]; | |
656 | prev_cursor[level].node->keys[prev_cursor[level].index] = key; | |
657 | ||
658 | /* copy pointers and keys to the end of the prev node */ | |
659 | for (i = 0; i < num_children - 1; i++) { | |
660 | prev_node->keys[prev_node->num_keys + 1 + i] = node->keys[i]; | |
661 | prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i]; | |
662 | } | |
663 | prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i]; | |
664 | prev_node->num_keys += num_children; | |
665 | ||
666 | /* move remaining pointers/keys to start of node */ | |
667 | if (num_remain >= 0) { | |
668 | for (i = 0; i < num_remain; i++) { | |
669 | node->keys[i] = node->keys[num_children + i]; | |
670 | node->ptrs[i] = node->ptrs[num_children + i]; | |
671 | } | |
672 | node->ptrs[i] = node->ptrs[num_children + i]; | |
673 | node->num_keys = num_remain; | |
674 | } else | |
675 | node->num_keys = 0; | |
676 | ||
677 | return num_children; | |
678 | } | |
679 | ||
680 | static int | |
681 | btree_shift_to_next( | |
682 | struct btree_root *root, | |
683 | int level, | |
684 | struct btree_cursor *next_cursor, | |
685 | int num_children) | |
686 | { | |
687 | struct btree_node *node; | |
688 | struct btree_node *next_node; | |
689 | int num_remain; /* # of children left in node */ | |
690 | int i; | |
691 | ||
692 | if (!next_cursor || !num_children) | |
693 | return 0; | |
694 | ||
695 | node = root->cursor[level].node; | |
696 | next_node = next_cursor[level].node; | |
697 | ||
698 | ASSERT(num_children > 0 && num_children <= node->num_keys + 1); | |
699 | ||
700 | if ((next_node->num_keys + num_children) > BTREE_KEY_MAX) | |
701 | return 0; | |
702 | ||
703 | num_remain = node->num_keys + 1 - num_children; | |
704 | ASSERT(num_remain == 0 || num_remain > BTREE_KEY_MIN); | |
705 | ||
706 | #ifdef BTREE_STATS | |
707 | root->stats.shift_next += 1; | |
708 | #endif | |
709 | ||
710 | /* make space for "num_children" items at beginning of next-leaf */ | |
711 | i = next_node->num_keys; | |
712 | next_node->ptrs[num_children + i] = next_node->ptrs[i]; | |
713 | while (--i >= 0) { | |
714 | next_node->keys[num_children + i] = next_node->keys[i]; | |
715 | next_node->ptrs[num_children + i] = next_node->ptrs[i]; | |
716 | } | |
717 | ||
718 | /* update keys in parent and next node from parent */ | |
719 | do { | |
720 | level++; | |
721 | ASSERT(level < root->height); | |
722 | } while (root->cursor[level].index == | |
723 | root->cursor[level].node->num_keys); | |
724 | ||
725 | next_node->keys[num_children - 1] = | |
726 | root->cursor[level].node->keys[root->cursor[level].index]; | |
727 | root->cursor[level].node->keys[root->cursor[level].index] = | |
728 | node->keys[node->num_keys - num_children]; | |
729 | ||
730 | /* copy last "num_children" items from node into start of next-node */ | |
731 | for (i = 0; i < num_children - 1; i++) { | |
732 | next_node->keys[i] = node->keys[num_remain + i]; | |
733 | next_node->ptrs[i] = node->ptrs[num_remain + i]; | |
734 | } | |
735 | next_node->ptrs[i] = node->ptrs[num_remain + i]; | |
736 | next_node->num_keys += num_children; | |
737 | ||
738 | if (num_remain > 0) | |
739 | node->num_keys -= num_children; | |
740 | else | |
741 | node->num_keys = 0; | |
742 | ||
743 | return num_children; | |
744 | } | |
745 | ||
746 | /* | |
747 | * Insertion functions | |
748 | */ | |
749 | ||
750 | static struct btree_node * | |
751 | btree_increase_height( | |
752 | struct btree_root *root) | |
753 | { | |
754 | struct btree_node *new_root; | |
755 | struct btree_cursor *new_cursor; | |
756 | ||
757 | new_cursor = realloc(root->cursor, (root->height + 1) * | |
758 | sizeof(struct btree_cursor)); | |
759 | if (!new_cursor) | |
760 | return NULL; | |
761 | root->cursor = new_cursor; | |
762 | ||
763 | new_root = btree_node_alloc(); | |
764 | if (!new_root) | |
765 | return NULL; | |
766 | ||
767 | #ifdef BTREE_STATS | |
768 | root->stats.alloced += 1; | |
769 | root->stats.inc_height += 1; | |
770 | root->stats.max_items *= BTREE_PTR_MAX; | |
771 | #endif | |
772 | ||
773 | new_root->ptrs[0] = root->root_node; | |
774 | root->root_node = new_root; | |
775 | ||
776 | root->cursor[root->height].node = new_root; | |
777 | root->cursor[root->height].index = 0; | |
778 | ||
779 | root->height++; | |
780 | ||
781 | return new_root; | |
782 | } | |
783 | ||
784 | static int | |
785 | btree_insert_item( | |
786 | struct btree_root *root, | |
787 | int level, | |
788 | unsigned long key, | |
789 | void *value); | |
790 | ||
791 | ||
792 | static struct btree_node * | |
793 | btree_split( | |
794 | struct btree_root *root, | |
795 | int level, | |
796 | unsigned long key, | |
797 | int *index) | |
798 | { | |
799 | struct btree_node *node = root->cursor[level].node; | |
800 | struct btree_node *new_node; | |
801 | int i; | |
802 | ||
803 | new_node = btree_node_alloc(); | |
804 | if (!new_node) | |
805 | return NULL; | |
806 | ||
807 | if (btree_insert_item(root, level + 1, node->keys[BTREE_KEY_MIN], | |
808 | new_node) != 0) { | |
809 | btree_node_free(new_node); | |
810 | return NULL; | |
811 | } | |
812 | ||
813 | #ifdef BTREE_STATS | |
814 | root->stats.alloced += 1; | |
815 | root->stats.split += 1; | |
816 | #endif | |
817 | ||
818 | for (i = 0; i < BTREE_KEY_MAX - BTREE_KEY_MIN - 1; i++) { | |
819 | new_node->keys[i] = node->keys[BTREE_KEY_MIN + 1 + i]; | |
820 | new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i]; | |
821 | } | |
822 | new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i]; | |
823 | new_node->num_keys = BTREE_KEY_MAX - BTREE_KEY_MIN - 1; | |
824 | ||
825 | node->num_keys = BTREE_KEY_MIN; | |
826 | if (key < node->keys[BTREE_KEY_MIN]) | |
827 | return node; /* index doesn't change */ | |
828 | ||
829 | /* insertion point is in new node... */ | |
830 | *index -= BTREE_KEY_MIN + 1; | |
831 | return new_node; | |
832 | } | |
833 | ||
834 | static int | |
835 | btree_insert_shift_to_prev( | |
836 | struct btree_root *root, | |
837 | int level, | |
838 | int *index) | |
839 | { | |
840 | struct btree_cursor tmp_cursor[root->height]; | |
841 | int n; | |
842 | ||
843 | if (*index <= 0) | |
844 | return -1; | |
845 | ||
846 | if (!btree_copy_cursor_prev(root, tmp_cursor, level + 1)) | |
847 | return -1; | |
848 | ||
849 | n = MIN(*index, (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2); | |
850 | if (!n || !btree_shift_to_prev(root, level, tmp_cursor, n)) | |
851 | return -1; | |
852 | ||
853 | *index -= n; | |
854 | return 0; | |
855 | } | |
856 | ||
857 | static int | |
858 | btree_insert_shift_to_next( | |
859 | struct btree_root *root, | |
860 | int level, | |
861 | int *index) | |
862 | { | |
863 | struct btree_cursor tmp_cursor[root->height]; | |
864 | int n; | |
865 | ||
866 | if (*index >= BTREE_KEY_MAX) | |
867 | return -1; | |
868 | ||
869 | if (!btree_copy_cursor_next(root, tmp_cursor, level + 1)) | |
870 | return -1; | |
871 | ||
872 | n = MIN(BTREE_KEY_MAX - *index, | |
873 | (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2); | |
874 | if (!n || !btree_shift_to_next(root, level, tmp_cursor, n)) | |
875 | return -1; | |
876 | return 0; | |
877 | } | |
878 | ||
879 | static int | |
880 | btree_insert_item( | |
881 | struct btree_root *root, | |
882 | int level, | |
883 | unsigned long key, | |
884 | void *value) | |
885 | { | |
886 | struct btree_node *node = root->cursor[level].node; | |
887 | int index = root->cursor[level].index; | |
888 | int i; | |
889 | ||
890 | if (node->num_keys == BTREE_KEY_MAX) { | |
891 | if (btree_insert_shift_to_prev(root, level, &index) == 0) | |
892 | goto insert; | |
893 | if (btree_insert_shift_to_next(root, level, &index) == 0) | |
894 | goto insert; | |
895 | if (level == root->height - 1) { | |
896 | if (!btree_increase_height(root)) | |
897 | return ENOMEM; | |
898 | } | |
899 | node = btree_split(root, level, key, &index); | |
900 | if (!node) | |
901 | return ENOMEM; | |
902 | } | |
903 | insert: | |
904 | ASSERT(index <= node->num_keys); | |
905 | ||
906 | i = node->num_keys; | |
907 | node->ptrs[i + 1] = node->ptrs[i]; | |
908 | while (--i >= index) { | |
909 | node->keys[i + 1] = node->keys[i]; | |
910 | node->ptrs[i + 1] = node->ptrs[i]; | |
911 | } | |
912 | ||
913 | node->num_keys++; | |
914 | node->keys[index] = key; | |
915 | ||
916 | if (level == 0) | |
917 | node->ptrs[index] = value; | |
918 | else | |
919 | node->ptrs[index + 1] = value; | |
920 | ||
921 | return 0; | |
922 | } | |
923 | ||
924 | ||
925 | ||
926 | int | |
927 | btree_insert( | |
928 | struct btree_root *root, | |
929 | unsigned long key, | |
930 | void *value) | |
931 | { | |
932 | int result; | |
933 | ||
934 | if (!value) | |
935 | return EINVAL; | |
936 | ||
937 | if (btree_search(root, key) && root->cur_key == key) | |
938 | return EEXIST; | |
939 | ||
940 | #ifdef BTREE_STATS | |
941 | root->stats.insert += 1; | |
942 | root->stats.num_items += 1; | |
943 | #endif | |
944 | ||
945 | result = btree_insert_item(root, 0, key, value); | |
946 | ||
947 | btree_invalidate_cursor(root); | |
948 | ||
949 | return result; | |
950 | } | |
951 | ||
952 | ||
953 | /* | |
954 | * Deletion functions | |
955 | * | |
956 | * Rather more complicated as deletions has 4 ways to go once a node | |
957 | * ends up with less than the minimum number of keys: | |
958 | * - move remainder to previous node | |
959 | * - move remainder to next node | |
960 | * (both will involve a parent deletion which may recurse) | |
961 | * - balance by moving some items from previous node | |
962 | * - balance by moving some items from next node | |
963 | */ | |
964 | ||
965 | static void | |
966 | btree_decrease_height( | |
967 | struct btree_root *root) | |
968 | { | |
969 | struct btree_node *old_root = root->root_node; | |
970 | ||
971 | ASSERT(old_root->num_keys == 0); | |
972 | ||
973 | #ifdef BTREE_STATS | |
974 | root->stats.alloced -= 1; | |
975 | root->stats.dec_height += 1; | |
976 | root->stats.max_items /= BTREE_PTR_MAX; | |
977 | #endif | |
978 | root->root_node = old_root->ptrs[0]; | |
979 | btree_node_free(old_root); | |
980 | root->height--; | |
981 | } | |
982 | ||
983 | static int | |
984 | btree_merge_with_prev( | |
985 | struct btree_root *root, | |
986 | int level, | |
987 | struct btree_cursor *prev_cursor) | |
988 | { | |
989 | if (!prev_cursor) | |
990 | return 0; | |
991 | ||
992 | if (!btree_shift_to_prev(root, level, prev_cursor, | |
993 | root->cursor[level].node->num_keys + 1)) | |
994 | return 0; | |
995 | ||
996 | #ifdef BTREE_STATS | |
997 | root->stats.merge_prev += 1; | |
998 | #endif | |
999 | return 1; | |
1000 | } | |
1001 | ||
1002 | static int | |
1003 | btree_merge_with_next( | |
1004 | struct btree_root *root, | |
1005 | int level, | |
1006 | struct btree_cursor *next_cursor) | |
1007 | { | |
1008 | if (!next_cursor) | |
1009 | return 0; | |
1010 | ||
1011 | if (!btree_shift_to_next(root, level, next_cursor, | |
1012 | root->cursor[level].node->num_keys + 1)) | |
1013 | return 0; | |
1014 | ||
1015 | #ifdef BTREE_STATS | |
1016 | root->stats.merge_next += 1; | |
1017 | #endif | |
1018 | return 1; | |
1019 | } | |
1020 | ||
1021 | static int | |
1022 | btree_balance_with_prev( | |
1023 | struct btree_root *root, | |
1024 | int level, | |
1025 | struct btree_cursor *prev_cursor) | |
1026 | { | |
1027 | struct btree_cursor *root_cursor = root->cursor; | |
1028 | ||
1029 | if (!prev_cursor) | |
1030 | return 0; | |
1031 | ASSERT(prev_cursor[level].node->num_keys > BTREE_KEY_MIN); | |
1032 | ||
1033 | #ifdef BTREE_STATS | |
1034 | root->stats.balance_prev += 1; | |
1035 | #endif | |
1036 | /* | |
1037 | * Move some nodes from the prev node into the current node. | |
1038 | * As the shift operation is a right shift and is relative to | |
1039 | * the root cursor, make the root cursor the prev cursor and | |
1040 | * pass in the root cursor as the next cursor. | |
1041 | */ | |
1042 | ||
1043 | root->cursor = prev_cursor; | |
1044 | if (!btree_shift_to_next(root, level, root_cursor, | |
1045 | (prev_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2)) | |
1046 | abort(); | |
1047 | root->cursor = root_cursor; | |
1048 | ||
1049 | return 1; | |
1050 | } | |
1051 | ||
1052 | static int | |
1053 | btree_balance_with_next( | |
1054 | struct btree_root *root, | |
1055 | int level, | |
1056 | struct btree_cursor *next_cursor) | |
1057 | { | |
1058 | struct btree_cursor *root_cursor = root->cursor; | |
1059 | ||
1060 | if (!next_cursor) | |
1061 | return 0; | |
1062 | assert(next_cursor[level].node->num_keys > BTREE_KEY_MIN); | |
1063 | ||
1064 | #ifdef btree_stats | |
1065 | root->stats.balance_next += 1; | |
1066 | #endif | |
1067 | /* | |
1068 | * move some nodes from the next node into the current node. | |
1069 | * as the shift operation is a left shift and is relative to | |
1070 | * the root cursor, make the root cursor the next cursor and | |
1071 | * pass in the root cursor as the prev cursor. | |
1072 | */ | |
1073 | ||
1074 | root->cursor = next_cursor; | |
1075 | if (!btree_shift_to_prev(root, level, root_cursor, | |
1076 | (next_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2)) | |
1077 | abort(); | |
1078 | root->cursor = root_cursor; | |
1079 | ||
1080 | return 1; | |
1081 | ||
1082 | } | |
1083 | ||
1084 | static void | |
1085 | btree_delete_key( | |
1086 | struct btree_root *root, | |
1087 | int level); | |
1088 | ||
1089 | /* | |
1090 | * btree_delete_node: | |
1091 | * | |
1092 | * Return 0 if it's done or 1 if the next level needs to be collapsed | |
1093 | */ | |
1094 | static void | |
1095 | btree_delete_node( | |
1096 | struct btree_root *root, | |
1097 | int level) | |
1098 | { | |
1099 | struct btree_cursor prev_cursor[root->height]; | |
1100 | struct btree_cursor next_cursor[root->height]; | |
1101 | struct btree_cursor *pc; | |
1102 | struct btree_cursor *nc; | |
1103 | ||
1104 | /* | |
1105 | * the node has underflowed, grab or merge keys/items from a | |
1106 | * neighbouring node. | |
1107 | */ | |
1108 | ||
1109 | if (level == root->height - 1) { | |
1110 | if (level > 0 && root->root_node->num_keys == 0) | |
1111 | btree_decrease_height(root); | |
1112 | return; | |
1113 | } | |
1114 | ||
1115 | pc = btree_copy_cursor_prev(root, prev_cursor, level + 1); | |
1116 | if (!btree_merge_with_prev(root, level, pc)) { | |
1117 | nc = btree_copy_cursor_next(root, next_cursor, level + 1); | |
1118 | if (!btree_merge_with_next(root, level, nc)) { | |
1119 | /* merging failed, try redistrubution */ | |
1120 | if (!btree_balance_with_prev(root, level, pc) && | |
1121 | !btree_balance_with_next(root, level, nc)) | |
1122 | abort(); | |
1123 | return; /* when balancing, then the node isn't freed */ | |
1124 | } | |
1125 | } | |
1126 | ||
1127 | #ifdef BTREE_STATS | |
1128 | root->stats.alloced -= 1; | |
1129 | #endif | |
1130 | btree_node_free(root->cursor[level].node); | |
1131 | ||
1132 | btree_delete_key(root, level + 1); | |
1133 | } | |
1134 | ||
1135 | static void | |
1136 | btree_delete_key( | |
1137 | struct btree_root *root, | |
1138 | int level) | |
1139 | { | |
1140 | struct btree_node *node = root->cursor[level].node; | |
1141 | int index = root->cursor[level].index; | |
1142 | ||
1143 | node->num_keys--; | |
1144 | if (index <= node->num_keys) { | |
1145 | /* | |
1146 | * if not deleting the last item, shift higher items down | |
1147 | * to cover the item being deleted | |
1148 | */ | |
1149 | while (index < node->num_keys) { | |
1150 | node->keys[index] = node->keys[index + 1]; | |
1151 | node->ptrs[index] = node->ptrs[index + 1]; | |
1152 | index++; | |
1153 | } | |
1154 | node->ptrs[index] = node->ptrs[index + 1]; | |
1155 | } else { | |
1156 | /* | |
1157 | * else update the associated parent key as the last key | |
1158 | * in the leaf has changed | |
1159 | */ | |
1160 | btree_update_node_key(root, root->cursor, level + 1, | |
1161 | node->keys[node->num_keys]); | |
1162 | } | |
1163 | /* | |
1164 | * if node underflows, either merge with sibling or rebalance | |
1165 | * with sibling. | |
1166 | */ | |
1167 | if (node->num_keys < BTREE_KEY_MIN) | |
1168 | btree_delete_node(root, level); | |
1169 | } | |
1170 | ||
1171 | void * | |
1172 | btree_delete( | |
1173 | struct btree_root *root, | |
1174 | unsigned long key) | |
1175 | { | |
1176 | void *value; | |
1177 | ||
1178 | value = btree_lookup(root, key); | |
1179 | if (!value) | |
1180 | return NULL; | |
1181 | ||
1182 | #ifdef BTREE_STATS | |
1183 | root->stats.delete += 1; | |
1184 | root->stats.num_items -= 1; | |
1185 | #endif | |
1186 | ||
1187 | btree_delete_key(root, 0); | |
1188 | ||
1189 | btree_invalidate_cursor(root); | |
1190 | ||
1191 | return value; | |
1192 | } | |
1193 | ||
1194 | #ifdef BTREE_STATS | |
1195 | void | |
1196 | btree_print_stats( | |
1197 | struct btree_root *root, | |
1198 | FILE *f) | |
1199 | { | |
1200 | unsigned long max_items = root->stats.max_items * | |
1201 | (root->root_node->num_keys + 1); | |
1202 | ||
1203 | fprintf(f, "\tnum_items = %lu, max_items = %lu (%lu%%)\n", | |
1204 | root->stats.num_items, max_items, | |
1205 | root->stats.num_items * 100 / max_items); | |
1206 | fprintf(f, "\talloced = %d nodes, %lu bytes, %lu bytes per item\n", | |
1207 | root->stats.alloced, | |
1208 | root->stats.alloced * sizeof(struct btree_node), | |
1209 | root->stats.alloced * sizeof(struct btree_node) / | |
1210 | root->stats.num_items); | |
1211 | fprintf(f, "\tlookup = %d\n", root->stats.lookup); | |
1212 | fprintf(f, "\tfind = %d\n", root->stats.find); | |
1213 | fprintf(f, "\tcache_hits = %d\n", root->stats.cache_hits); | |
1214 | fprintf(f, "\tcache_misses = %d\n", root->stats.cache_misses); | |
1215 | fprintf(f, "\tkey_update = %d\n", root->stats.key_update); | |
1216 | fprintf(f, "\tvalue_update = %d\n", root->stats.value_update); | |
1217 | fprintf(f, "\tinsert = %d\n", root->stats.insert); | |
1218 | fprintf(f, "\tshift_prev = %d\n", root->stats.shift_prev); | |
1219 | fprintf(f, "\tshift_next = %d\n", root->stats.shift_next); | |
1220 | fprintf(f, "\tsplit = %d\n", root->stats.split); | |
1221 | fprintf(f, "\tinc_height = %d\n", root->stats.inc_height); | |
1222 | fprintf(f, "\tdelete = %d\n", root->stats.delete); | |
1223 | fprintf(f, "\tmerge_prev = %d\n", root->stats.merge_prev); | |
1224 | fprintf(f, "\tmerge_next = %d\n", root->stats.merge_next); | |
1225 | fprintf(f, "\tbalance_prev = %d\n", root->stats.balance_prev); | |
1226 | fprintf(f, "\tbalance_next = %d\n", root->stats.balance_next); | |
1227 | fprintf(f, "\tdec_height = %d\n", root->stats.dec_height); | |
1228 | } | |
1229 | #endif |