]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/btree.c
xfs_db: add a superblock info command
[thirdparty/xfsprogs-dev.git] / repair / btree.c
1 /*
2 * Copyright (c) 2007, Silicon Graphics, Inc. Barry Naujok <bnaujok@sgi.com>
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "libxfs.h"
20 #include "btree.h"
21
22 /*
23 * Maximum number of keys per node. Must be greater than 2 for the code
24 * to work.
25 */
26 #define BTREE_KEY_MAX 7
27 #define BTREE_KEY_MIN (BTREE_KEY_MAX / 2)
28
29 #define BTREE_PTR_MAX (BTREE_KEY_MAX + 1)
30
31 struct btree_node {
32 unsigned long num_keys;
33 unsigned long keys[BTREE_KEY_MAX];
34 struct btree_node *ptrs[BTREE_PTR_MAX];
35 };
36
37 struct btree_cursor {
38 struct btree_node *node;
39 int index;
40 };
41
42 struct btree_root {
43 struct btree_node *root_node;
44 struct btree_cursor *cursor; /* track path to end leaf */
45 int height;
46 /* lookup cache */
47 int keys_valid; /* set if the cache is valid */
48 unsigned long cur_key;
49 unsigned long next_key;
50 void *next_value;
51 unsigned long prev_key;
52 void *prev_value;
53 #ifdef BTREE_STATS
54 struct btree_stats {
55 unsigned long num_items;
56 unsigned long max_items;
57 int alloced;
58 int cache_hits;
59 int cache_misses;
60 int lookup;
61 int find;
62 int key_update;
63 int value_update;
64 int insert;
65 int delete;
66 int inc_height;
67 int dec_height;
68 int shift_prev;
69 int shift_next;
70 int split;
71 int merge_prev;
72 int merge_next;
73 int balance_prev;
74 int balance_next;
75 } stats;
76 #endif
77 };
78
79
80 static struct btree_node *
81 btree_node_alloc(void)
82 {
83 return calloc(1, sizeof(struct btree_node));
84 }
85
86 static void
87 btree_node_free(
88 struct btree_node *node)
89 {
90 free(node);
91 }
92
93 static void
94 btree_free_nodes(
95 struct btree_node *node,
96 int level)
97 {
98 int i;
99
100 if (level)
101 for (i = 0; i <= node->num_keys; i++)
102 btree_free_nodes(node->ptrs[i], level - 1);
103 btree_node_free(node);
104 }
105
106 static void
107 __btree_init(
108 struct btree_root *root)
109 {
110 memset(root, 0, sizeof(struct btree_root));
111 root->height = 1;
112 root->cursor = calloc(1, sizeof(struct btree_cursor));
113 root->root_node = btree_node_alloc();
114 ASSERT(root->root_node);
115 #ifdef BTREE_STATS
116 root->stats.max_items = 1;
117 root->stats.alloced += 1;
118 #endif
119 }
120
121 static void
122 __btree_free(
123 struct btree_root *root)
124 {
125 btree_free_nodes(root->root_node, root->height - 1);
126 free(root->cursor);
127 root->height = 0;
128 root->cursor = NULL;
129 root->root_node = NULL;
130 }
131
132 void
133 btree_init(
134 struct btree_root **root)
135 {
136 *root = calloc(1, sizeof(struct btree_root));
137 __btree_init(*root);
138 }
139
140 void
141 btree_clear(
142 struct btree_root *root)
143 {
144 __btree_free(root);
145 __btree_init(root);
146 }
147
148 void
149 btree_destroy(
150 struct btree_root *root)
151 {
152 __btree_free(root);
153 free(root);
154 }
155
156 int
157 btree_is_empty(
158 struct btree_root *root)
159 {
160 return root->root_node->num_keys == 0;
161 }
162
163 static inline void
164 btree_invalidate_cursor(
165 struct btree_root *root)
166 {
167 root->cursor[0].node = NULL;
168 root->keys_valid = 0;
169 }
170
171 static inline unsigned long
172 btree_key_of_cursor(
173 struct btree_cursor *cursor,
174 int height)
175 {
176 while (cursor->node->num_keys == cursor->index && --height > 0)
177 cursor++;
178 return cursor->node->keys[cursor->index];
179 }
180
181 static void *
182 btree_get_prev(
183 struct btree_root *root,
184 unsigned long *key)
185 {
186 struct btree_cursor *cur = root->cursor;
187 int level = 0;
188 struct btree_node *node;
189
190 if (cur->index > 0) {
191 if (key)
192 *key = cur->node->keys[cur->index - 1];
193 return cur->node->ptrs[cur->index - 1];
194 }
195
196 /* else need to go up and back down the tree to find the previous */
197 do {
198 if (cur->index)
199 break;
200 cur++;
201 } while (++level < root->height);
202
203 if (level == root->height)
204 return NULL;
205
206 /* the key is in the current level */
207 if (key)
208 *key = cur->node->keys[cur->index - 1];
209
210 /* descend back down the right side to get the pointer */
211 node = cur->node->ptrs[cur->index - 1];
212 while (level--)
213 node = node->ptrs[node->num_keys];
214 return node;
215 }
216
217 static void *
218 btree_get_next(
219 struct btree_root *root,
220 unsigned long *key)
221 {
222 struct btree_cursor *cur = root->cursor;
223 int level = 0;
224 struct btree_node *node;
225
226 while (cur->index == cur->node->num_keys) {
227 if (++level == root->height)
228 return NULL;
229 cur++;
230 }
231 if (level == 0) {
232 if (key) {
233 cur->index++;
234 *key = btree_key_of_cursor(cur, root->height);
235 cur->index--;
236 }
237 return cur->node->ptrs[cur->index + 1];
238 }
239
240 node = cur->node->ptrs[cur->index + 1];
241 while (--level > 0)
242 node = node->ptrs[0];
243 if (key)
244 *key = node->keys[0];
245 return node->ptrs[0];
246 }
247
248 /*
249 * Lookup/Search functions
250 */
251
252 static int
253 btree_do_search(
254 struct btree_root *root,
255 unsigned long key)
256 {
257 unsigned long k = 0;
258 struct btree_cursor *cur = root->cursor + root->height;
259 struct btree_node *node = root->root_node;
260 int height = root->height;
261 int key_found = 0;
262 int i;
263
264 while (--height >= 0) {
265 cur--;
266 for (i = 0; i < node->num_keys; i++)
267 if (node->keys[i] >= key) {
268 k = node->keys[i];
269 key_found = 1;
270 break;
271 }
272 cur->node = node;
273 cur->index = i;
274 node = node->ptrs[i];
275 }
276 root->keys_valid = key_found;
277 if (!key_found)
278 return 0;
279
280 root->cur_key = k;
281 root->next_value = NULL; /* do on-demand next value lookup */
282 root->prev_value = btree_get_prev(root, &root->prev_key);
283 return 1;
284 }
285
286 static int
287 btree_search(
288 struct btree_root *root,
289 unsigned long key)
290 {
291 if (root->keys_valid && key <= root->cur_key &&
292 (!root->prev_value || key > root->prev_key)) {
293 #ifdef BTREE_STATS
294 root->stats.cache_hits++;
295 #endif
296 return 1;
297 }
298 #ifdef BTREE_STATS
299 root->stats.cache_misses++;
300 #endif
301 return btree_do_search(root, key);
302 }
303
304 void *
305 btree_find(
306 struct btree_root *root,
307 unsigned long key,
308 unsigned long *actual_key)
309 {
310 #ifdef BTREE_STATS
311 root->stats.find += 1;
312 #endif
313 if (!btree_search(root, key))
314 return NULL;
315
316 if (actual_key)
317 *actual_key = root->cur_key;
318 return root->cursor->node->ptrs[root->cursor->index];
319 }
320
321 void *
322 btree_lookup(
323 struct btree_root *root,
324 unsigned long key)
325 {
326 #ifdef BTREE_STATS
327 root->stats.lookup += 1;
328 #endif
329 if (!btree_search(root, key) || root->cur_key != key)
330 return NULL;
331 return root->cursor->node->ptrs[root->cursor->index];
332 }
333
334 void *
335 btree_peek_prev(
336 struct btree_root *root,
337 unsigned long *key)
338 {
339 if (!root->keys_valid)
340 return NULL;
341 if (key)
342 *key = root->prev_key;
343 return root->prev_value;
344 }
345
346 void *
347 btree_peek_next(
348 struct btree_root *root,
349 unsigned long *key)
350 {
351 if (!root->keys_valid)
352 return NULL;
353 if (!root->next_value)
354 root->next_value = btree_get_next(root, &root->next_key);
355 if (key)
356 *key = root->next_key;
357 return root->next_value;
358 }
359
360 static void *
361 btree_move_cursor_to_next(
362 struct btree_root *root,
363 unsigned long *key)
364 {
365 struct btree_cursor *cur = root->cursor;
366 int level = 0;
367
368 while (cur->index == cur->node->num_keys) {
369 if (++level == root->height)
370 return NULL;
371 cur++;
372 }
373 cur->index++;
374 if (level == 0) {
375 if (key)
376 *key = btree_key_of_cursor(cur, root->height);
377 return cur->node->ptrs[cur->index];
378 }
379
380 while (--level >= 0) {
381 root->cursor[level].node = cur->node->ptrs[cur->index];
382 root->cursor[level].index = 0;
383 cur--;
384 }
385 if (key)
386 *key = cur->node->keys[0];
387 return cur->node->ptrs[0];
388 }
389
390 void *
391 btree_lookup_next(
392 struct btree_root *root,
393 unsigned long *key)
394 {
395 void *value;
396
397 if (!root->keys_valid)
398 return NULL;
399
400 root->prev_key = root->cur_key;
401 root->prev_value = root->cursor->node->ptrs[root->cursor->index];
402
403 value = btree_move_cursor_to_next(root, &root->cur_key);
404 if (!value) {
405 btree_invalidate_cursor(root);
406 return NULL;
407 }
408 root->next_value = NULL; /* on-demand next value fetch */
409 if (key)
410 *key = root->cur_key;
411 return value;
412 }
413
414 static void *
415 btree_move_cursor_to_prev(
416 struct btree_root *root,
417 unsigned long *key)
418 {
419 struct btree_cursor *cur = root->cursor;
420 int level = 0;
421
422 while (cur->index == 0) {
423 if (++level == root->height)
424 return NULL;
425 cur++;
426 }
427 cur->index--;
428 if (key) /* the key is in the current level */
429 *key = cur->node->keys[cur->index];
430 while (level > 0) {
431 level--;
432 root->cursor[level].node = cur->node->ptrs[cur->index];
433 root->cursor[level].index = root->cursor[level].node->num_keys;
434 cur--;
435 }
436 return cur->node->ptrs[cur->index];
437 }
438
439 void *
440 btree_lookup_prev(
441 struct btree_root *root,
442 unsigned long *key)
443 {
444 void *value;
445
446 if (!root->keys_valid)
447 return NULL;
448
449 value = btree_move_cursor_to_prev(root, &root->cur_key);
450 if (!value)
451 return NULL;
452 root->prev_value = btree_get_prev(root, &root->prev_key);
453 root->next_value = NULL; /* on-demand next value fetch */
454 if (key)
455 *key = root->cur_key;
456 return value;
457 }
458
459 void *
460 btree_uncached_lookup(
461 struct btree_root *root,
462 unsigned long key)
463 {
464 /* cursor-less (ie. uncached) lookup */
465 int height = root->height - 1;
466 struct btree_node *node = root->root_node;
467 int i;
468 int key_found = 0;
469
470 while (height >= 0) {
471 for (i = 0; i < node->num_keys; i++)
472 if (node->keys[i] >= key) {
473 key_found = node->keys[i] == key;
474 break;
475 }
476 node = node->ptrs[i];
477 height--;
478 }
479 return key_found ? node : NULL;
480 }
481
482 /* Update functions */
483
484 static inline void
485 btree_update_node_key(
486 struct btree_root *root,
487 struct btree_cursor *cursor,
488 int level,
489 unsigned long new_key)
490 {
491 int i;
492
493 #ifdef BTREE_STATS
494 root->stats.key_update += 1;
495 #endif
496
497 cursor += level;
498 for (i = level; i < root->height; i++) {
499 if (cursor->index < cursor->node->num_keys) {
500 cursor->node->keys[cursor->index] = new_key;
501 break;
502 }
503 cursor++;
504 }
505 }
506
507 int
508 btree_update_key(
509 struct btree_root *root,
510 unsigned long old_key,
511 unsigned long new_key)
512 {
513 if (!btree_search(root, old_key) || root->cur_key != old_key)
514 return ENOENT;
515
516 if (root->next_value && new_key >= root->next_key)
517 return EINVAL;
518
519 if (root->prev_value && new_key <= root->prev_key)
520 return EINVAL;
521
522 btree_update_node_key(root, root->cursor, 0, new_key);
523 root->cur_key = new_key;
524
525 return 0;
526 }
527
528 int
529 btree_update_value(
530 struct btree_root *root,
531 unsigned long key,
532 void *new_value)
533 {
534 if (!new_value)
535 return EINVAL;
536
537 if (!btree_search(root, key) || root->cur_key != key)
538 return ENOENT;
539
540 #ifdef BTREE_STATS
541 root->stats.value_update += 1;
542 #endif
543 root->cursor->node->ptrs[root->cursor->index] = new_value;
544
545 return 0;
546 }
547
548 /*
549 * Cursor modification functions - used for inserting and deleting
550 */
551
552 static struct btree_cursor *
553 btree_copy_cursor_prev(
554 struct btree_root *root,
555 struct btree_cursor *dest_cursor,
556 int level)
557 {
558 struct btree_cursor *src_cur = root->cursor + level;
559 struct btree_cursor *dst_cur;
560 int l = level;
561 int i;
562
563 if (level >= root->height)
564 return NULL;
565
566 while (src_cur->index == 0) {
567 if (++l >= root->height)
568 return NULL;
569 src_cur++;
570 }
571 for (i = l; i < root->height; i++)
572 dest_cursor[i] = *src_cur++;
573
574 dst_cur = dest_cursor + l;
575 dst_cur->index--;
576 while (l-- >= level) {
577 dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index];
578 dest_cursor[l].index = dest_cursor[l].node->num_keys;
579 dst_cur--;
580 }
581 return dest_cursor;
582 }
583
584 static struct btree_cursor *
585 btree_copy_cursor_next(
586 struct btree_root *root,
587 struct btree_cursor *dest_cursor,
588 int level)
589 {
590 struct btree_cursor *src_cur = root->cursor + level;
591 struct btree_cursor *dst_cur;
592 int l = level;
593 int i;
594
595 if (level >= root->height)
596 return NULL;
597
598 while (src_cur->index == src_cur->node->num_keys) {
599 if (++l >= root->height)
600 return NULL;
601 src_cur++;
602 }
603 for (i = l; i < root->height; i++)
604 dest_cursor[i] = *src_cur++;
605
606 dst_cur = dest_cursor + l;
607 dst_cur->index++;
608 while (l-- >= level) {
609 dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index];
610 dest_cursor[l].index = 0;
611 dst_cur--;
612 }
613 return dest_cursor;
614 }
615
616 /*
617 * Shift functions
618 *
619 * Tries to move items in the current leaf to its sibling if it has space.
620 * Used in both insert and delete functions.
621 * Returns the number of items shifted.
622 */
623
624 static int
625 btree_shift_to_prev(
626 struct btree_root *root,
627 int level,
628 struct btree_cursor *prev_cursor,
629 int num_children)
630 {
631 struct btree_node *node;
632 struct btree_node *prev_node;
633 int num_remain; /* # of keys left in "node" */
634 unsigned long key;
635 int i;
636
637 if (!prev_cursor || !num_children)
638 return 0;
639
640 prev_node = prev_cursor[level].node;
641 node = root->cursor[level].node;
642
643 ASSERT(num_children > 0 && num_children <= node->num_keys + 1);
644
645 if ((prev_node->num_keys + num_children) > BTREE_KEY_MAX)
646 return 0;
647
648 #ifdef BTREE_STATS
649 root->stats.shift_prev += 1;
650 #endif
651
652 num_remain = node->num_keys - num_children;
653 ASSERT(num_remain == -1 || num_remain >= BTREE_KEY_MIN);
654
655 /* shift parent keys around */
656 level++;
657 if (num_remain > 0)
658 key = node->keys[num_children - 1];
659 else
660 key = btree_key_of_cursor(root->cursor + level,
661 root->height - level);
662 while (prev_cursor[level].index == prev_cursor[level].node->num_keys) {
663 level++;
664 ASSERT(level < root->height);
665 }
666 prev_node->keys[prev_node->num_keys] =
667 prev_cursor[level].node->keys[prev_cursor[level].index];
668 prev_cursor[level].node->keys[prev_cursor[level].index] = key;
669
670 /* copy pointers and keys to the end of the prev node */
671 for (i = 0; i < num_children - 1; i++) {
672 prev_node->keys[prev_node->num_keys + 1 + i] = node->keys[i];
673 prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i];
674 }
675 prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i];
676 prev_node->num_keys += num_children;
677
678 /* move remaining pointers/keys to start of node */
679 if (num_remain >= 0) {
680 for (i = 0; i < num_remain; i++) {
681 node->keys[i] = node->keys[num_children + i];
682 node->ptrs[i] = node->ptrs[num_children + i];
683 }
684 node->ptrs[i] = node->ptrs[num_children + i];
685 node->num_keys = num_remain;
686 } else
687 node->num_keys = 0;
688
689 return num_children;
690 }
691
692 static int
693 btree_shift_to_next(
694 struct btree_root *root,
695 int level,
696 struct btree_cursor *next_cursor,
697 int num_children)
698 {
699 struct btree_node *node;
700 struct btree_node *next_node;
701 int num_remain; /* # of children left in node */
702 int i;
703
704 if (!next_cursor || !num_children)
705 return 0;
706
707 node = root->cursor[level].node;
708 next_node = next_cursor[level].node;
709
710 ASSERT(num_children > 0 && num_children <= node->num_keys + 1);
711
712 if ((next_node->num_keys + num_children) > BTREE_KEY_MAX)
713 return 0;
714
715 num_remain = node->num_keys + 1 - num_children;
716 ASSERT(num_remain == 0 || num_remain > BTREE_KEY_MIN);
717
718 #ifdef BTREE_STATS
719 root->stats.shift_next += 1;
720 #endif
721
722 /* make space for "num_children" items at beginning of next-leaf */
723 i = next_node->num_keys;
724 next_node->ptrs[num_children + i] = next_node->ptrs[i];
725 while (--i >= 0) {
726 next_node->keys[num_children + i] = next_node->keys[i];
727 next_node->ptrs[num_children + i] = next_node->ptrs[i];
728 }
729
730 /* update keys in parent and next node from parent */
731 do {
732 level++;
733 ASSERT(level < root->height);
734 } while (root->cursor[level].index ==
735 root->cursor[level].node->num_keys);
736
737 next_node->keys[num_children - 1] =
738 root->cursor[level].node->keys[root->cursor[level].index];
739 root->cursor[level].node->keys[root->cursor[level].index] =
740 node->keys[node->num_keys - num_children];
741
742 /* copy last "num_children" items from node into start of next-node */
743 for (i = 0; i < num_children - 1; i++) {
744 next_node->keys[i] = node->keys[num_remain + i];
745 next_node->ptrs[i] = node->ptrs[num_remain + i];
746 }
747 next_node->ptrs[i] = node->ptrs[num_remain + i];
748 next_node->num_keys += num_children;
749
750 if (num_remain > 0)
751 node->num_keys -= num_children;
752 else
753 node->num_keys = 0;
754
755 return num_children;
756 }
757
758 /*
759 * Insertion functions
760 */
761
762 static struct btree_node *
763 btree_increase_height(
764 struct btree_root *root)
765 {
766 struct btree_node *new_root;
767 struct btree_cursor *new_cursor;
768
769 new_cursor = realloc(root->cursor, (root->height + 1) *
770 sizeof(struct btree_cursor));
771 if (!new_cursor)
772 return NULL;
773 root->cursor = new_cursor;
774
775 new_root = btree_node_alloc();
776 if (!new_root)
777 return NULL;
778
779 #ifdef BTREE_STATS
780 root->stats.alloced += 1;
781 root->stats.inc_height += 1;
782 root->stats.max_items *= BTREE_PTR_MAX;
783 #endif
784
785 new_root->ptrs[0] = root->root_node;
786 root->root_node = new_root;
787
788 root->cursor[root->height].node = new_root;
789 root->cursor[root->height].index = 0;
790
791 root->height++;
792
793 return new_root;
794 }
795
796 static int
797 btree_insert_item(
798 struct btree_root *root,
799 int level,
800 unsigned long key,
801 void *value);
802
803
804 static struct btree_node *
805 btree_split(
806 struct btree_root *root,
807 int level,
808 unsigned long key,
809 int *index)
810 {
811 struct btree_node *node = root->cursor[level].node;
812 struct btree_node *new_node;
813 int i;
814
815 new_node = btree_node_alloc();
816 if (!new_node)
817 return NULL;
818
819 if (btree_insert_item(root, level + 1, node->keys[BTREE_KEY_MIN],
820 new_node) != 0) {
821 btree_node_free(new_node);
822 return NULL;
823 }
824
825 #ifdef BTREE_STATS
826 root->stats.alloced += 1;
827 root->stats.split += 1;
828 #endif
829
830 for (i = 0; i < BTREE_KEY_MAX - BTREE_KEY_MIN - 1; i++) {
831 new_node->keys[i] = node->keys[BTREE_KEY_MIN + 1 + i];
832 new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i];
833 }
834 new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i];
835 new_node->num_keys = BTREE_KEY_MAX - BTREE_KEY_MIN - 1;
836
837 node->num_keys = BTREE_KEY_MIN;
838 if (key < node->keys[BTREE_KEY_MIN])
839 return node; /* index doesn't change */
840
841 /* insertion point is in new node... */
842 *index -= BTREE_KEY_MIN + 1;
843 return new_node;
844 }
845
846 static int
847 btree_insert_shift_to_prev(
848 struct btree_root *root,
849 int level,
850 int *index)
851 {
852 struct btree_cursor tmp_cursor[root->height];
853 int n;
854
855 if (*index <= 0)
856 return -1;
857
858 if (!btree_copy_cursor_prev(root, tmp_cursor, level + 1))
859 return -1;
860
861 n = MIN(*index, (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2);
862 if (!n || !btree_shift_to_prev(root, level, tmp_cursor, n))
863 return -1;
864
865 *index -= n;
866 return 0;
867 }
868
869 static int
870 btree_insert_shift_to_next(
871 struct btree_root *root,
872 int level,
873 int *index)
874 {
875 struct btree_cursor tmp_cursor[root->height];
876 int n;
877
878 if (*index >= BTREE_KEY_MAX)
879 return -1;
880
881 if (!btree_copy_cursor_next(root, tmp_cursor, level + 1))
882 return -1;
883
884 n = MIN(BTREE_KEY_MAX - *index,
885 (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2);
886 if (!n || !btree_shift_to_next(root, level, tmp_cursor, n))
887 return -1;
888 return 0;
889 }
890
891 static int
892 btree_insert_item(
893 struct btree_root *root,
894 int level,
895 unsigned long key,
896 void *value)
897 {
898 struct btree_node *node = root->cursor[level].node;
899 int index = root->cursor[level].index;
900 int i;
901
902 if (node->num_keys == BTREE_KEY_MAX) {
903 if (btree_insert_shift_to_prev(root, level, &index) == 0)
904 goto insert;
905 if (btree_insert_shift_to_next(root, level, &index) == 0)
906 goto insert;
907 if (level == root->height - 1) {
908 if (!btree_increase_height(root))
909 return ENOMEM;
910 }
911 node = btree_split(root, level, key, &index);
912 if (!node)
913 return ENOMEM;
914 }
915 insert:
916 ASSERT(index <= node->num_keys);
917
918 i = node->num_keys;
919 node->ptrs[i + 1] = node->ptrs[i];
920 while (--i >= index) {
921 node->keys[i + 1] = node->keys[i];
922 node->ptrs[i + 1] = node->ptrs[i];
923 }
924
925 node->num_keys++;
926 node->keys[index] = key;
927
928 if (level == 0)
929 node->ptrs[index] = value;
930 else
931 node->ptrs[index + 1] = value;
932
933 return 0;
934 }
935
936
937
938 int
939 btree_insert(
940 struct btree_root *root,
941 unsigned long key,
942 void *value)
943 {
944 int result;
945
946 if (!value)
947 return EINVAL;
948
949 if (btree_search(root, key) && root->cur_key == key)
950 return EEXIST;
951
952 #ifdef BTREE_STATS
953 root->stats.insert += 1;
954 root->stats.num_items += 1;
955 #endif
956
957 result = btree_insert_item(root, 0, key, value);
958
959 btree_invalidate_cursor(root);
960
961 return result;
962 }
963
964
965 /*
966 * Deletion functions
967 *
968 * Rather more complicated as deletions has 4 ways to go once a node
969 * ends up with less than the minimum number of keys:
970 * - move remainder to previous node
971 * - move remainder to next node
972 * (both will involve a parent deletion which may recurse)
973 * - balance by moving some items from previous node
974 * - balance by moving some items from next node
975 */
976
977 static void
978 btree_decrease_height(
979 struct btree_root *root)
980 {
981 struct btree_node *old_root = root->root_node;
982
983 ASSERT(old_root->num_keys == 0);
984
985 #ifdef BTREE_STATS
986 root->stats.alloced -= 1;
987 root->stats.dec_height += 1;
988 root->stats.max_items /= BTREE_PTR_MAX;
989 #endif
990 root->root_node = old_root->ptrs[0];
991 btree_node_free(old_root);
992 root->height--;
993 }
994
995 static int
996 btree_merge_with_prev(
997 struct btree_root *root,
998 int level,
999 struct btree_cursor *prev_cursor)
1000 {
1001 if (!prev_cursor)
1002 return 0;
1003
1004 if (!btree_shift_to_prev(root, level, prev_cursor,
1005 root->cursor[level].node->num_keys + 1))
1006 return 0;
1007
1008 #ifdef BTREE_STATS
1009 root->stats.merge_prev += 1;
1010 #endif
1011 return 1;
1012 }
1013
1014 static int
1015 btree_merge_with_next(
1016 struct btree_root *root,
1017 int level,
1018 struct btree_cursor *next_cursor)
1019 {
1020 if (!next_cursor)
1021 return 0;
1022
1023 if (!btree_shift_to_next(root, level, next_cursor,
1024 root->cursor[level].node->num_keys + 1))
1025 return 0;
1026
1027 #ifdef BTREE_STATS
1028 root->stats.merge_next += 1;
1029 #endif
1030 return 1;
1031 }
1032
1033 static int
1034 btree_balance_with_prev(
1035 struct btree_root *root,
1036 int level,
1037 struct btree_cursor *prev_cursor)
1038 {
1039 struct btree_cursor *root_cursor = root->cursor;
1040
1041 if (!prev_cursor)
1042 return 0;
1043 ASSERT(prev_cursor[level].node->num_keys > BTREE_KEY_MIN);
1044
1045 #ifdef BTREE_STATS
1046 root->stats.balance_prev += 1;
1047 #endif
1048 /*
1049 * Move some nodes from the prev node into the current node.
1050 * As the shift operation is a right shift and is relative to
1051 * the root cursor, make the root cursor the prev cursor and
1052 * pass in the root cursor as the next cursor.
1053 */
1054
1055 root->cursor = prev_cursor;
1056 if (!btree_shift_to_next(root, level, root_cursor,
1057 (prev_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2))
1058 abort();
1059 root->cursor = root_cursor;
1060
1061 return 1;
1062 }
1063
1064 static int
1065 btree_balance_with_next(
1066 struct btree_root *root,
1067 int level,
1068 struct btree_cursor *next_cursor)
1069 {
1070 struct btree_cursor *root_cursor = root->cursor;
1071
1072 if (!next_cursor)
1073 return 0;
1074 assert(next_cursor[level].node->num_keys > BTREE_KEY_MIN);
1075
1076 #ifdef btree_stats
1077 root->stats.balance_next += 1;
1078 #endif
1079 /*
1080 * move some nodes from the next node into the current node.
1081 * as the shift operation is a left shift and is relative to
1082 * the root cursor, make the root cursor the next cursor and
1083 * pass in the root cursor as the prev cursor.
1084 */
1085
1086 root->cursor = next_cursor;
1087 if (!btree_shift_to_prev(root, level, root_cursor,
1088 (next_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2))
1089 abort();
1090 root->cursor = root_cursor;
1091
1092 return 1;
1093
1094 }
1095
1096 static void
1097 btree_delete_key(
1098 struct btree_root *root,
1099 int level);
1100
1101 /*
1102 * btree_delete_node:
1103 *
1104 * Return 0 if it's done or 1 if the next level needs to be collapsed
1105 */
1106 static void
1107 btree_delete_node(
1108 struct btree_root *root,
1109 int level)
1110 {
1111 struct btree_cursor prev_cursor[root->height];
1112 struct btree_cursor next_cursor[root->height];
1113 struct btree_cursor *pc;
1114 struct btree_cursor *nc;
1115
1116 /*
1117 * the node has underflowed, grab or merge keys/items from a
1118 * neighbouring node.
1119 */
1120
1121 if (level == root->height - 1) {
1122 if (level > 0 && root->root_node->num_keys == 0)
1123 btree_decrease_height(root);
1124 return;
1125 }
1126
1127 pc = btree_copy_cursor_prev(root, prev_cursor, level + 1);
1128 if (!btree_merge_with_prev(root, level, pc)) {
1129 nc = btree_copy_cursor_next(root, next_cursor, level + 1);
1130 if (!btree_merge_with_next(root, level, nc)) {
1131 /* merging failed, try redistrubution */
1132 if (!btree_balance_with_prev(root, level, pc) &&
1133 !btree_balance_with_next(root, level, nc))
1134 abort();
1135 return; /* when balancing, then the node isn't freed */
1136 }
1137 }
1138
1139 #ifdef BTREE_STATS
1140 root->stats.alloced -= 1;
1141 #endif
1142 btree_node_free(root->cursor[level].node);
1143
1144 btree_delete_key(root, level + 1);
1145 }
1146
1147 static void
1148 btree_delete_key(
1149 struct btree_root *root,
1150 int level)
1151 {
1152 struct btree_node *node = root->cursor[level].node;
1153 int index = root->cursor[level].index;
1154
1155 node->num_keys--;
1156 if (index <= node->num_keys) {
1157 /*
1158 * if not deleting the last item, shift higher items down
1159 * to cover the item being deleted
1160 */
1161 while (index < node->num_keys) {
1162 node->keys[index] = node->keys[index + 1];
1163 node->ptrs[index] = node->ptrs[index + 1];
1164 index++;
1165 }
1166 node->ptrs[index] = node->ptrs[index + 1];
1167 } else {
1168 /*
1169 * else update the associated parent key as the last key
1170 * in the leaf has changed
1171 */
1172 btree_update_node_key(root, root->cursor, level + 1,
1173 node->keys[node->num_keys]);
1174 }
1175 /*
1176 * if node underflows, either merge with sibling or rebalance
1177 * with sibling.
1178 */
1179 if (node->num_keys < BTREE_KEY_MIN)
1180 btree_delete_node(root, level);
1181 }
1182
1183 void *
1184 btree_delete(
1185 struct btree_root *root,
1186 unsigned long key)
1187 {
1188 void *value;
1189
1190 value = btree_lookup(root, key);
1191 if (!value)
1192 return NULL;
1193
1194 #ifdef BTREE_STATS
1195 root->stats.delete += 1;
1196 root->stats.num_items -= 1;
1197 #endif
1198
1199 btree_delete_key(root, 0);
1200
1201 btree_invalidate_cursor(root);
1202
1203 return value;
1204 }
1205
1206 #ifdef BTREE_STATS
1207 void
1208 btree_print_stats(
1209 struct btree_root *root,
1210 FILE *f)
1211 {
1212 unsigned long max_items = root->stats.max_items *
1213 (root->root_node->num_keys + 1);
1214
1215 fprintf(f, "\tnum_items = %lu, max_items = %lu (%lu%%)\n",
1216 root->stats.num_items, max_items,
1217 root->stats.num_items * 100 / max_items);
1218 fprintf(f, "\talloced = %d nodes, %lu bytes, %lu bytes per item\n",
1219 root->stats.alloced,
1220 root->stats.alloced * sizeof(struct btree_node),
1221 root->stats.alloced * sizeof(struct btree_node) /
1222 root->stats.num_items);
1223 fprintf(f, "\tlookup = %d\n", root->stats.lookup);
1224 fprintf(f, "\tfind = %d\n", root->stats.find);
1225 fprintf(f, "\tcache_hits = %d\n", root->stats.cache_hits);
1226 fprintf(f, "\tcache_misses = %d\n", root->stats.cache_misses);
1227 fprintf(f, "\tkey_update = %d\n", root->stats.key_update);
1228 fprintf(f, "\tvalue_update = %d\n", root->stats.value_update);
1229 fprintf(f, "\tinsert = %d\n", root->stats.insert);
1230 fprintf(f, "\tshift_prev = %d\n", root->stats.shift_prev);
1231 fprintf(f, "\tshift_next = %d\n", root->stats.shift_next);
1232 fprintf(f, "\tsplit = %d\n", root->stats.split);
1233 fprintf(f, "\tinc_height = %d\n", root->stats.inc_height);
1234 fprintf(f, "\tdelete = %d\n", root->stats.delete);
1235 fprintf(f, "\tmerge_prev = %d\n", root->stats.merge_prev);
1236 fprintf(f, "\tmerge_next = %d\n", root->stats.merge_next);
1237 fprintf(f, "\tbalance_prev = %d\n", root->stats.balance_prev);
1238 fprintf(f, "\tbalance_next = %d\n", root->stats.balance_next);
1239 fprintf(f, "\tdec_height = %d\n", root->stats.dec_height);
1240 }
1241 #endif