]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/btree.c
libxfs: refactor manage_zones()
[thirdparty/xfsprogs-dev.git] / repair / btree.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2007, Silicon Graphics, Inc. Barry Naujok <bnaujok@sgi.com>
4 * All Rights Reserved.
5 */
6
7 #include "libxfs.h"
8 #include "btree.h"
9
10 /*
11 * Maximum number of keys per node. Must be greater than 2 for the code
12 * to work.
13 */
14 #define BTREE_KEY_MAX 7
15 #define BTREE_KEY_MIN (BTREE_KEY_MAX / 2)
16
17 #define BTREE_PTR_MAX (BTREE_KEY_MAX + 1)
18
19 struct btree_node {
20 unsigned long num_keys;
21 unsigned long keys[BTREE_KEY_MAX];
22 struct btree_node *ptrs[BTREE_PTR_MAX];
23 };
24
25 struct btree_cursor {
26 struct btree_node *node;
27 int index;
28 };
29
30 struct btree_root {
31 struct btree_node *root_node;
32 struct btree_cursor *cursor; /* track path to end leaf */
33 int height;
34 /* lookup cache */
35 int keys_valid; /* set if the cache is valid */
36 unsigned long cur_key;
37 unsigned long next_key;
38 void *next_value;
39 unsigned long prev_key;
40 void *prev_value;
41 #ifdef BTREE_STATS
42 struct btree_stats {
43 unsigned long num_items;
44 unsigned long max_items;
45 int alloced;
46 int cache_hits;
47 int cache_misses;
48 int lookup;
49 int find;
50 int key_update;
51 int value_update;
52 int insert;
53 int delete;
54 int inc_height;
55 int dec_height;
56 int shift_prev;
57 int shift_next;
58 int split;
59 int merge_prev;
60 int merge_next;
61 int balance_prev;
62 int balance_next;
63 } stats;
64 #endif
65 };
66
67
68 static struct btree_node *
69 btree_node_alloc(void)
70 {
71 return calloc(1, sizeof(struct btree_node));
72 }
73
74 static void
75 btree_node_free(
76 struct btree_node *node)
77 {
78 free(node);
79 }
80
81 static void
82 btree_free_nodes(
83 struct btree_node *node,
84 int level)
85 {
86 int i;
87
88 if (level)
89 for (i = 0; i <= node->num_keys; i++)
90 btree_free_nodes(node->ptrs[i], level - 1);
91 btree_node_free(node);
92 }
93
94 static void
95 __btree_init(
96 struct btree_root *root)
97 {
98 memset(root, 0, sizeof(struct btree_root));
99 root->height = 1;
100 root->cursor = calloc(1, sizeof(struct btree_cursor));
101 root->root_node = btree_node_alloc();
102 ASSERT(root->root_node);
103 #ifdef BTREE_STATS
104 root->stats.max_items = 1;
105 root->stats.alloced += 1;
106 #endif
107 }
108
109 static void
110 __btree_free(
111 struct btree_root *root)
112 {
113 btree_free_nodes(root->root_node, root->height - 1);
114 free(root->cursor);
115 root->height = 0;
116 root->cursor = NULL;
117 root->root_node = NULL;
118 }
119
120 void
121 btree_init(
122 struct btree_root **root)
123 {
124 *root = calloc(1, sizeof(struct btree_root));
125 __btree_init(*root);
126 }
127
128 void
129 btree_clear(
130 struct btree_root *root)
131 {
132 __btree_free(root);
133 __btree_init(root);
134 }
135
136 void
137 btree_destroy(
138 struct btree_root *root)
139 {
140 __btree_free(root);
141 free(root);
142 }
143
144 int
145 btree_is_empty(
146 struct btree_root *root)
147 {
148 return root->root_node->num_keys == 0;
149 }
150
151 static inline void
152 btree_invalidate_cursor(
153 struct btree_root *root)
154 {
155 root->cursor[0].node = NULL;
156 root->keys_valid = 0;
157 }
158
159 static inline unsigned long
160 btree_key_of_cursor(
161 struct btree_cursor *cursor,
162 int height)
163 {
164 while (cursor->node->num_keys == cursor->index && --height > 0)
165 cursor++;
166 return cursor->node->keys[cursor->index];
167 }
168
169 static void *
170 btree_get_prev(
171 struct btree_root *root,
172 unsigned long *key)
173 {
174 struct btree_cursor *cur = root->cursor;
175 int level = 0;
176 struct btree_node *node;
177
178 if (cur->index > 0) {
179 if (key)
180 *key = cur->node->keys[cur->index - 1];
181 return cur->node->ptrs[cur->index - 1];
182 }
183
184 /* else need to go up and back down the tree to find the previous */
185 do {
186 if (cur->index)
187 break;
188 cur++;
189 } while (++level < root->height);
190
191 if (level == root->height)
192 return NULL;
193
194 /* the key is in the current level */
195 if (key)
196 *key = cur->node->keys[cur->index - 1];
197
198 /* descend back down the right side to get the pointer */
199 node = cur->node->ptrs[cur->index - 1];
200 while (level--)
201 node = node->ptrs[node->num_keys];
202 return node;
203 }
204
205 static void *
206 btree_get_next(
207 struct btree_root *root,
208 unsigned long *key)
209 {
210 struct btree_cursor *cur = root->cursor;
211 int level = 0;
212 struct btree_node *node;
213
214 while (cur->index == cur->node->num_keys) {
215 if (++level == root->height)
216 return NULL;
217 cur++;
218 }
219 if (level == 0) {
220 if (key) {
221 cur->index++;
222 *key = btree_key_of_cursor(cur, root->height);
223 cur->index--;
224 }
225 return cur->node->ptrs[cur->index + 1];
226 }
227
228 node = cur->node->ptrs[cur->index + 1];
229 while (--level > 0)
230 node = node->ptrs[0];
231 if (key)
232 *key = node->keys[0];
233 return node->ptrs[0];
234 }
235
236 /*
237 * Lookup/Search functions
238 */
239
240 static int
241 btree_do_search(
242 struct btree_root *root,
243 unsigned long key)
244 {
245 unsigned long k = 0;
246 struct btree_cursor *cur = root->cursor + root->height;
247 struct btree_node *node = root->root_node;
248 int height = root->height;
249 int key_found = 0;
250 int i;
251
252 while (--height >= 0) {
253 cur--;
254 for (i = 0; i < node->num_keys; i++)
255 if (node->keys[i] >= key) {
256 k = node->keys[i];
257 key_found = 1;
258 break;
259 }
260 cur->node = node;
261 cur->index = i;
262 node = node->ptrs[i];
263 }
264 root->keys_valid = key_found;
265 if (!key_found)
266 return 0;
267
268 root->cur_key = k;
269 root->next_value = NULL; /* do on-demand next value lookup */
270 root->prev_value = btree_get_prev(root, &root->prev_key);
271 return 1;
272 }
273
274 static int
275 btree_search(
276 struct btree_root *root,
277 unsigned long key)
278 {
279 if (root->keys_valid && key <= root->cur_key &&
280 (!root->prev_value || key > root->prev_key)) {
281 #ifdef BTREE_STATS
282 root->stats.cache_hits++;
283 #endif
284 return 1;
285 }
286 #ifdef BTREE_STATS
287 root->stats.cache_misses++;
288 #endif
289 return btree_do_search(root, key);
290 }
291
292 void *
293 btree_find(
294 struct btree_root *root,
295 unsigned long key,
296 unsigned long *actual_key)
297 {
298 #ifdef BTREE_STATS
299 root->stats.find += 1;
300 #endif
301 if (!btree_search(root, key))
302 return NULL;
303
304 if (actual_key)
305 *actual_key = root->cur_key;
306 return root->cursor->node->ptrs[root->cursor->index];
307 }
308
309 void *
310 btree_lookup(
311 struct btree_root *root,
312 unsigned long key)
313 {
314 #ifdef BTREE_STATS
315 root->stats.lookup += 1;
316 #endif
317 if (!btree_search(root, key) || root->cur_key != key)
318 return NULL;
319 return root->cursor->node->ptrs[root->cursor->index];
320 }
321
322 void *
323 btree_peek_prev(
324 struct btree_root *root,
325 unsigned long *key)
326 {
327 if (!root->keys_valid)
328 return NULL;
329 if (key)
330 *key = root->prev_key;
331 return root->prev_value;
332 }
333
334 void *
335 btree_peek_next(
336 struct btree_root *root,
337 unsigned long *key)
338 {
339 if (!root->keys_valid)
340 return NULL;
341 if (!root->next_value)
342 root->next_value = btree_get_next(root, &root->next_key);
343 if (key)
344 *key = root->next_key;
345 return root->next_value;
346 }
347
348 static void *
349 btree_move_cursor_to_next(
350 struct btree_root *root,
351 unsigned long *key)
352 {
353 struct btree_cursor *cur = root->cursor;
354 int level = 0;
355
356 while (cur->index == cur->node->num_keys) {
357 if (++level == root->height)
358 return NULL;
359 cur++;
360 }
361 cur->index++;
362 if (level == 0) {
363 if (key)
364 *key = btree_key_of_cursor(cur, root->height);
365 return cur->node->ptrs[cur->index];
366 }
367
368 while (--level >= 0) {
369 root->cursor[level].node = cur->node->ptrs[cur->index];
370 root->cursor[level].index = 0;
371 cur--;
372 }
373 if (key)
374 *key = cur->node->keys[0];
375 return cur->node->ptrs[0];
376 }
377
378 void *
379 btree_lookup_next(
380 struct btree_root *root,
381 unsigned long *key)
382 {
383 void *value;
384
385 if (!root->keys_valid)
386 return NULL;
387
388 root->prev_key = root->cur_key;
389 root->prev_value = root->cursor->node->ptrs[root->cursor->index];
390
391 value = btree_move_cursor_to_next(root, &root->cur_key);
392 if (!value) {
393 btree_invalidate_cursor(root);
394 return NULL;
395 }
396 root->next_value = NULL; /* on-demand next value fetch */
397 if (key)
398 *key = root->cur_key;
399 return value;
400 }
401
402 static void *
403 btree_move_cursor_to_prev(
404 struct btree_root *root,
405 unsigned long *key)
406 {
407 struct btree_cursor *cur = root->cursor;
408 int level = 0;
409
410 while (cur->index == 0) {
411 if (++level == root->height)
412 return NULL;
413 cur++;
414 }
415 cur->index--;
416 if (key) /* the key is in the current level */
417 *key = cur->node->keys[cur->index];
418 while (level > 0) {
419 level--;
420 root->cursor[level].node = cur->node->ptrs[cur->index];
421 root->cursor[level].index = root->cursor[level].node->num_keys;
422 cur--;
423 }
424 return cur->node->ptrs[cur->index];
425 }
426
427 void *
428 btree_lookup_prev(
429 struct btree_root *root,
430 unsigned long *key)
431 {
432 void *value;
433
434 if (!root->keys_valid)
435 return NULL;
436
437 value = btree_move_cursor_to_prev(root, &root->cur_key);
438 if (!value)
439 return NULL;
440 root->prev_value = btree_get_prev(root, &root->prev_key);
441 root->next_value = NULL; /* on-demand next value fetch */
442 if (key)
443 *key = root->cur_key;
444 return value;
445 }
446
447 /* Update functions */
448
449 static inline void
450 btree_update_node_key(
451 struct btree_root *root,
452 struct btree_cursor *cursor,
453 int level,
454 unsigned long new_key)
455 {
456 int i;
457
458 #ifdef BTREE_STATS
459 root->stats.key_update += 1;
460 #endif
461
462 cursor += level;
463 for (i = level; i < root->height; i++) {
464 if (cursor->index < cursor->node->num_keys) {
465 cursor->node->keys[cursor->index] = new_key;
466 break;
467 }
468 cursor++;
469 }
470 }
471
472 int
473 btree_update_key(
474 struct btree_root *root,
475 unsigned long old_key,
476 unsigned long new_key)
477 {
478 if (!btree_search(root, old_key) || root->cur_key != old_key)
479 return ENOENT;
480
481 if (root->next_value && new_key >= root->next_key)
482 return EINVAL;
483
484 if (root->prev_value && new_key <= root->prev_key)
485 return EINVAL;
486
487 btree_update_node_key(root, root->cursor, 0, new_key);
488 root->cur_key = new_key;
489
490 return 0;
491 }
492
493 int
494 btree_update_value(
495 struct btree_root *root,
496 unsigned long key,
497 void *new_value)
498 {
499 if (!new_value)
500 return EINVAL;
501
502 if (!btree_search(root, key) || root->cur_key != key)
503 return ENOENT;
504
505 #ifdef BTREE_STATS
506 root->stats.value_update += 1;
507 #endif
508 root->cursor->node->ptrs[root->cursor->index] = new_value;
509
510 return 0;
511 }
512
513 /*
514 * Cursor modification functions - used for inserting and deleting
515 */
516
517 static struct btree_cursor *
518 btree_copy_cursor_prev(
519 struct btree_root *root,
520 struct btree_cursor *dest_cursor,
521 int level)
522 {
523 struct btree_cursor *src_cur = root->cursor + level;
524 struct btree_cursor *dst_cur;
525 int l = level;
526 int i;
527
528 if (level >= root->height)
529 return NULL;
530
531 while (src_cur->index == 0) {
532 if (++l >= root->height)
533 return NULL;
534 src_cur++;
535 }
536 for (i = l; i < root->height; i++)
537 dest_cursor[i] = *src_cur++;
538
539 dst_cur = dest_cursor + l;
540 dst_cur->index--;
541 while (l-- >= level) {
542 dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index];
543 dest_cursor[l].index = dest_cursor[l].node->num_keys;
544 dst_cur--;
545 }
546 return dest_cursor;
547 }
548
549 static struct btree_cursor *
550 btree_copy_cursor_next(
551 struct btree_root *root,
552 struct btree_cursor *dest_cursor,
553 int level)
554 {
555 struct btree_cursor *src_cur = root->cursor + level;
556 struct btree_cursor *dst_cur;
557 int l = level;
558 int i;
559
560 if (level >= root->height)
561 return NULL;
562
563 while (src_cur->index == src_cur->node->num_keys) {
564 if (++l >= root->height)
565 return NULL;
566 src_cur++;
567 }
568 for (i = l; i < root->height; i++)
569 dest_cursor[i] = *src_cur++;
570
571 dst_cur = dest_cursor + l;
572 dst_cur->index++;
573 while (l-- >= level) {
574 dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index];
575 dest_cursor[l].index = 0;
576 dst_cur--;
577 }
578 return dest_cursor;
579 }
580
581 /*
582 * Shift functions
583 *
584 * Tries to move items in the current leaf to its sibling if it has space.
585 * Used in both insert and delete functions.
586 * Returns the number of items shifted.
587 */
588
589 static int
590 btree_shift_to_prev(
591 struct btree_root *root,
592 int level,
593 struct btree_cursor *prev_cursor,
594 int num_children)
595 {
596 struct btree_node *node;
597 struct btree_node *prev_node;
598 int num_remain; /* # of keys left in "node" */
599 unsigned long key;
600 int i;
601
602 if (!prev_cursor || !num_children)
603 return 0;
604
605 prev_node = prev_cursor[level].node;
606 node = root->cursor[level].node;
607
608 ASSERT(num_children > 0 && num_children <= node->num_keys + 1);
609
610 if ((prev_node->num_keys + num_children) > BTREE_KEY_MAX)
611 return 0;
612
613 #ifdef BTREE_STATS
614 root->stats.shift_prev += 1;
615 #endif
616
617 num_remain = node->num_keys - num_children;
618 ASSERT(num_remain == -1 || num_remain >= BTREE_KEY_MIN);
619
620 /* shift parent keys around */
621 level++;
622 if (num_remain > 0)
623 key = node->keys[num_children - 1];
624 else
625 key = btree_key_of_cursor(root->cursor + level,
626 root->height - level);
627 while (prev_cursor[level].index == prev_cursor[level].node->num_keys) {
628 level++;
629 ASSERT(level < root->height);
630 }
631 prev_node->keys[prev_node->num_keys] =
632 prev_cursor[level].node->keys[prev_cursor[level].index];
633 prev_cursor[level].node->keys[prev_cursor[level].index] = key;
634
635 /* copy pointers and keys to the end of the prev node */
636 for (i = 0; i < num_children - 1; i++) {
637 prev_node->keys[prev_node->num_keys + 1 + i] = node->keys[i];
638 prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i];
639 }
640 prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i];
641 prev_node->num_keys += num_children;
642
643 /* move remaining pointers/keys to start of node */
644 if (num_remain >= 0) {
645 for (i = 0; i < num_remain; i++) {
646 node->keys[i] = node->keys[num_children + i];
647 node->ptrs[i] = node->ptrs[num_children + i];
648 }
649 node->ptrs[i] = node->ptrs[num_children + i];
650 node->num_keys = num_remain;
651 } else
652 node->num_keys = 0;
653
654 return num_children;
655 }
656
657 static int
658 btree_shift_to_next(
659 struct btree_root *root,
660 int level,
661 struct btree_cursor *next_cursor,
662 int num_children)
663 {
664 struct btree_node *node;
665 struct btree_node *next_node;
666 int num_remain; /* # of children left in node */
667 int i;
668
669 if (!next_cursor || !num_children)
670 return 0;
671
672 node = root->cursor[level].node;
673 next_node = next_cursor[level].node;
674
675 ASSERT(num_children > 0 && num_children <= node->num_keys + 1);
676
677 if ((next_node->num_keys + num_children) > BTREE_KEY_MAX)
678 return 0;
679
680 num_remain = node->num_keys + 1 - num_children;
681 ASSERT(num_remain == 0 || num_remain > BTREE_KEY_MIN);
682
683 #ifdef BTREE_STATS
684 root->stats.shift_next += 1;
685 #endif
686
687 /* make space for "num_children" items at beginning of next-leaf */
688 i = next_node->num_keys;
689 next_node->ptrs[num_children + i] = next_node->ptrs[i];
690 while (--i >= 0) {
691 next_node->keys[num_children + i] = next_node->keys[i];
692 next_node->ptrs[num_children + i] = next_node->ptrs[i];
693 }
694
695 /* update keys in parent and next node from parent */
696 do {
697 level++;
698 ASSERT(level < root->height);
699 } while (root->cursor[level].index ==
700 root->cursor[level].node->num_keys);
701
702 next_node->keys[num_children - 1] =
703 root->cursor[level].node->keys[root->cursor[level].index];
704 root->cursor[level].node->keys[root->cursor[level].index] =
705 node->keys[node->num_keys - num_children];
706
707 /* copy last "num_children" items from node into start of next-node */
708 for (i = 0; i < num_children - 1; i++) {
709 next_node->keys[i] = node->keys[num_remain + i];
710 next_node->ptrs[i] = node->ptrs[num_remain + i];
711 }
712 next_node->ptrs[i] = node->ptrs[num_remain + i];
713 next_node->num_keys += num_children;
714
715 if (num_remain > 0)
716 node->num_keys -= num_children;
717 else
718 node->num_keys = 0;
719
720 return num_children;
721 }
722
723 /*
724 * Insertion functions
725 */
726
727 static struct btree_node *
728 btree_increase_height(
729 struct btree_root *root)
730 {
731 struct btree_node *new_root;
732 struct btree_cursor *new_cursor;
733
734 new_cursor = realloc(root->cursor, (root->height + 1) *
735 sizeof(struct btree_cursor));
736 if (!new_cursor)
737 return NULL;
738 root->cursor = new_cursor;
739
740 new_root = btree_node_alloc();
741 if (!new_root)
742 return NULL;
743
744 #ifdef BTREE_STATS
745 root->stats.alloced += 1;
746 root->stats.inc_height += 1;
747 root->stats.max_items *= BTREE_PTR_MAX;
748 #endif
749
750 new_root->ptrs[0] = root->root_node;
751 root->root_node = new_root;
752
753 root->cursor[root->height].node = new_root;
754 root->cursor[root->height].index = 0;
755
756 root->height++;
757
758 return new_root;
759 }
760
761 static int
762 btree_insert_item(
763 struct btree_root *root,
764 int level,
765 unsigned long key,
766 void *value);
767
768
769 static struct btree_node *
770 btree_split(
771 struct btree_root *root,
772 int level,
773 unsigned long key,
774 int *index)
775 {
776 struct btree_node *node = root->cursor[level].node;
777 struct btree_node *new_node;
778 int i;
779
780 new_node = btree_node_alloc();
781 if (!new_node)
782 return NULL;
783
784 if (btree_insert_item(root, level + 1, node->keys[BTREE_KEY_MIN],
785 new_node) != 0) {
786 btree_node_free(new_node);
787 return NULL;
788 }
789
790 #ifdef BTREE_STATS
791 root->stats.alloced += 1;
792 root->stats.split += 1;
793 #endif
794
795 for (i = 0; i < BTREE_KEY_MAX - BTREE_KEY_MIN - 1; i++) {
796 new_node->keys[i] = node->keys[BTREE_KEY_MIN + 1 + i];
797 new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i];
798 }
799 new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i];
800 new_node->num_keys = BTREE_KEY_MAX - BTREE_KEY_MIN - 1;
801
802 node->num_keys = BTREE_KEY_MIN;
803 if (key < node->keys[BTREE_KEY_MIN])
804 return node; /* index doesn't change */
805
806 /* insertion point is in new node... */
807 *index -= BTREE_KEY_MIN + 1;
808 return new_node;
809 }
810
811 static int
812 btree_insert_shift_to_prev(
813 struct btree_root *root,
814 int level,
815 int *index)
816 {
817 struct btree_cursor tmp_cursor[root->height];
818 int n;
819
820 if (*index <= 0)
821 return -1;
822
823 if (!btree_copy_cursor_prev(root, tmp_cursor, level + 1))
824 return -1;
825
826 n = min(*index, (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2);
827 if (!n || !btree_shift_to_prev(root, level, tmp_cursor, n))
828 return -1;
829
830 *index -= n;
831 return 0;
832 }
833
834 static int
835 btree_insert_shift_to_next(
836 struct btree_root *root,
837 int level,
838 int *index)
839 {
840 struct btree_cursor tmp_cursor[root->height];
841 int n;
842
843 if (*index >= BTREE_KEY_MAX)
844 return -1;
845
846 if (!btree_copy_cursor_next(root, tmp_cursor, level + 1))
847 return -1;
848
849 n = min(BTREE_KEY_MAX - *index,
850 (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2);
851 if (!n || !btree_shift_to_next(root, level, tmp_cursor, n))
852 return -1;
853 return 0;
854 }
855
856 static int
857 btree_insert_item(
858 struct btree_root *root,
859 int level,
860 unsigned long key,
861 void *value)
862 {
863 struct btree_node *node = root->cursor[level].node;
864 int index = root->cursor[level].index;
865 int i;
866
867 if (node->num_keys == BTREE_KEY_MAX) {
868 if (btree_insert_shift_to_prev(root, level, &index) == 0)
869 goto insert;
870 if (btree_insert_shift_to_next(root, level, &index) == 0)
871 goto insert;
872 if (level == root->height - 1) {
873 if (!btree_increase_height(root))
874 return ENOMEM;
875 }
876 node = btree_split(root, level, key, &index);
877 if (!node)
878 return ENOMEM;
879 }
880 insert:
881 ASSERT(index <= node->num_keys);
882
883 i = node->num_keys;
884 node->ptrs[i + 1] = node->ptrs[i];
885 while (--i >= index) {
886 node->keys[i + 1] = node->keys[i];
887 node->ptrs[i + 1] = node->ptrs[i];
888 }
889
890 node->num_keys++;
891 node->keys[index] = key;
892
893 if (level == 0)
894 node->ptrs[index] = value;
895 else
896 node->ptrs[index + 1] = value;
897
898 return 0;
899 }
900
901
902
903 int
904 btree_insert(
905 struct btree_root *root,
906 unsigned long key,
907 void *value)
908 {
909 int result;
910
911 if (!value)
912 return EINVAL;
913
914 if (btree_search(root, key) && root->cur_key == key)
915 return EEXIST;
916
917 #ifdef BTREE_STATS
918 root->stats.insert += 1;
919 root->stats.num_items += 1;
920 #endif
921
922 result = btree_insert_item(root, 0, key, value);
923
924 btree_invalidate_cursor(root);
925
926 return result;
927 }
928
929
930 /*
931 * Deletion functions
932 *
933 * Rather more complicated as deletions has 4 ways to go once a node
934 * ends up with less than the minimum number of keys:
935 * - move remainder to previous node
936 * - move remainder to next node
937 * (both will involve a parent deletion which may recurse)
938 * - balance by moving some items from previous node
939 * - balance by moving some items from next node
940 */
941
942 static void
943 btree_decrease_height(
944 struct btree_root *root)
945 {
946 struct btree_node *old_root = root->root_node;
947
948 ASSERT(old_root->num_keys == 0);
949
950 #ifdef BTREE_STATS
951 root->stats.alloced -= 1;
952 root->stats.dec_height += 1;
953 root->stats.max_items /= BTREE_PTR_MAX;
954 #endif
955 root->root_node = old_root->ptrs[0];
956 btree_node_free(old_root);
957 root->height--;
958 }
959
960 static int
961 btree_merge_with_prev(
962 struct btree_root *root,
963 int level,
964 struct btree_cursor *prev_cursor)
965 {
966 if (!prev_cursor)
967 return 0;
968
969 if (!btree_shift_to_prev(root, level, prev_cursor,
970 root->cursor[level].node->num_keys + 1))
971 return 0;
972
973 #ifdef BTREE_STATS
974 root->stats.merge_prev += 1;
975 #endif
976 return 1;
977 }
978
979 static int
980 btree_merge_with_next(
981 struct btree_root *root,
982 int level,
983 struct btree_cursor *next_cursor)
984 {
985 if (!next_cursor)
986 return 0;
987
988 if (!btree_shift_to_next(root, level, next_cursor,
989 root->cursor[level].node->num_keys + 1))
990 return 0;
991
992 #ifdef BTREE_STATS
993 root->stats.merge_next += 1;
994 #endif
995 return 1;
996 }
997
998 static int
999 btree_balance_with_prev(
1000 struct btree_root *root,
1001 int level,
1002 struct btree_cursor *prev_cursor)
1003 {
1004 struct btree_cursor *root_cursor = root->cursor;
1005
1006 if (!prev_cursor)
1007 return 0;
1008 ASSERT(prev_cursor[level].node->num_keys > BTREE_KEY_MIN);
1009
1010 #ifdef BTREE_STATS
1011 root->stats.balance_prev += 1;
1012 #endif
1013 /*
1014 * Move some nodes from the prev node into the current node.
1015 * As the shift operation is a right shift and is relative to
1016 * the root cursor, make the root cursor the prev cursor and
1017 * pass in the root cursor as the next cursor.
1018 */
1019
1020 root->cursor = prev_cursor;
1021 if (!btree_shift_to_next(root, level, root_cursor,
1022 (prev_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2))
1023 abort();
1024 root->cursor = root_cursor;
1025
1026 return 1;
1027 }
1028
1029 static int
1030 btree_balance_with_next(
1031 struct btree_root *root,
1032 int level,
1033 struct btree_cursor *next_cursor)
1034 {
1035 struct btree_cursor *root_cursor = root->cursor;
1036
1037 if (!next_cursor)
1038 return 0;
1039 assert(next_cursor[level].node->num_keys > BTREE_KEY_MIN);
1040
1041 #ifdef btree_stats
1042 root->stats.balance_next += 1;
1043 #endif
1044 /*
1045 * move some nodes from the next node into the current node.
1046 * as the shift operation is a left shift and is relative to
1047 * the root cursor, make the root cursor the next cursor and
1048 * pass in the root cursor as the prev cursor.
1049 */
1050
1051 root->cursor = next_cursor;
1052 if (!btree_shift_to_prev(root, level, root_cursor,
1053 (next_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2))
1054 abort();
1055 root->cursor = root_cursor;
1056
1057 return 1;
1058
1059 }
1060
1061 static void
1062 btree_delete_key(
1063 struct btree_root *root,
1064 int level);
1065
1066 /*
1067 * btree_delete_node:
1068 *
1069 * Return 0 if it's done or 1 if the next level needs to be collapsed
1070 */
1071 static void
1072 btree_delete_node(
1073 struct btree_root *root,
1074 int level)
1075 {
1076 struct btree_cursor prev_cursor[root->height];
1077 struct btree_cursor next_cursor[root->height];
1078 struct btree_cursor *pc;
1079 struct btree_cursor *nc;
1080
1081 /*
1082 * the node has underflowed, grab or merge keys/items from a
1083 * neighbouring node.
1084 */
1085
1086 if (level == root->height - 1) {
1087 if (level > 0 && root->root_node->num_keys == 0)
1088 btree_decrease_height(root);
1089 return;
1090 }
1091
1092 pc = btree_copy_cursor_prev(root, prev_cursor, level + 1);
1093 if (!btree_merge_with_prev(root, level, pc)) {
1094 nc = btree_copy_cursor_next(root, next_cursor, level + 1);
1095 if (!btree_merge_with_next(root, level, nc)) {
1096 /* merging failed, try redistrubution */
1097 if (!btree_balance_with_prev(root, level, pc) &&
1098 !btree_balance_with_next(root, level, nc))
1099 abort();
1100 return; /* when balancing, then the node isn't freed */
1101 }
1102 }
1103
1104 #ifdef BTREE_STATS
1105 root->stats.alloced -= 1;
1106 #endif
1107 btree_node_free(root->cursor[level].node);
1108
1109 btree_delete_key(root, level + 1);
1110 }
1111
1112 static void
1113 btree_delete_key(
1114 struct btree_root *root,
1115 int level)
1116 {
1117 struct btree_node *node = root->cursor[level].node;
1118 int index = root->cursor[level].index;
1119
1120 node->num_keys--;
1121 if (index <= node->num_keys) {
1122 /*
1123 * if not deleting the last item, shift higher items down
1124 * to cover the item being deleted
1125 */
1126 while (index < node->num_keys) {
1127 node->keys[index] = node->keys[index + 1];
1128 node->ptrs[index] = node->ptrs[index + 1];
1129 index++;
1130 }
1131 node->ptrs[index] = node->ptrs[index + 1];
1132 } else {
1133 /*
1134 * else update the associated parent key as the last key
1135 * in the leaf has changed
1136 */
1137 btree_update_node_key(root, root->cursor, level + 1,
1138 node->keys[node->num_keys]);
1139 }
1140 /*
1141 * if node underflows, either merge with sibling or rebalance
1142 * with sibling.
1143 */
1144 if (node->num_keys < BTREE_KEY_MIN)
1145 btree_delete_node(root, level);
1146 }
1147
1148 void *
1149 btree_delete(
1150 struct btree_root *root,
1151 unsigned long key)
1152 {
1153 void *value;
1154
1155 value = btree_lookup(root, key);
1156 if (!value)
1157 return NULL;
1158
1159 #ifdef BTREE_STATS
1160 root->stats.delete += 1;
1161 root->stats.num_items -= 1;
1162 #endif
1163
1164 btree_delete_key(root, 0);
1165
1166 btree_invalidate_cursor(root);
1167
1168 return value;
1169 }
1170
1171 #ifdef BTREE_STATS
1172 void
1173 btree_print_stats(
1174 struct btree_root *root,
1175 FILE *f)
1176 {
1177 unsigned long max_items = root->stats.max_items *
1178 (root->root_node->num_keys + 1);
1179
1180 fprintf(f, "\tnum_items = %lu, max_items = %lu (%lu%%)\n",
1181 root->stats.num_items, max_items,
1182 root->stats.num_items * 100 / max_items);
1183 fprintf(f, "\talloced = %d nodes, %lu bytes, %lu bytes per item\n",
1184 root->stats.alloced,
1185 root->stats.alloced * sizeof(struct btree_node),
1186 root->stats.alloced * sizeof(struct btree_node) /
1187 root->stats.num_items);
1188 fprintf(f, "\tlookup = %d\n", root->stats.lookup);
1189 fprintf(f, "\tfind = %d\n", root->stats.find);
1190 fprintf(f, "\tcache_hits = %d\n", root->stats.cache_hits);
1191 fprintf(f, "\tcache_misses = %d\n", root->stats.cache_misses);
1192 fprintf(f, "\tkey_update = %d\n", root->stats.key_update);
1193 fprintf(f, "\tvalue_update = %d\n", root->stats.value_update);
1194 fprintf(f, "\tinsert = %d\n", root->stats.insert);
1195 fprintf(f, "\tshift_prev = %d\n", root->stats.shift_prev);
1196 fprintf(f, "\tshift_next = %d\n", root->stats.shift_next);
1197 fprintf(f, "\tsplit = %d\n", root->stats.split);
1198 fprintf(f, "\tinc_height = %d\n", root->stats.inc_height);
1199 fprintf(f, "\tdelete = %d\n", root->stats.delete);
1200 fprintf(f, "\tmerge_prev = %d\n", root->stats.merge_prev);
1201 fprintf(f, "\tmerge_next = %d\n", root->stats.merge_next);
1202 fprintf(f, "\tbalance_prev = %d\n", root->stats.balance_prev);
1203 fprintf(f, "\tbalance_next = %d\n", root->stats.balance_next);
1204 fprintf(f, "\tdec_height = %d\n", root->stats.dec_height);
1205 }
1206 #endif