1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
14 #include "err_protos.h"
 * note: there are 4 sets of incore things handled here:
 * block bitmaps, extent trees, uncertain inode list,
 * and inode tree.  The tree-based code uses the AVL
 * tree package used by the IRIX kernel VM code
 * (sys/avl.h).  The inode list code uses the same records
 * as the inode tree code for convenience.  The bitmaps
 * and bitmap operators are mostly macros defined in incore.h.
 * There is one of everything per AG except for extent
 * trees.  There's one duplicate extent tree, one bno and
 * one bcnt extent tree per AG.  Not all of the above exist
 * through all phases.  The duplicate extent tree gets trashed
 * at the end of phase 4.  The bno/bcnt trees don't appear until
 * phase 5.  The uncertain inode list goes away at the end of
 * phase 3.  The inode tree and bno/bcnt trees go away after phase 5.
35 static avl64tree_desc_t
*rt_ext_tree_ptr
; /* dup extent tree for rt */
36 static pthread_mutex_t rt_ext_tree_lock
;
38 static struct btree_root
**dup_extent_trees
; /* per ag dup extent trees */
39 static pthread_mutex_t
*dup_extent_tree_locks
;
41 static avltree_desc_t
**extent_bno_ptrs
; /*
42 * array of extent tree ptrs
43 * one per ag for free extents
44 * sorted by starting block
47 static avltree_desc_t
**extent_bcnt_ptrs
; /*
48 * array of extent tree ptrs
49 * one per ag for free extents
54 * duplicate extent tree functions
58 release_dup_extent_tree(
61 pthread_mutex_lock(&dup_extent_tree_locks
[agno
]);
62 btree_clear(dup_extent_trees
[agno
]);
63 pthread_mutex_unlock(&dup_extent_tree_locks
[agno
]);
69 xfs_agblock_t startblock
,
70 xfs_extlen_t blockcount
)
74 fprintf(stderr
, "Adding dup extent - %d/%d %d\n", agno
, startblock
,
77 pthread_mutex_lock(&dup_extent_tree_locks
[agno
]);
78 ret
= btree_insert(dup_extent_trees
[agno
], startblock
,
79 (void *)(uintptr_t)(startblock
+ blockcount
));
80 pthread_mutex_unlock(&dup_extent_tree_locks
[agno
]);
87 xfs_agblock_t start_agbno
,
88 xfs_agblock_t end_agbno
)
93 pthread_mutex_lock(&dup_extent_tree_locks
[agno
]);
94 if (!btree_find(dup_extent_trees
[agno
], start_agbno
, &bno
)) {
96 goto out
; /* this really shouldn't happen */
98 if (bno
< end_agbno
) {
102 ret
= (uintptr_t)btree_peek_prev(dup_extent_trees
[agno
], NULL
) >
105 pthread_mutex_unlock(&dup_extent_tree_locks
[agno
]);
111 * extent tree stuff is avl trees of duplicate extents,
112 * sorted in order by block number. there is one tree per ag.
115 static extent_tree_node_t
*
116 mk_extent_tree_nodes(xfs_agblock_t new_startblock
,
117 xfs_extlen_t new_blockcount
, extent_state_t new_state
)
119 extent_tree_node_t
*new;
121 new = malloc(sizeof(*new));
123 do_error(_("couldn't allocate new extent descriptor.\n"));
125 new->avl_node
.avl_nextino
= NULL
;
126 new->ex_startblock
= new_startblock
;
127 new->ex_blockcount
= new_blockcount
;
128 new->ex_state
= new_state
;
136 release_extent_tree_node(extent_tree_node_t
*node
)
142 * routines to recycle all nodes in a tree. it walks the tree
143 * and puts all nodes back on the free list so the nodes can be
144 * reused. the duplicate and bno/bcnt extent trees for each AG
145 * are recycled after they're no longer needed to save memory
148 release_extent_tree(avltree_desc_t
*tree
)
150 extent_tree_node_t
*ext
;
151 extent_tree_node_t
*tmp
;
152 extent_tree_node_t
*lext
;
153 extent_tree_node_t
*ltmp
;
155 if (tree
->avl_firstino
== NULL
)
158 ext
= (extent_tree_node_t
*) tree
->avl_firstino
;
160 while (ext
!= NULL
) {
161 tmp
= (extent_tree_node_t
*) ext
->avl_node
.avl_nextino
;
164 * ext->next is guaranteed to be set only in bcnt trees
166 if (ext
->next
!= NULL
) {
168 while (lext
!= NULL
) {
170 release_extent_tree_node(lext
);
175 release_extent_tree_node(ext
);
179 tree
->avl_root
= tree
->avl_firstino
= NULL
;
185 * top-level (visible) routines
188 release_agbno_extent_tree(xfs_agnumber_t agno
)
190 release_extent_tree(extent_bno_ptrs
[agno
]);
196 release_agbcnt_extent_tree(xfs_agnumber_t agno
)
198 release_extent_tree(extent_bcnt_ptrs
[agno
]);
204 * the next 4 routines manage the trees of free extents -- 2 trees
205 * per AG. The first tree is sorted by block number. The second
206 * tree is sorted by extent size. This is the bno tree.
209 add_bno_extent(xfs_agnumber_t agno
, xfs_agblock_t startblock
,
210 xfs_extlen_t blockcount
)
212 extent_tree_node_t
*ext
;
214 ASSERT(extent_bno_ptrs
!= NULL
);
215 ASSERT(extent_bno_ptrs
[agno
] != NULL
);
217 ext
= mk_extent_tree_nodes(startblock
, blockcount
, XR_E_FREE
);
219 if (avl_insert(extent_bno_ptrs
[agno
], (avlnode_t
*) ext
) == NULL
) {
220 do_error(_("duplicate bno extent range\n"));
225 findfirst_bno_extent(xfs_agnumber_t agno
)
227 ASSERT(extent_bno_ptrs
!= NULL
);
228 ASSERT(extent_bno_ptrs
[agno
] != NULL
);
230 return((extent_tree_node_t
*) extent_bno_ptrs
[agno
]->avl_firstino
);
234 find_bno_extent(xfs_agnumber_t agno
, xfs_agblock_t startblock
)
236 ASSERT(extent_bno_ptrs
!= NULL
);
237 ASSERT(extent_bno_ptrs
[agno
] != NULL
);
239 return((extent_tree_node_t
*) avl_find(extent_bno_ptrs
[agno
],
244 * delete a node that's in the tree (pointer obtained by a find routine)
247 get_bno_extent(xfs_agnumber_t agno
, extent_tree_node_t
*ext
)
249 ASSERT(extent_bno_ptrs
!= NULL
);
250 ASSERT(extent_bno_ptrs
[agno
] != NULL
);
252 avl_delete(extent_bno_ptrs
[agno
], &ext
->avl_node
);
258 * the next 4 routines manage the trees of free extents -- 2 trees
259 * per AG. The first tree is sorted by block number. The second
260 * tree is sorted by extent size. This is the bcnt tree.
263 add_bcnt_extent(xfs_agnumber_t agno
, xfs_agblock_t startblock
,
264 xfs_extlen_t blockcount
)
266 extent_tree_node_t
*ext
, *prev
, *current
, *top
;
267 xfs_agblock_t tmp_startblock
;
268 xfs_extlen_t tmp_blockcount
;
269 extent_state_t tmp_state
;
271 ASSERT(extent_bcnt_ptrs
!= NULL
);
272 ASSERT(extent_bcnt_ptrs
[agno
] != NULL
);
274 ext
= mk_extent_tree_nodes(startblock
, blockcount
, XR_E_FREE
);
276 ASSERT(ext
->next
== NULL
);
279 fprintf(stderr
, "adding bcnt: agno = %d, start = %u, count = %u\n",
280 agno
, startblock
, blockcount
);
282 if ((current
= (extent_tree_node_t
*) avl_find(extent_bcnt_ptrs
[agno
],
283 blockcount
)) != NULL
) {
285 * avl tree code doesn't handle dups so insert
286 * onto linked list in increasing startblock order
288 * when called from mk_incore_fstree,
289 * startblock is in increasing order.
290 * current is an "anchor" node.
291 * quick check if the new ext goes to the end.
292 * if so, append at the end, using the last field
295 ASSERT(current
->last
!= NULL
);
296 if (startblock
> current
->last
->ex_startblock
) {
297 current
->last
->next
= ext
;
303 * scan, to find the proper location for new entry.
304 * this scan is *very* expensive and gets worse with
305 * with increasing entries.
307 top
= prev
= current
;
308 while (current
!= NULL
&&
309 startblock
> current
->ex_startblock
) {
311 current
= current
->next
;
314 if (top
== current
) {
317 * new entry should be ahead of current.
318 * to keep the avl tree intact,
319 * swap the values of to-be-inserted element
320 * and the values of the head of the list.
321 * then insert as the 2nd element on the list.
323 * see the comment in get_bcnt_extent()
324 * as to why we have to do this.
326 tmp_startblock
= top
->ex_startblock
;
327 tmp_blockcount
= top
->ex_blockcount
;
328 tmp_state
= top
->ex_state
;
330 top
->ex_startblock
= ext
->ex_startblock
;
331 top
->ex_blockcount
= ext
->ex_blockcount
;
332 top
->ex_state
= ext
->ex_state
;
334 ext
->ex_startblock
= tmp_startblock
;
335 ext
->ex_blockcount
= tmp_blockcount
;
336 ext
->ex_state
= tmp_state
;
348 if (avl_insert(extent_bcnt_ptrs
[agno
], (avlnode_t
*) ext
) == NULL
) {
349 do_error(_(": duplicate bno extent range\n"));
352 ext
->last
= ext
; /* ext is an "anchor" node */
358 findfirst_bcnt_extent(xfs_agnumber_t agno
)
360 ASSERT(extent_bcnt_ptrs
!= NULL
);
361 ASSERT(extent_bcnt_ptrs
[agno
] != NULL
);
363 return((extent_tree_node_t
*) extent_bcnt_ptrs
[agno
]->avl_firstino
);
367 findbiggest_bcnt_extent(xfs_agnumber_t agno
)
369 ASSERT(extent_bcnt_ptrs
!= NULL
);
370 ASSERT(extent_bcnt_ptrs
[agno
] != NULL
);
372 return((extent_tree_node_t
*) avl_lastino(extent_bcnt_ptrs
[agno
]->avl_root
));
376 findnext_bcnt_extent(xfs_agnumber_t agno
, extent_tree_node_t
*ext
)
380 if (ext
->next
!= NULL
) {
381 ASSERT(ext
->ex_blockcount
== ext
->next
->ex_blockcount
);
382 ASSERT(ext
->ex_startblock
< ext
->next
->ex_startblock
);
386 * have to look at the top of the list to get the
387 * correct avl_nextino pointer since that pointer
388 * is maintained and altered by the AVL code.
390 nextino
= avl_find(extent_bcnt_ptrs
[agno
], ext
->ex_blockcount
);
391 ASSERT(nextino
!= NULL
);
392 if (nextino
->avl_nextino
!= NULL
) {
393 ASSERT(ext
->ex_blockcount
< ((extent_tree_node_t
*)
394 nextino
->avl_nextino
)->ex_blockcount
);
396 return((extent_tree_node_t
*) nextino
->avl_nextino
);
401 * this is meant to be called after you walk the bno tree to
402 * determine exactly which extent you want (so you'll know the
403 * desired value for startblock when you call this routine).
406 get_bcnt_extent(xfs_agnumber_t agno
, xfs_agblock_t startblock
,
407 xfs_extlen_t blockcount
)
409 extent_tree_node_t
*ext
, *prev
, *top
;
410 xfs_agblock_t tmp_startblock
;
411 xfs_extlen_t tmp_blockcount
;
412 extent_state_t tmp_state
;
415 ASSERT(extent_bcnt_ptrs
!= NULL
);
416 ASSERT(extent_bcnt_ptrs
[agno
] != NULL
);
418 if ((ext
= (extent_tree_node_t
*) avl_find(extent_bcnt_ptrs
[agno
],
419 blockcount
)) == NULL
)
424 if (ext
->next
!= NULL
) {
426 * pull it off the list
428 while (ext
!= NULL
&& startblock
!= ext
->ex_startblock
) {
435 * this node is linked into the tree so we
436 * swap the core values so we can delete
437 * the next item on the list instead of
438 * the head of the list. This is because
439 * the rest of the tree undoubtedly has
440 * pointers to the piece of memory that
441 * is the head of the list so pulling
442 * the item out of the list and hence
443 * the avl tree would be a bad idea.
445 * (cheaper than the alternative, a tree
446 * delete of this node followed by a tree
447 * insert of the next node on the list).
449 tmp_startblock
= ext
->next
->ex_startblock
;
450 tmp_blockcount
= ext
->next
->ex_blockcount
;
451 tmp_state
= ext
->next
->ex_state
;
453 ext
->next
->ex_startblock
= ext
->ex_startblock
;
454 ext
->next
->ex_blockcount
= ext
->ex_blockcount
;
455 ext
->next
->ex_state
= ext
->ex_state
;
457 ext
->ex_startblock
= tmp_startblock
;
458 ext
->ex_blockcount
= tmp_blockcount
;
459 ext
->ex_state
= tmp_state
;
465 * now, a simple list deletion
467 prev
->next
= ext
->next
;
471 * no list, just one node. simply delete
473 avl_delete(extent_bcnt_ptrs
[agno
], &ext
->avl_node
);
476 ASSERT(ext
->ex_startblock
== startblock
);
477 ASSERT(ext
->ex_blockcount
== blockcount
);
482 avl_ext_start(avlnode_t
*node
)
485 ((extent_tree_node_t
*) node
)->ex_startblock
);
489 avl_ext_end(avlnode_t
*node
)
492 ((extent_tree_node_t
*) node
)->ex_startblock
+
493 ((extent_tree_node_t
*) node
)->ex_blockcount
));
497 * convert size to an address for the AVL tree code -- the bigger the size,
498 * the lower the address so the biggest extent will be first in the tree
501 avl_ext_bcnt_start(avlnode_t
*node
)
504 return((uintptr_t) (BCNT_ADDR(((extent_tree_node_t *)
505 node)->ex_blockcount)));
507 return((uintptr_t) ((extent_tree_node_t
*)node
)->ex_blockcount
);
511 avl_ext_bcnt_end(avlnode_t
*node
)
514 return((uintptr_t) (BCNT_ADDR(((extent_tree_node_t *)
515 node)->ex_blockcount)));
517 return((uintptr_t) ((extent_tree_node_t
*)node
)->ex_blockcount
);
520 static avlops_t avl_extent_bcnt_tree_ops
= {
525 static avlops_t avl_extent_tree_ops
= {
531 * for real-time extents -- have to dup code since realtime extent
532 * startblocks can be 64-bit values.
534 static rt_extent_tree_node_t
*
535 mk_rt_extent_tree_nodes(xfs_rtblock_t new_startblock
,
536 xfs_extlen_t new_blockcount
, extent_state_t new_state
)
538 rt_extent_tree_node_t
*new;
540 new = malloc(sizeof(*new));
542 do_error(_("couldn't allocate new extent descriptor.\n"));
544 new->avl_node
.avl_nextino
= NULL
;
545 new->rt_startblock
= new_startblock
;
546 new->rt_blockcount
= new_blockcount
;
547 new->rt_state
= new_state
;
553 release_rt_extent_tree_node(rt_extent_tree_node_t
*node
)
559 release_rt_extent_tree()
561 extent_tree_node_t
*ext
;
562 extent_tree_node_t
*tmp
;
563 extent_tree_node_t
*lext
;
564 extent_tree_node_t
*ltmp
;
565 avl64tree_desc_t
*tree
;
567 tree
= rt_extent_tree_ptr
;
569 if (tree
->avl_firstino
== NULL
)
572 ext
= (extent_tree_node_t
*) tree
->avl_firstino
;
574 while (ext
!= NULL
) {
575 tmp
= (extent_tree_node_t
*) ext
->avl_node
.avl_nextino
;
576 release_rt_extent_tree_node(ext
);
580 tree
->avl_root
= tree
->avl_firstino
= NULL
;
587 * don't need release functions for realtime tree teardown
588 * since we only have one tree, not one per AG
592 free_rt_dup_extent_tree(xfs_mount_t
*mp
)
594 ASSERT(mp
->m_sb
.sb_rblocks
!= 0);
595 free(rt_ext_tree_ptr
);
596 rt_ext_tree_ptr
= NULL
;
600 * add a duplicate real-time extent
603 add_rt_dup_extent(xfs_rtblock_t startblock
, xfs_extlen_t blockcount
)
605 rt_extent_tree_node_t
*first
, *last
, *ext
, *next_ext
;
606 xfs_rtblock_t new_startblock
;
607 xfs_extlen_t new_blockcount
;
609 pthread_mutex_lock(&rt_ext_tree_lock
);
610 avl64_findranges(rt_ext_tree_ptr
, startblock
- 1,
611 startblock
+ blockcount
+ 1,
612 (avl64node_t
**) &first
, (avl64node_t
**) &last
);
614 * find adjacent and overlapping extent blocks
616 if (first
== NULL
&& last
== NULL
) {
617 /* nothing, just make and insert new extent */
619 ext
= mk_rt_extent_tree_nodes(startblock
,
620 blockcount
, XR_E_MULT
);
622 if (avl64_insert(rt_ext_tree_ptr
,
623 (avl64node_t
*) ext
) == NULL
) {
624 do_error(_("duplicate extent range\n"));
627 pthread_mutex_unlock(&rt_ext_tree_lock
);
631 ASSERT(first
!= NULL
&& last
!= NULL
);
634 * find the new composite range, delete old extent nodes
637 new_startblock
= startblock
;
638 new_blockcount
= blockcount
;
641 ext
!= (rt_extent_tree_node_t
*) last
->avl_node
.avl_nextino
;
644 * preserve the next inorder node
646 next_ext
= (rt_extent_tree_node_t
*) ext
->avl_node
.avl_nextino
;
648 * just bail if the new extent is contained within an old one
650 if (ext
->rt_startblock
<= startblock
&&
651 ext
->rt_blockcount
>= blockcount
) {
652 pthread_mutex_unlock(&rt_ext_tree_lock
);
656 * now check for overlaps and adjacent extents
658 if (ext
->rt_startblock
+ ext
->rt_blockcount
>= startblock
659 || ext
->rt_startblock
<= startblock
+ blockcount
) {
661 if (ext
->rt_startblock
< new_startblock
)
662 new_startblock
= ext
->rt_startblock
;
664 if (ext
->rt_startblock
+ ext
->rt_blockcount
>
665 new_startblock
+ new_blockcount
)
666 new_blockcount
= ext
->rt_startblock
+
670 avl64_delete(rt_ext_tree_ptr
, (avl64node_t
*) ext
);
675 ext
= mk_rt_extent_tree_nodes(new_startblock
,
676 new_blockcount
, XR_E_MULT
);
678 if (avl64_insert(rt_ext_tree_ptr
, (avl64node_t
*) ext
) == NULL
) {
679 do_error(_("duplicate extent range\n"));
682 pthread_mutex_unlock(&rt_ext_tree_lock
);
687 * returns 1 if block is a dup, 0 if not
691 search_rt_dup_extent(xfs_mount_t
*mp
, xfs_rtblock_t bno
)
695 pthread_mutex_lock(&rt_ext_tree_lock
);
696 if (avl64_findrange(rt_ext_tree_ptr
, bno
) != NULL
)
700 pthread_mutex_unlock(&rt_ext_tree_lock
);
705 avl64_rt_ext_start(avl64node_t
*node
)
707 return(((rt_extent_tree_node_t
*) node
)->rt_startblock
);
711 avl64_ext_end(avl64node_t
*node
)
713 return(((rt_extent_tree_node_t
*) node
)->rt_startblock
+
714 ((rt_extent_tree_node_t
*) node
)->rt_blockcount
);
717 static avl64ops_t avl64_extent_tree_ops
= {
723 incore_ext_init(xfs_mount_t
*mp
)
726 xfs_agnumber_t agcount
= mp
->m_sb
.sb_agcount
;
728 pthread_mutex_init(&rt_ext_tree_lock
, NULL
);
730 dup_extent_trees
= calloc(agcount
, sizeof(struct btree_root
*));
731 if (!dup_extent_trees
)
732 do_error(_("couldn't malloc dup extent tree descriptor table\n"));
734 dup_extent_tree_locks
= calloc(agcount
, sizeof(pthread_mutex_t
));
735 if (!dup_extent_tree_locks
)
736 do_error(_("couldn't malloc dup extent tree descriptor table\n"));
738 if ((extent_bno_ptrs
= malloc(agcount
*
739 sizeof(avltree_desc_t
*))) == NULL
)
741 _("couldn't malloc free by-bno extent tree descriptor table\n"));
743 if ((extent_bcnt_ptrs
= malloc(agcount
*
744 sizeof(avltree_desc_t
*))) == NULL
)
746 _("couldn't malloc free by-bcnt extent tree descriptor table\n"));
748 for (i
= 0; i
< agcount
; i
++) {
749 if ((extent_bno_ptrs
[i
] =
750 malloc(sizeof(avltree_desc_t
))) == NULL
)
752 _("couldn't malloc bno extent tree descriptor\n"));
753 if ((extent_bcnt_ptrs
[i
] =
754 malloc(sizeof(avltree_desc_t
))) == NULL
)
756 _("couldn't malloc bcnt extent tree descriptor\n"));
759 for (i
= 0; i
< agcount
; i
++) {
760 btree_init(&dup_extent_trees
[i
]);
761 pthread_mutex_init(&dup_extent_tree_locks
[i
], NULL
);
762 avl_init_tree(extent_bno_ptrs
[i
], &avl_extent_tree_ops
);
763 avl_init_tree(extent_bcnt_ptrs
[i
], &avl_extent_bcnt_tree_ops
);
766 if ((rt_ext_tree_ptr
= malloc(sizeof(avl64tree_desc_t
))) == NULL
)
767 do_error(_("couldn't malloc dup rt extent tree descriptor\n"));
769 avl64_init_tree(rt_ext_tree_ptr
, &avl64_extent_tree_ops
);
773 * this routine actually frees all the memory used to track per-AG trees
776 incore_ext_teardown(xfs_mount_t
*mp
)
780 for (i
= 0; i
< mp
->m_sb
.sb_agcount
; i
++) {
781 btree_destroy(dup_extent_trees
[i
]);
782 free(extent_bno_ptrs
[i
]);
783 free(extent_bcnt_ptrs
[i
]);
786 free(dup_extent_trees
);
787 free(extent_bcnt_ptrs
);
788 free(extent_bno_ptrs
);
790 dup_extent_trees
= NULL
;
791 extent_bcnt_ptrs
= NULL
;
792 extent_bno_ptrs
= NULL
;
796 count_extents(xfs_agnumber_t agno
, avltree_desc_t
*tree
, int whichtree
)
798 extent_tree_node_t
*node
;
801 node
= (extent_tree_node_t
*) tree
->avl_firstino
;
803 while (node
!= NULL
) {
806 node
= findnext_bcnt_extent(agno
, node
);
808 node
= findnext_bno_extent(node
);
815 count_bno_extents_blocks(xfs_agnumber_t agno
, uint
*numblocks
)
818 extent_tree_node_t
*node
;
821 ASSERT(agno
< glob_agcount
);
825 node
= (extent_tree_node_t
*) extent_bno_ptrs
[agno
]->avl_firstino
;
827 while (node
!= NULL
) {
828 nblocks
+= node
->ex_blockcount
;
830 node
= findnext_bno_extent(node
);
833 *numblocks
= nblocks
;
838 count_bno_extents(xfs_agnumber_t agno
)
840 ASSERT(agno
< glob_agcount
);
841 return(count_extents(agno
, extent_bno_ptrs
[agno
], 0));
845 count_bcnt_extents(xfs_agnumber_t agno
)
847 ASSERT(agno
< glob_agcount
);
848 return(count_extents(agno
, extent_bcnt_ptrs
[agno
], 1));