// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2020 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "libxfs.h"
#include "err_protos.h"
#include "libfrog/bitmap.h"
#include "slab.h"
#include "rmap.h"
#include "incore.h"
#include "bulkload.h"
#include "agbtree.h"

/* Initialize a btree rebuild context. */
static void
init_rebuild(
        struct repair_ctx       *sc,
        const struct xfs_owner_info *oinfo,
        xfs_agblock_t           est_agfreeblocks,
        struct bt_rebuild       *btr)
{
        memset(btr, 0, sizeof(struct bt_rebuild));

        bulkload_init_ag(&btr->newbt, sc, oinfo);
        bulkload_estimate_ag_slack(sc, &btr->bload, est_agfreeblocks);
}
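
/*
 * Note on the shared bookkeeping above: newbt tracks the block
 * reservations for a rebuild (tagged with the rmap owner passed in
 * oinfo), while bload carries the bulk-load geometry, including the
 * slack values estimated from est_agfreeblocks.
 */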

/*
 * Update this free space record to reflect the blocks we stole from the
 * beginning of the record.
 */
static void
consume_freespace(
        xfs_agnumber_t          agno,
        struct extent_tree_node *ext_ptr,
        uint32_t                len)
{
        struct extent_tree_node *bno_ext_ptr;
        xfs_agblock_t           new_start = ext_ptr->ex_startblock + len;
        xfs_extlen_t            new_len = ext_ptr->ex_blockcount - len;

        /* Delete the used-up extent from both extent trees. */
#ifdef XR_BLD_FREE_TRACE
        fprintf(stderr, "releasing extent: %u [%u %u]\n", agno,
                        ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
#endif
        bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
        ASSERT(bno_ext_ptr != NULL);
        get_bno_extent(agno, bno_ext_ptr);
        release_extent_tree_node(bno_ext_ptr);

        ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
                        ext_ptr->ex_blockcount);
        release_extent_tree_node(ext_ptr);

        /*
         * If we only used part of this last extent, then we must reinsert the
         * extent to maintain proper sorting order.
         */
        if (new_len > 0) {
                add_bno_extent(agno, new_start, new_len);
                add_bcnt_extent(agno, new_start, new_len);
        }
}
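
/*
 * For example, if consume_freespace() steals 4 blocks from a record
 * covering agbnos 100-119, it deletes [100, 20] from both extent trees
 * and reinserts [104, 16], preserving both sort orders.
 */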

/*
 * Reserve blocks for the new per-AG structures.  Returns true if all blocks
 * were allocated, and false if we ran out of space.
 */
static bool
reserve_agblocks(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno,
        struct bt_rebuild       *btr,
        uint32_t                nr_blocks)
{
        struct extent_tree_node *ext_ptr;
        uint32_t                blocks_allocated = 0;
        uint32_t                len;
        int                     error;

        while (blocks_allocated < nr_blocks) {
                xfs_fsblock_t   fsbno;

                /*
                 * Grab the smallest extent and use it up, then get the
                 * next smallest.  This mimics the init_*_cursor code.
                 */
                ext_ptr = findfirst_bcnt_extent(agno);
                if (!ext_ptr)
                        break;

                /* Use up the extent we've got. */
                len = min(ext_ptr->ex_blockcount, nr_blocks - blocks_allocated);
                fsbno = XFS_AGB_TO_FSB(mp, agno, ext_ptr->ex_startblock);
                error = bulkload_add_blocks(&btr->newbt, fsbno, len);
                if (error)
                        do_error(_("could not set up btree reservation: %s\n"),
                                strerror(-error));

                error = rmap_add_ag_rec(mp, agno, ext_ptr->ex_startblock, len,
                                btr->newbt.oinfo.oi_owner);
                if (error)
                        do_error(_("could not set up btree rmaps: %s\n"),
                                strerror(-error));

                consume_freespace(agno, ext_ptr, len);
                blocks_allocated += len;
        }
#ifdef XR_BLD_FREE_TRACE
        fprintf(stderr, "blocks_allocated = %d\n",
                blocks_allocated);
#endif
        return blocks_allocated == nr_blocks;
}
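
/*
 * Note that reserve_agblocks() does more than carve blocks out of the
 * free space: each reservation is also recorded in the incore rmap data
 * (rmap_add_ag_rec) under the owner stashed in newbt.oinfo, so the
 * rebuilt rmapbt will include the new btree blocks themselves.
 */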

static void
reserve_btblocks(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno,
        struct bt_rebuild       *btr,
        uint32_t                nr_blocks)
{
        if (!reserve_agblocks(mp, agno, btr, nr_blocks))
                do_error(
_("error - not enough free space in filesystem, AG %u\n"),
                        agno);
}

/* Feed one of the new btree blocks to the bulk loader. */
static int
rebuild_claim_block(
        struct xfs_btree_cur    *cur,
        union xfs_btree_ptr     *ptr,
        void                    *priv)
{
        struct bt_rebuild       *btr = priv;

        return bulkload_claim_block(cur, &btr->newbt, ptr);
}

/*
 * Scoop up leftovers from a rebuild cursor for later freeing, then free the
 * rebuild context.
 */
void
finish_rebuild(
        struct xfs_mount        *mp,
        struct bt_rebuild       *btr,
        struct bitmap           *lost_blocks)
{
        struct bulkload_resv    *resv, *n;
        int                     error;

        for_each_bulkload_reservation(&btr->newbt, resv, n) {
                if (resv->used == resv->len)
                        continue;

                error = bitmap_set(lost_blocks, resv->fsbno + resv->used,
                                resv->len - resv->used);
                if (error)
                        do_error(
_("Insufficient memory saving lost blocks, err=%d.\n"), error);
                resv->used = resv->len;
        }

        bulkload_destroy(&btr->newbt, 0);
}
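
/*
 * The lost_blocks bitmap is only populated here; the caller is expected
 * to free those blocks later, once the new btrees have been committed.
 */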

/*
 * Free Space Btrees
 *
 * We need to leave some free records in the tree for the corner case of
 * setting up the AGFL.  This may require allocation of blocks, and as
 * such can require insertion of new records into the tree (e.g. moving
 * a record in the by-count tree when a long extent is shortened).  If we
 * pack the records into the leaves with no slack space, this requires a
 * leaf split to occur and a block to be allocated from the free list.
 * If we don't have any blocks on the free list (because we are setting
 * it up!), then we fail, and the filesystem will fail with the same
 * failure at runtime.  Hence leave a couple of records slack space in
 * each block to allow immediate modification of the tree without
 * requiring splits to be done.
 */
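
/*
 * Concretely: if a leaf can hold N records, the bulk loader fills it to
 * roughly N minus the slack, so a single post-load insertion (e.g. a
 * by-count record moving when the AGFL allocation shortens an extent)
 * fits without triggering a split.
 */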

/*
 * Return the next free space extent tree record from the previous value we
 * saw.
 */
static inline struct extent_tree_node *
get_bno_rec(
        struct xfs_btree_cur    *cur,
        struct extent_tree_node *prev_value)
{
        xfs_agnumber_t          agno = cur->bc_ag.pag->pag_agno;

        if (cur->bc_btnum == XFS_BTNUM_BNO) {
                if (!prev_value)
                        return findfirst_bno_extent(agno);
                return findnext_bno_extent(prev_value);
        }

        /* cnt btree */
        if (!prev_value)
                return findfirst_bcnt_extent(agno);
        return findnext_bcnt_extent(agno, prev_value);
}

/* Grab one bnobt record and put it in the btree cursor. */
static int
get_bnobt_record(
        struct xfs_btree_cur            *cur,
        void                            *priv)
{
        struct bt_rebuild               *btr = priv;
        struct xfs_alloc_rec_incore     *arec = &cur->bc_rec.a;

        btr->bno_rec = get_bno_rec(cur, btr->bno_rec);
        arec->ar_startblock = btr->bno_rec->ex_startblock;
        arec->ar_blockcount = btr->bno_rec->ex_blockcount;
        btr->freeblks += btr->bno_rec->ex_blockcount;
        return 0;
}
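
/*
 * get_bnobt_record() serves both free space btrees: get_bno_rec() keys
 * off cur->bc_btnum to walk either the by-bno or the by-size incore
 * extent tree, so each bulk load sees records in its own sort order.
 */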

void
init_freespace_cursors(
        struct repair_ctx       *sc,
        struct xfs_perag        *pag,
        unsigned int            est_agfreeblocks,
        unsigned int            *nr_extents,
        int                     *extra_blocks,
        struct bt_rebuild       *btr_bno,
        struct bt_rebuild       *btr_cnt)
{
        xfs_agnumber_t          agno = pag->pag_agno;
        unsigned int            agfl_goal;
        int                     error;

        agfl_goal = libxfs_alloc_min_freelist(sc->mp, NULL);

        init_rebuild(sc, &XFS_RMAP_OINFO_AG, est_agfreeblocks, btr_bno);
        init_rebuild(sc, &XFS_RMAP_OINFO_AG, est_agfreeblocks, btr_cnt);

        btr_bno->cur = libxfs_allocbt_stage_cursor(sc->mp,
                        &btr_bno->newbt.afake, pag, XFS_BTNUM_BNO);
        btr_cnt->cur = libxfs_allocbt_stage_cursor(sc->mp,
                        &btr_cnt->newbt.afake, pag, XFS_BTNUM_CNT);

        btr_bno->bload.get_record = get_bnobt_record;
        btr_bno->bload.claim_block = rebuild_claim_block;

        btr_cnt->bload.get_record = get_bnobt_record;
        btr_cnt->bload.claim_block = rebuild_claim_block;

        /*
         * Now we need to allocate blocks for the free space btrees using the
         * free space records we're about to put in them.  Every record we use
         * can change the shape of the free space trees, so we recompute the
         * btree shape until we stop needing /more/ blocks.  If we have any
         * left over we'll stash them in the AGFL when we're done.
         */
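        /*
         * Each pass through this loop can itself consume free space
         * records (reserve_btblocks() carves blocks out of the extent
         * trees), so *nr_extents is recounted at the bottom and the
         * geometry recomputed until the numbers settle.
         */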
        do {
                unsigned int    num_freeblocks;
                int             delta_bno, delta_cnt;
                int             agfl_wanted;

                /* Compute how many bnobt blocks we'll need. */
                error = -libxfs_btree_bload_compute_geometry(btr_bno->cur,
                                &btr_bno->bload, *nr_extents);
                if (error)
                        do_error(
_("Unable to compute free space by block btree geometry, error %d.\n"), -error);

                /* Compute how many cntbt blocks we'll need. */
                error = -libxfs_btree_bload_compute_geometry(btr_cnt->cur,
                                &btr_cnt->bload, *nr_extents);
                if (error)
                        do_error(
_("Unable to compute free space by length btree geometry, error %d.\n"), -error);

                /*
                 * Compute the deficit between the number of blocks reserved
                 * and the number of blocks we think we need for the btree.
                 */
                delta_bno = (int)btr_bno->newbt.nr_reserved -
                                 btr_bno->bload.nr_blocks;
                delta_cnt = (int)btr_cnt->newbt.nr_reserved -
                                 btr_cnt->bload.nr_blocks;

                /* We don't need any more blocks, so we're done. */
                if (delta_bno >= 0 && delta_cnt >= 0 &&
                    delta_bno + delta_cnt >= agfl_goal) {
                        *extra_blocks = delta_bno + delta_cnt;
                        break;
                }

                /* Allocate however many more blocks we need this time. */
                if (delta_bno < 0) {
                        reserve_btblocks(sc->mp, agno, btr_bno, -delta_bno);
                        delta_bno = 0;
                }
                if (delta_cnt < 0) {
                        reserve_btblocks(sc->mp, agno, btr_cnt, -delta_cnt);
                        delta_cnt = 0;
                }

                /*
                 * Try to fill the bnobt cursor with extra blocks to populate
                 * the AGFL.  If we don't get all the blocks we want, stop
                 * trying to fill the AGFL because the AG is totally out of
                 * space.
                 */
                agfl_wanted = agfl_goal - (delta_bno + delta_cnt);
                if (agfl_wanted > 0 &&
                    !reserve_agblocks(sc->mp, agno, btr_bno, agfl_wanted))
                        agfl_goal = 0;

                /* Ok, now how many free space records do we have? */
                *nr_extents = count_bno_extents_blocks(agno, &num_freeblocks);
        } while (1);
}
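
/*
 * Sketch of the expected calling sequence (assuming the phase 5 caller):
 * init_freespace_cursors() sizes and reserves; build_freespace_btrees()
 * bulk-loads both trees; and once the AGF has been written,
 * finish_rebuild() sweeps any unused reservations into lost_blocks.
 */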

/* Rebuild the free space btrees. */
void
build_freespace_btrees(
        struct repair_ctx       *sc,
        xfs_agnumber_t          agno,
        struct bt_rebuild       *btr_bno,
        struct bt_rebuild       *btr_cnt)
{
        int                     error;

        /* Add all observed bnobt records. */
        error = -libxfs_btree_bload(btr_bno->cur, &btr_bno->bload, btr_bno);
        if (error)
                do_error(
_("Error %d while creating bnobt btree for AG %u.\n"), error, agno);

        /* Add all observed cntbt records. */
        error = -libxfs_btree_bload(btr_cnt->cur, &btr_cnt->bload, btr_cnt);
        if (error)
                do_error(
_("Error %d while creating cntbt btree for AG %u.\n"), error, agno);

        /* Since we're not writing the AGF yet, no need to commit the cursor */
        libxfs_btree_del_cursor(btr_bno->cur, 0);
        libxfs_btree_del_cursor(btr_cnt->cur, 0);
}

/* Inode Btrees */

static inline struct ino_tree_node *
get_ino_rec(
        struct xfs_btree_cur    *cur,
        struct ino_tree_node    *prev_value)
{
        xfs_agnumber_t          agno = cur->bc_ag.pag->pag_agno;

        if (cur->bc_btnum == XFS_BTNUM_INO) {
                if (!prev_value)
                        return findfirst_inode_rec(agno);
                return next_ino_rec(prev_value);
        }

        /* finobt */
        if (!prev_value)
                return findfirst_free_inode_rec(agno);
        return next_free_ino_rec(prev_value);
}

/* Grab one inobt record. */
static int
get_inobt_record(
        struct xfs_btree_cur            *cur,
        void                            *priv)
{
        struct bt_rebuild               *btr = priv;
        struct xfs_inobt_rec_incore     *irec = &cur->bc_rec.i;
        struct ino_tree_node            *ino_rec;
        int                             inocnt = 0;
        int                             finocnt = 0;
        int                             k;

        btr->ino_rec = ino_rec = get_ino_rec(cur, btr->ino_rec);

        /* Transform the incore record into an on-disk record. */
        irec->ir_startino = ino_rec->ino_startnum;
        irec->ir_free = ino_rec->ir_free;

        for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++) {
                ASSERT(is_inode_confirmed(ino_rec, k));

                if (is_inode_sparse(ino_rec, k))
                        continue;
                if (is_inode_free(ino_rec, k))
                        finocnt++;
                inocnt++;
        }

        irec->ir_count = inocnt;
        irec->ir_freecount = finocnt;

        if (xfs_has_sparseinodes(cur->bc_mp)) {
                uint64_t                sparse;
                int                     spmask;
                uint16_t                holemask;

                /*
                 * Convert the 64-bit in-core sparse inode state to the
                 * 16-bit on-disk holemask.
                 */
                holemask = 0;
                spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
                sparse = ino_rec->ir_sparse;
                for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
                        if (sparse & spmask) {
                                ASSERT((sparse & spmask) == spmask);
                                holemask |= (1 << k);
                        } else
                                ASSERT((sparse & spmask) == 0);
                        sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
                }

                irec->ir_holemask = holemask;
        } else {
                irec->ir_holemask = 0;
        }

        if (btr->first_agino == NULLAGINO)
                btr->first_agino = ino_rec->ino_startnum;
        btr->freecount += finocnt;
        btr->count += inocnt;
        return 0;
}
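
/*
 * Worked example for the holemask conversion above, assuming
 * XFS_INODES_PER_HOLEMASK_BIT is 4: a chunk whose first sixteen inodes
 * are sparse has ir_sparse == 0xffff, which sets holemask bits 0-3 and
 * leaves the other twelve bits clear.
 */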

/* Initialize both inode btree cursors as needed. */
void
init_ino_cursors(
        struct repair_ctx       *sc,
        struct xfs_perag        *pag,
        unsigned int            est_agfreeblocks,
        uint64_t                *num_inos,
        uint64_t                *num_free_inos,
        struct bt_rebuild       *btr_ino,
        struct bt_rebuild       *btr_fino)
{
        struct ino_tree_node    *ino_rec;
        xfs_agnumber_t          agno = pag->pag_agno;
        unsigned int            ino_recs = 0;
        unsigned int            fino_recs = 0;
        bool                    finobt;
        int                     error;

        finobt = xfs_has_finobt(sc->mp);
        init_rebuild(sc, &XFS_RMAP_OINFO_INOBT, est_agfreeblocks, btr_ino);

        /* Compute inode statistics. */
        *num_free_inos = 0;
        *num_inos = 0;
        for (ino_rec = findfirst_inode_rec(agno);
             ino_rec != NULL;
             ino_rec = next_ino_rec(ino_rec)) {
                unsigned int    rec_ninos = 0;
                unsigned int    rec_nfinos = 0;
                int             i;

                for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
                        ASSERT(is_inode_confirmed(ino_rec, i));
                        /*
                         * sparse inodes are not factored into superblock (free)
                         * inode counts
                         */
                        if (is_inode_sparse(ino_rec, i))
                                continue;
                        if (is_inode_free(ino_rec, i))
                                rec_nfinos++;
                        rec_ninos++;
                }

                *num_free_inos += rec_nfinos;
                *num_inos += rec_ninos;
                ino_recs++;

                /* finobt only considers records with free inodes */
                if (rec_nfinos)
                        fino_recs++;
        }

        btr_ino->cur = libxfs_inobt_stage_cursor(pag, &btr_ino->newbt.afake,
                        XFS_BTNUM_INO);

        btr_ino->bload.get_record = get_inobt_record;
        btr_ino->bload.claim_block = rebuild_claim_block;
        btr_ino->first_agino = NULLAGINO;

        /* Compute how many inobt blocks we'll need. */
        error = -libxfs_btree_bload_compute_geometry(btr_ino->cur,
                        &btr_ino->bload, ino_recs);
        if (error)
                do_error(
_("Unable to compute inode btree geometry, error %d.\n"), error);

        reserve_btblocks(sc->mp, agno, btr_ino, btr_ino->bload.nr_blocks);

        if (!finobt)
                return;

        init_rebuild(sc, &XFS_RMAP_OINFO_INOBT, est_agfreeblocks, btr_fino);
        btr_fino->cur = libxfs_inobt_stage_cursor(pag,
                        &btr_fino->newbt.afake, XFS_BTNUM_FINO);

        btr_fino->bload.get_record = get_inobt_record;
        btr_fino->bload.claim_block = rebuild_claim_block;
        btr_fino->first_agino = NULLAGINO;

        /* Compute how many finobt blocks we'll need. */
        error = -libxfs_btree_bload_compute_geometry(btr_fino->cur,
                        &btr_fino->bload, fino_recs);
        if (error)
                do_error(
_("Unable to compute free inode btree geometry, error %d.\n"), error);

        reserve_btblocks(sc->mp, agno, btr_fino, btr_fino->bload.nr_blocks);
}

/* Rebuild the inode btrees. */
void
build_inode_btrees(
        struct repair_ctx       *sc,
        xfs_agnumber_t          agno,
        struct bt_rebuild       *btr_ino,
        struct bt_rebuild       *btr_fino)
{
        int                     error;

        /* Add all observed inobt records. */
        error = -libxfs_btree_bload(btr_ino->cur, &btr_ino->bload, btr_ino);
        if (error)
                do_error(
_("Error %d while creating inobt btree for AG %u.\n"), error, agno);

        /* Since we're not writing the AGI yet, no need to commit the cursor */
        libxfs_btree_del_cursor(btr_ino->cur, 0);

        if (!xfs_has_finobt(sc->mp))
                return;

        /* Add all observed finobt records. */
        error = -libxfs_btree_bload(btr_fino->cur, &btr_fino->bload, btr_fino);
        if (error)
                do_error(
_("Error %d while creating finobt btree for AG %u.\n"), error, agno);

        /* Since we're not writing the AGI yet, no need to commit the cursor */
        libxfs_btree_del_cursor(btr_fino->cur, 0);
}

/* rebuild the rmap tree */

/* Grab one rmap record. */
static int
get_rmapbt_record(
        struct xfs_btree_cur            *cur,
        void                            *priv)
{
        struct xfs_rmap_irec            *rec;
        struct bt_rebuild               *btr = priv;

        rec = pop_slab_cursor(btr->slab_cursor);
        memcpy(&cur->bc_rec.r, rec, sizeof(struct xfs_rmap_irec));
        return 0;
}
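
/*
 * Unlike the free space and inode callbacks, which walk the incore
 * extent and inode record trees directly, the rmap and refcount records
 * stream out of slab cursors that were filled during earlier repair
 * phases.
 */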

/* Set up the rmap rebuild parameters. */
void
init_rmapbt_cursor(
        struct repair_ctx       *sc,
        struct xfs_perag        *pag,
        unsigned int            est_agfreeblocks,
        struct bt_rebuild       *btr)
{
        xfs_agnumber_t          agno = pag->pag_agno;
        int                     error;

        if (!xfs_has_rmapbt(sc->mp))
                return;

        init_rebuild(sc, &XFS_RMAP_OINFO_AG, est_agfreeblocks, btr);
        btr->cur = libxfs_rmapbt_stage_cursor(sc->mp, &btr->newbt.afake, pag);

        btr->bload.get_record = get_rmapbt_record;
        btr->bload.claim_block = rebuild_claim_block;

        /* Compute how many blocks we'll need. */
        error = -libxfs_btree_bload_compute_geometry(btr->cur, &btr->bload,
                        rmap_record_count(sc->mp, agno));
        if (error)
                do_error(
_("Unable to compute rmap btree geometry, error %d.\n"), error);

        reserve_btblocks(sc->mp, agno, btr, btr->bload.nr_blocks);
}

/* Rebuild a rmap btree. */
void
build_rmap_tree(
        struct repair_ctx       *sc,
        xfs_agnumber_t          agno,
        struct bt_rebuild       *btr)
{
        int                     error;

        error = rmap_init_cursor(agno, &btr->slab_cursor);
        if (error)
                do_error(
_("Insufficient memory to construct rmap cursor.\n"));

        /* Add all observed rmap records. */
        error = -libxfs_btree_bload(btr->cur, &btr->bload, btr);
        if (error)
                do_error(
_("Error %d while creating rmap btree for AG %u.\n"), error, agno);

        /* Since we're not writing the AGF yet, no need to commit the cursor */
        libxfs_btree_del_cursor(btr->cur, 0);
        free_slab_cursor(&btr->slab_cursor);
}

/* rebuild the refcount tree */

/* Grab one refcount record. */
static int
get_refcountbt_record(
        struct xfs_btree_cur            *cur,
        void                            *priv)
{
        struct xfs_refcount_irec        *rec;
        struct bt_rebuild               *btr = priv;

        rec = pop_slab_cursor(btr->slab_cursor);
        memcpy(&cur->bc_rec.rc, rec, sizeof(struct xfs_refcount_irec));
        return 0;
}

/* Set up the refcount rebuild parameters. */
void
init_refc_cursor(
        struct repair_ctx       *sc,
        struct xfs_perag        *pag,
        unsigned int            est_agfreeblocks,
        struct bt_rebuild       *btr)
{
        xfs_agnumber_t          agno = pag->pag_agno;
        int                     error;

        if (!xfs_has_reflink(sc->mp))
                return;

        init_rebuild(sc, &XFS_RMAP_OINFO_REFC, est_agfreeblocks, btr);
        btr->cur = libxfs_refcountbt_stage_cursor(sc->mp, &btr->newbt.afake,
                        pag);

        btr->bload.get_record = get_refcountbt_record;
        btr->bload.claim_block = rebuild_claim_block;

        /* Compute how many blocks we'll need. */
        error = -libxfs_btree_bload_compute_geometry(btr->cur, &btr->bload,
                        refcount_record_count(sc->mp, agno));
        if (error)
                do_error(
_("Unable to compute refcount btree geometry, error %d.\n"), error);

        reserve_btblocks(sc->mp, agno, btr, btr->bload.nr_blocks);
}

/* Rebuild a refcount btree. */
void
build_refcount_tree(
        struct repair_ctx       *sc,
        xfs_agnumber_t          agno,
        struct bt_rebuild       *btr)
{
        int                     error;

        error = init_refcount_cursor(agno, &btr->slab_cursor);
        if (error)
                do_error(
_("Insufficient memory to construct refcount cursor.\n"));

        /* Add all observed refcount records. */
        error = -libxfs_btree_bload(btr->cur, &btr->bload, btr);
        if (error)
                do_error(
_("Error %d while creating refcount btree for AG %u.\n"), error, agno);

        /* Since we're not writing the AGF yet, no need to commit the cursor */
        libxfs_btree_del_cursor(btr->cur, 0);
        free_slab_cursor(&btr->slab_cursor);
}

static unsigned int
estimate_allocbt_blocks(
        struct xfs_perag        *pag,
        unsigned int            nr_extents)
{
        /* Account for space consumed by both free space btrees */
        return libxfs_allocbt_calc_size(pag->pag_mount, nr_extents) * 2;
}

static unsigned int
estimate_inobt_blocks(
        struct xfs_perag        *pag)
{
        struct ino_tree_node    *ino_rec;
        xfs_agnumber_t          agno = pag->pag_agno;
        unsigned int            ino_recs = 0;
        unsigned int            fino_recs = 0;
        unsigned int            ret;

        for (ino_rec = findfirst_inode_rec(agno);
             ino_rec != NULL;
             ino_rec = next_ino_rec(ino_rec)) {
                unsigned int    rec_nfinos = 0;
                int             i;

                for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
                        ASSERT(is_inode_confirmed(ino_rec, i));
                        /*
                         * sparse inodes are not factored into superblock (free)
                         * inode counts
                         */
                        if (is_inode_sparse(ino_rec, i))
                                continue;
                        if (is_inode_free(ino_rec, i))
                                rec_nfinos++;
                }

                ino_recs++;

                /* finobt only considers records with free inodes */
                if (rec_nfinos)
                        fino_recs++;
        }

        ret = libxfs_iallocbt_calc_size(pag->pag_mount, ino_recs);
        if (xfs_has_finobt(pag->pag_mount))
                ret += libxfs_iallocbt_calc_size(pag->pag_mount, fino_recs);
        return ret;
}

/* Estimate the size of the per-AG btrees. */
unsigned int
estimate_agbtree_blocks(
        struct xfs_perag        *pag,
        unsigned int            free_extents)
{
        unsigned int            ret = 0;

        ret += estimate_allocbt_blocks(pag, free_extents);
        ret += estimate_inobt_blocks(pag);
        ret += estimate_rmapbt_blocks(pag);
        ret += estimate_refcountbt_blocks(pag);

        return ret;
}
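
/*
 * These estimates only size the up-front reservations; the geometry
 * loop in init_freespace_cursors() trues the numbers up against the
 * actual record counts once rebuilding begins.
 */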