1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2020 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
7 #include "err_protos.h"
8 #include "libfrog/bitmap.h"
15 /* Initialize a btree rebuild context. */
18 struct repair_ctx
*sc
,
19 const struct xfs_owner_info
*oinfo
,
20 xfs_agblock_t free_space
,
21 struct bt_rebuild
*btr
)
23 memset(btr
, 0, sizeof(struct bt_rebuild
));
25 bulkload_init_ag(&btr
->newbt
, sc
, oinfo
);
26 bulkload_estimate_ag_slack(sc
, &btr
->bload
, free_space
);
30 * Update this free space record to reflect the blocks we stole from the
31 * beginning of the record.
36 struct extent_tree_node
*ext_ptr
,
39 struct extent_tree_node
*bno_ext_ptr
;
40 xfs_agblock_t new_start
= ext_ptr
->ex_startblock
+ len
;
41 xfs_extlen_t new_len
= ext_ptr
->ex_blockcount
- len
;
43 /* Delete the used-up extent from both extent trees. */
44 #ifdef XR_BLD_FREE_TRACE
45 fprintf(stderr
, "releasing extent: %u [%u %u]\n", agno
,
46 ext_ptr
->ex_startblock
, ext_ptr
->ex_blockcount
);
48 bno_ext_ptr
= find_bno_extent(agno
, ext_ptr
->ex_startblock
);
49 ASSERT(bno_ext_ptr
!= NULL
);
50 get_bno_extent(agno
, bno_ext_ptr
);
51 release_extent_tree_node(bno_ext_ptr
);
53 ext_ptr
= get_bcnt_extent(agno
, ext_ptr
->ex_startblock
,
54 ext_ptr
->ex_blockcount
);
55 release_extent_tree_node(ext_ptr
);
58 * If we only used part of this last extent, then we must reinsert the
59 * extent to maintain proper sorting order.
62 add_bno_extent(agno
, new_start
, new_len
);
63 add_bcnt_extent(agno
, new_start
, new_len
);
68 * Reserve blocks for the new per-AG structures. Returns true if all blocks
69 * were allocated, and false if we ran out of space.
75 struct bt_rebuild
*btr
,
78 struct extent_tree_node
*ext_ptr
;
79 uint32_t blocks_allocated
= 0;
83 while (blocks_allocated
< nr_blocks
) {
87 * Grab the smallest extent and use it up, then get the
88 * next smallest. This mimics the init_*_cursor code.
90 ext_ptr
= findfirst_bcnt_extent(agno
);
94 /* Use up the extent we've got. */
95 len
= min(ext_ptr
->ex_blockcount
, nr_blocks
- blocks_allocated
);
96 fsbno
= XFS_AGB_TO_FSB(mp
, agno
, ext_ptr
->ex_startblock
);
97 error
= bulkload_add_blocks(&btr
->newbt
, fsbno
, len
);
99 do_error(_("could not set up btree reservation: %s\n"),
102 error
= rmap_add_ag_rec(mp
, agno
, ext_ptr
->ex_startblock
, len
,
103 btr
->newbt
.oinfo
.oi_owner
);
105 do_error(_("could not set up btree rmaps: %s\n"),
108 consume_freespace(agno
, ext_ptr
, len
);
109 blocks_allocated
+= len
;
111 #ifdef XR_BLD_FREE_TRACE
112 fprintf(stderr
, "blocks_allocated = %d\n",
115 return blocks_allocated
== nr_blocks
;
120 struct xfs_mount
*mp
,
122 struct bt_rebuild
*btr
,
125 if (!reserve_agblocks(mp
, agno
, btr
, nr_blocks
))
127 _("error - not enough free space in filesystem, AG %u\n"),
131 /* Feed one of the new btree blocks to the bulk loader. */
134 struct xfs_btree_cur
*cur
,
135 union xfs_btree_ptr
*ptr
,
138 struct bt_rebuild
*btr
= priv
;
140 return bulkload_claim_block(cur
, &btr
->newbt
, ptr
);
144 * Scoop up leftovers from a rebuild cursor for later freeing, then free the
149 struct xfs_mount
*mp
,
150 struct bt_rebuild
*btr
,
151 struct bitmap
*lost_blocks
)
153 struct bulkload_resv
*resv
, *n
;
156 for_each_bulkload_reservation(&btr
->newbt
, resv
, n
) {
157 if (resv
->used
== resv
->len
)
160 error
= bitmap_set(lost_blocks
, resv
->fsbno
+ resv
->used
,
161 resv
->len
- resv
->used
);
164 _("Insufficient memory saving lost blocks, err=%d.\n"), error
);
165 resv
->used
= resv
->len
;
168 bulkload_destroy(&btr
->newbt
, 0);
174 * We need to leave some free records in the tree for the corner case of
175 * setting up the AGFL. This may require allocation of blocks, and as
176 * such can require insertion of new records into the tree (e.g. moving
177 * a record in the by-count tree when a long extent is shortened). If we
178 * pack the records into the leaves with no slack space, this requires a
179 * leaf split to occur and a block to be allocated from the free list.
180 * If we don't have any blocks on the free list (because we are setting
181 * it up!), then we fail, and the filesystem will fail with the same
182 * failure at runtime. Hence leave a couple of records slack space in
183 * each block to allow immediate modification of the tree without
184 * requiring splits to be done.
188 * Return the next free space extent tree record from the previous value we
191 static inline struct extent_tree_node
*
193 struct xfs_btree_cur
*cur
,
194 struct extent_tree_node
*prev_value
)
196 xfs_agnumber_t agno
= cur
->bc_ag
.agno
;
198 if (cur
->bc_btnum
== XFS_BTNUM_BNO
) {
200 return findfirst_bno_extent(agno
);
201 return findnext_bno_extent(prev_value
);
206 return findfirst_bcnt_extent(agno
);
207 return findnext_bcnt_extent(agno
, prev_value
);
210 /* Grab one bnobt record and put it in the btree cursor. */
213 struct xfs_btree_cur
*cur
,
216 struct bt_rebuild
*btr
= priv
;
217 struct xfs_alloc_rec_incore
*arec
= &cur
->bc_rec
.a
;
219 btr
->bno_rec
= get_bno_rec(cur
, btr
->bno_rec
);
220 arec
->ar_startblock
= btr
->bno_rec
->ex_startblock
;
221 arec
->ar_blockcount
= btr
->bno_rec
->ex_blockcount
;
222 btr
->freeblks
+= btr
->bno_rec
->ex_blockcount
;
227 init_freespace_cursors(
228 struct repair_ctx
*sc
,
230 unsigned int free_space
,
231 unsigned int *nr_extents
,
233 struct bt_rebuild
*btr_bno
,
234 struct bt_rebuild
*btr_cnt
)
236 unsigned int agfl_goal
;
239 agfl_goal
= libxfs_alloc_min_freelist(sc
->mp
, NULL
);
241 init_rebuild(sc
, &XFS_RMAP_OINFO_AG
, free_space
, btr_bno
);
242 init_rebuild(sc
, &XFS_RMAP_OINFO_AG
, free_space
, btr_cnt
);
244 btr_bno
->cur
= libxfs_allocbt_stage_cursor(sc
->mp
,
245 &btr_bno
->newbt
.afake
, agno
, XFS_BTNUM_BNO
);
246 btr_cnt
->cur
= libxfs_allocbt_stage_cursor(sc
->mp
,
247 &btr_cnt
->newbt
.afake
, agno
, XFS_BTNUM_CNT
);
249 btr_bno
->bload
.get_record
= get_bnobt_record
;
250 btr_bno
->bload
.claim_block
= rebuild_claim_block
;
252 btr_cnt
->bload
.get_record
= get_bnobt_record
;
253 btr_cnt
->bload
.claim_block
= rebuild_claim_block
;
256 * Now we need to allocate blocks for the free space btrees using the
257 * free space records we're about to put in them. Every record we use
258 * can change the shape of the free space trees, so we recompute the
259 * btree shape until we stop needing /more/ blocks. If we have any
260 * left over we'll stash them in the AGFL when we're done.
263 unsigned int num_freeblocks
;
264 int delta_bno
, delta_cnt
;
267 /* Compute how many bnobt blocks we'll need. */
268 error
= -libxfs_btree_bload_compute_geometry(btr_bno
->cur
,
269 &btr_bno
->bload
, *nr_extents
);
272 _("Unable to compute free space by block btree geometry, error %d.\n"), -error
);
274 /* Compute how many cntbt blocks we'll need. */
275 error
= -libxfs_btree_bload_compute_geometry(btr_cnt
->cur
,
276 &btr_cnt
->bload
, *nr_extents
);
279 _("Unable to compute free space by length btree geometry, error %d.\n"), -error
);
282 * Compute the deficit between the number of blocks reserved
283 * and the number of blocks we think we need for the btree.
285 delta_bno
= (int)btr_bno
->newbt
.nr_reserved
-
286 btr_bno
->bload
.nr_blocks
;
287 delta_cnt
= (int)btr_cnt
->newbt
.nr_reserved
-
288 btr_cnt
->bload
.nr_blocks
;
290 /* We don't need any more blocks, so we're done. */
291 if (delta_bno
>= 0 && delta_cnt
>= 0 &&
292 delta_bno
+ delta_cnt
>= agfl_goal
) {
293 *extra_blocks
= delta_bno
+ delta_cnt
;
297 /* Allocate however many more blocks we need this time. */
299 reserve_btblocks(sc
->mp
, agno
, btr_bno
, -delta_bno
);
303 reserve_btblocks(sc
->mp
, agno
, btr_cnt
, -delta_cnt
);
308 * Try to fill the bnobt cursor with extra blocks to populate
309 * the AGFL. If we don't get all the blocks we want, stop
310 * trying to fill the AGFL because the AG is totally out of
313 agfl_wanted
= agfl_goal
- (delta_bno
+ delta_cnt
);
314 if (agfl_wanted
> 0 &&
315 !reserve_agblocks(sc
->mp
, agno
, btr_bno
, agfl_wanted
))
318 /* Ok, now how many free space records do we have? */
319 *nr_extents
= count_bno_extents_blocks(agno
, &num_freeblocks
);
323 /* Rebuild the free space btrees. */
325 build_freespace_btrees(
326 struct repair_ctx
*sc
,
328 struct bt_rebuild
*btr_bno
,
329 struct bt_rebuild
*btr_cnt
)
333 /* Add all observed bnobt records. */
334 error
= -libxfs_btree_bload(btr_bno
->cur
, &btr_bno
->bload
, btr_bno
);
337 _("Error %d while creating bnobt btree for AG %u.\n"), error
, agno
);
339 /* Add all observed cntbt records. */
340 error
= -libxfs_btree_bload(btr_cnt
->cur
, &btr_cnt
->bload
, btr_cnt
);
343 _("Error %d while creating cntbt btree for AG %u.\n"), error
, agno
);
345 /* Since we're not writing the AGF yet, no need to commit the cursor */
346 libxfs_btree_del_cursor(btr_bno
->cur
, 0);
347 libxfs_btree_del_cursor(btr_cnt
->cur
, 0);
352 static inline struct ino_tree_node
*
354 struct xfs_btree_cur
*cur
,
355 struct ino_tree_node
*prev_value
)
357 xfs_agnumber_t agno
= cur
->bc_ag
.agno
;
359 if (cur
->bc_btnum
== XFS_BTNUM_INO
) {
361 return findfirst_inode_rec(agno
);
362 return next_ino_rec(prev_value
);
367 return findfirst_free_inode_rec(agno
);
368 return next_free_ino_rec(prev_value
);
371 /* Grab one inobt record. */
374 struct xfs_btree_cur
*cur
,
377 struct bt_rebuild
*btr
= priv
;
378 struct xfs_inobt_rec_incore
*irec
= &cur
->bc_rec
.i
;
379 struct ino_tree_node
*ino_rec
;
384 btr
->ino_rec
= ino_rec
= get_ino_rec(cur
, btr
->ino_rec
);
386 /* Transform the incore record into an on-disk record. */
387 irec
->ir_startino
= ino_rec
->ino_startnum
;
388 irec
->ir_free
= ino_rec
->ir_free
;
390 for (k
= 0; k
< sizeof(xfs_inofree_t
) * NBBY
; k
++) {
391 ASSERT(is_inode_confirmed(ino_rec
, k
));
393 if (is_inode_sparse(ino_rec
, k
))
395 if (is_inode_free(ino_rec
, k
))
400 irec
->ir_count
= inocnt
;
401 irec
->ir_freecount
= finocnt
;
403 if (xfs_sb_version_hassparseinodes(&cur
->bc_mp
->m_sb
)) {
409 * Convert the 64-bit in-core sparse inode state to the
410 * 16-bit on-disk holemask.
413 spmask
= (1 << XFS_INODES_PER_HOLEMASK_BIT
) - 1;
414 sparse
= ino_rec
->ir_sparse
;
415 for (k
= 0; k
< XFS_INOBT_HOLEMASK_BITS
; k
++) {
416 if (sparse
& spmask
) {
417 ASSERT((sparse
& spmask
) == spmask
);
418 holemask
|= (1 << k
);
420 ASSERT((sparse
& spmask
) == 0);
421 sparse
>>= XFS_INODES_PER_HOLEMASK_BIT
;
424 irec
->ir_holemask
= holemask
;
426 irec
->ir_holemask
= 0;
429 if (btr
->first_agino
== NULLAGINO
)
430 btr
->first_agino
= ino_rec
->ino_startnum
;
431 btr
->freecount
+= finocnt
;
432 btr
->count
+= inocnt
;
436 /* Initialize both inode btree cursors as needed. */
439 struct repair_ctx
*sc
,
441 unsigned int free_space
,
443 uint64_t *num_free_inos
,
444 struct bt_rebuild
*btr_ino
,
445 struct bt_rebuild
*btr_fino
)
447 struct ino_tree_node
*ino_rec
;
448 unsigned int ino_recs
= 0;
449 unsigned int fino_recs
= 0;
453 finobt
= xfs_sb_version_hasfinobt(&sc
->mp
->m_sb
);
454 init_rebuild(sc
, &XFS_RMAP_OINFO_INOBT
, free_space
, btr_ino
);
456 /* Compute inode statistics. */
459 for (ino_rec
= findfirst_inode_rec(agno
);
461 ino_rec
= next_ino_rec(ino_rec
)) {
462 unsigned int rec_ninos
= 0;
463 unsigned int rec_nfinos
= 0;
466 for (i
= 0; i
< XFS_INODES_PER_CHUNK
; i
++) {
467 ASSERT(is_inode_confirmed(ino_rec
, i
));
469 * sparse inodes are not factored into superblock (free)
472 if (is_inode_sparse(ino_rec
, i
))
474 if (is_inode_free(ino_rec
, i
))
479 *num_free_inos
+= rec_nfinos
;
480 *num_inos
+= rec_ninos
;
483 /* finobt only considers records with free inodes */
488 btr_ino
->cur
= libxfs_inobt_stage_cursor(sc
->mp
, &btr_ino
->newbt
.afake
,
489 agno
, XFS_BTNUM_INO
);
491 btr_ino
->bload
.get_record
= get_inobt_record
;
492 btr_ino
->bload
.claim_block
= rebuild_claim_block
;
493 btr_ino
->first_agino
= NULLAGINO
;
495 /* Compute how many inobt blocks we'll need. */
496 error
= -libxfs_btree_bload_compute_geometry(btr_ino
->cur
,
497 &btr_ino
->bload
, ino_recs
);
500 _("Unable to compute inode btree geometry, error %d.\n"), error
);
502 reserve_btblocks(sc
->mp
, agno
, btr_ino
, btr_ino
->bload
.nr_blocks
);
507 init_rebuild(sc
, &XFS_RMAP_OINFO_INOBT
, free_space
, btr_fino
);
508 btr_fino
->cur
= libxfs_inobt_stage_cursor(sc
->mp
,
509 &btr_fino
->newbt
.afake
, agno
, XFS_BTNUM_FINO
);
511 btr_fino
->bload
.get_record
= get_inobt_record
;
512 btr_fino
->bload
.claim_block
= rebuild_claim_block
;
513 btr_fino
->first_agino
= NULLAGINO
;
515 /* Compute how many finobt blocks we'll need. */
516 error
= -libxfs_btree_bload_compute_geometry(btr_fino
->cur
,
517 &btr_fino
->bload
, fino_recs
);
520 _("Unable to compute free inode btree geometry, error %d.\n"), error
);
522 reserve_btblocks(sc
->mp
, agno
, btr_fino
, btr_fino
->bload
.nr_blocks
);
525 /* Rebuild the inode btrees. */
528 struct repair_ctx
*sc
,
530 struct bt_rebuild
*btr_ino
,
531 struct bt_rebuild
*btr_fino
)
535 /* Add all observed inobt records. */
536 error
= -libxfs_btree_bload(btr_ino
->cur
, &btr_ino
->bload
, btr_ino
);
539 _("Error %d while creating inobt btree for AG %u.\n"), error
, agno
);
541 /* Since we're not writing the AGI yet, no need to commit the cursor */
542 libxfs_btree_del_cursor(btr_ino
->cur
, 0);
544 if (!xfs_sb_version_hasfinobt(&sc
->mp
->m_sb
))
547 /* Add all observed finobt records. */
548 error
= -libxfs_btree_bload(btr_fino
->cur
, &btr_fino
->bload
, btr_fino
);
551 _("Error %d while creating finobt btree for AG %u.\n"), error
, agno
);
553 /* Since we're not writing the AGI yet, no need to commit the cursor */
554 libxfs_btree_del_cursor(btr_fino
->cur
, 0);
557 /* rebuild the rmap tree */
559 /* Grab one rmap record. */
562 struct xfs_btree_cur
*cur
,
565 struct xfs_rmap_irec
*rec
;
566 struct bt_rebuild
*btr
= priv
;
568 rec
= pop_slab_cursor(btr
->slab_cursor
);
569 memcpy(&cur
->bc_rec
.r
, rec
, sizeof(struct xfs_rmap_irec
));
573 /* Set up the rmap rebuild parameters. */
576 struct repair_ctx
*sc
,
578 unsigned int free_space
,
579 struct bt_rebuild
*btr
)
583 if (!xfs_sb_version_hasrmapbt(&sc
->mp
->m_sb
))
586 init_rebuild(sc
, &XFS_RMAP_OINFO_AG
, free_space
, btr
);
587 btr
->cur
= libxfs_rmapbt_stage_cursor(sc
->mp
, &btr
->newbt
.afake
, agno
);
589 btr
->bload
.get_record
= get_rmapbt_record
;
590 btr
->bload
.claim_block
= rebuild_claim_block
;
592 /* Compute how many blocks we'll need. */
593 error
= -libxfs_btree_bload_compute_geometry(btr
->cur
, &btr
->bload
,
594 rmap_record_count(sc
->mp
, agno
));
597 _("Unable to compute rmap btree geometry, error %d.\n"), error
);
599 reserve_btblocks(sc
->mp
, agno
, btr
, btr
->bload
.nr_blocks
);
602 /* Rebuild a rmap btree. */
605 struct repair_ctx
*sc
,
607 struct bt_rebuild
*btr
)
611 error
= rmap_init_cursor(agno
, &btr
->slab_cursor
);
614 _("Insufficient memory to construct rmap cursor.\n"));
616 /* Add all observed rmap records. */
617 error
= -libxfs_btree_bload(btr
->cur
, &btr
->bload
, btr
);
620 _("Error %d while creating rmap btree for AG %u.\n"), error
, agno
);
622 /* Since we're not writing the AGF yet, no need to commit the cursor */
623 libxfs_btree_del_cursor(btr
->cur
, 0);
624 free_slab_cursor(&btr
->slab_cursor
);
627 /* rebuild the refcount tree */
629 /* Grab one refcount record. */
631 get_refcountbt_record(
632 struct xfs_btree_cur
*cur
,
635 struct xfs_refcount_irec
*rec
;
636 struct bt_rebuild
*btr
= priv
;
638 rec
= pop_slab_cursor(btr
->slab_cursor
);
639 memcpy(&cur
->bc_rec
.rc
, rec
, sizeof(struct xfs_refcount_irec
));
643 /* Set up the refcount rebuild parameters. */
646 struct repair_ctx
*sc
,
648 unsigned int free_space
,
649 struct bt_rebuild
*btr
)
653 if (!xfs_sb_version_hasreflink(&sc
->mp
->m_sb
))
656 init_rebuild(sc
, &XFS_RMAP_OINFO_REFC
, free_space
, btr
);
657 btr
->cur
= libxfs_refcountbt_stage_cursor(sc
->mp
, &btr
->newbt
.afake
,
660 btr
->bload
.get_record
= get_refcountbt_record
;
661 btr
->bload
.claim_block
= rebuild_claim_block
;
663 /* Compute how many blocks we'll need. */
664 error
= -libxfs_btree_bload_compute_geometry(btr
->cur
, &btr
->bload
,
665 refcount_record_count(sc
->mp
, agno
));
668 _("Unable to compute refcount btree geometry, error %d.\n"), error
);
670 reserve_btblocks(sc
->mp
, agno
, btr
, btr
->bload
.nr_blocks
);
673 /* Rebuild a refcount btree. */
676 struct repair_ctx
*sc
,
678 struct bt_rebuild
*btr
)
682 error
= init_refcount_cursor(agno
, &btr
->slab_cursor
);
685 _("Insufficient memory to construct refcount cursor.\n"));
687 /* Add all observed refcount records. */
688 error
= -libxfs_btree_bload(btr
->cur
, &btr
->bload
, btr
);
691 _("Error %d while creating refcount btree for AG %u.\n"), error
, agno
);
693 /* Since we're not writing the AGF yet, no need to commit the cursor */
694 libxfs_btree_del_cursor(btr
->cur
, 0);
695 free_slab_cursor(&btr
->slab_cursor
);