]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/phase5.c
xfsprogs: Release v6.15.0
[thirdparty/xfsprogs-dev.git] / repair / phase5.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6
7 #include "libxfs.h"
8 #include "libfrog/bitmap.h"
9 #include "avl.h"
10 #include "globals.h"
11 #include "agheader.h"
12 #include "incore.h"
13 #include "protos.h"
14 #include "err_protos.h"
15 #include "dinode.h"
16 #include "rt.h"
17 #include "versions.h"
18 #include "threads.h"
19 #include "progress.h"
20 #include "slab.h"
21 #include "rmap.h"
22 #include "bulkload.h"
23 #include "agbtree.h"
24 #include "zoned.h"
25
/*
 * Per-AG accumulators for the global superblock counters.  Allocated and
 * freed in phase5(); each AG's slot is filled by phase5_func(), then the
 * totals are summed into sb_icount/sb_ifree/sb_fdblocks before sync_sb().
 */
static uint64_t	*sb_icount_ag;		/* allocated inodes per ag */
static uint64_t	*sb_ifree_ag;		/* free inodes per ag */
static uint64_t	*sb_fdblocks_ag;	/* free data blocks per ag */
29
30 static int
31 mk_incore_fstree(
32 struct xfs_mount *mp,
33 xfs_agnumber_t agno,
34 unsigned int *num_freeblocks)
35 {
36 int in_extent;
37 int num_extents;
38 xfs_agblock_t extent_start;
39 xfs_extlen_t extent_len;
40 xfs_agblock_t agbno;
41 xfs_agblock_t ag_end;
42 uint free_blocks;
43 xfs_extlen_t blen;
44 int bstate;
45
46 *num_freeblocks = 0;
47
48 /*
49 * scan the bitmap for the ag looking for continuous
50 * extents of free blocks. At this point, we know
51 * that blocks in the bitmap are either set to an
52 * "in use" state or set to unknown (0) since the
53 * bmaps were zero'ed in phase 4 and only blocks
54 * being used by inodes, inode bmaps, ag headers,
55 * and the files themselves were put into the bitmap.
56 *
57 */
58 ASSERT(agno < mp->m_sb.sb_agcount);
59
60 extent_start = extent_len = 0;
61 in_extent = 0;
62 num_extents = free_blocks = 0;
63
64 if (agno < mp->m_sb.sb_agcount - 1)
65 ag_end = mp->m_sb.sb_agblocks;
66 else
67 ag_end = mp->m_sb.sb_dblocks -
68 (xfs_rfsblock_t)mp->m_sb.sb_agblocks *
69 (mp->m_sb.sb_agcount - 1);
70
71 /*
72 * ok, now find the number of extents, keep track of the
73 * largest extent.
74 */
75 for (agbno = 0; agbno < ag_end; agbno += blen) {
76 bstate = get_bmap_ext(agno, agbno, ag_end, &blen, false);
77 if (bstate < XR_E_INUSE) {
78 free_blocks += blen;
79 if (in_extent == 0) {
80 /*
81 * found the start of a free extent
82 */
83 in_extent = 1;
84 num_extents++;
85 extent_start = agbno;
86 extent_len = blen;
87 } else {
88 extent_len += blen;
89 }
90 } else {
91 if (in_extent) {
92 /*
93 * free extent ends here, add extent to the
94 * 2 incore extent (avl-to-be-B+) trees
95 */
96 in_extent = 0;
97 #if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT)
98 fprintf(stderr, "adding extent %u [%u %u]\n",
99 agno, extent_start, extent_len);
100 #endif
101 add_bno_extent(agno, extent_start, extent_len);
102 add_bcnt_extent(agno, extent_start, extent_len);
103 *num_freeblocks += extent_len;
104 }
105 }
106 }
107 if (in_extent) {
108 /*
109 * free extent ends here
110 */
111 #if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT)
112 fprintf(stderr, "adding extent %u [%u %u]\n",
113 agno, extent_start, extent_len);
114 #endif
115 add_bno_extent(agno, extent_start, extent_len);
116 add_bcnt_extent(agno, extent_start, extent_len);
117 *num_freeblocks += extent_len;
118 }
119
120 return(num_extents);
121 }
122
/*
 * Write a fresh AGI sector from the rebuilt inode btree state.
 *
 * XXX: yet more code that can be shared with mkfs, growfs.
 */
static void
build_agi(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	struct bt_rebuild	*btr_ino,	/* rebuilt inobt state */
	struct bt_rebuild	*btr_fino)	/* rebuilt finobt state */
{
	struct xfs_buf		*agi_buf;
	struct xfs_agi		*agi;
	int			i;
	int			error;

	/* grab an uncached buffer covering the AGI sector */
	error = -libxfs_buf_get(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			mp->m_sb.sb_sectsize / BBSIZE, &agi_buf);
	if (error)
		do_error(_("Cannot grab AG %u AGI buffer, err=%d"),
				agno, error);
	agi_buf->b_ops = &xfs_agi_buf_ops;
	agi = agi_buf->b_addr;
	/* start from a zeroed sector so unset fields are well defined */
	memset(agi, 0, mp->m_sb.sb_sectsize);

	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
	agi->agi_seqno = cpu_to_be32(agno);
	/* the last AG may be shorter than sb_agblocks */
	if (agno < mp->m_sb.sb_agcount - 1)
		agi->agi_length = cpu_to_be32(mp->m_sb.sb_agblocks);
	else
		agi->agi_length = cpu_to_be32(mp->m_sb.sb_dblocks -
			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);
	/* inode counts and btree root/level come from the staged rebuild */
	agi->agi_count = cpu_to_be32(btr_ino->count);
	agi->agi_root = cpu_to_be32(btr_ino->newbt.afake.af_root);
	agi->agi_level = cpu_to_be32(btr_ino->newbt.afake.af_levels);
	agi->agi_freecount = cpu_to_be32(btr_ino->freecount);
	agi->agi_newino = cpu_to_be32(btr_ino->first_agino);
	agi->agi_dirino = cpu_to_be32(NULLAGINO);

	/* phase 4 processed all unlinked inodes, so empty the buckets */
	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
		agi->agi_unlinked[i] = cpu_to_be32(NULLAGINO);

	if (xfs_has_crc(mp))
		platform_uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);

	if (xfs_has_finobt(mp)) {
		agi->agi_free_root =
				cpu_to_be32(btr_fino->newbt.afake.af_root);
		agi->agi_free_level =
				cpu_to_be32(btr_fino->newbt.afake.af_levels);
	}

	if (xfs_has_inobtcounts(mp)) {
		agi->agi_iblocks = cpu_to_be32(btr_ino->newbt.afake.af_blocks);
		agi->agi_fblocks = cpu_to_be32(btr_fino->newbt.afake.af_blocks);
	}

	libxfs_buf_mark_dirty(agi_buf);
	libxfs_buf_relse(agi_buf);
}
184
185 /* Fill the AGFL with any leftover bnobt rebuilder blocks. */
186 static void
187 fill_agfl(
188 struct bt_rebuild *btr,
189 __be32 *agfl_bnos,
190 unsigned int *agfl_idx)
191 {
192 struct bulkload_resv *resv, *n;
193 struct xfs_mount *mp = btr->newbt.sc->mp;
194
195 for_each_bulkload_reservation(&btr->newbt, resv, n) {
196 xfs_agblock_t bno;
197
198 bno = resv->agbno + resv->used;
199 while (resv->used < resv->len &&
200 *agfl_idx < libxfs_agfl_size(mp)) {
201 agfl_bnos[(*agfl_idx)++] = cpu_to_be32(bno++);
202 resv->used++;
203 }
204 }
205 }
206
/*
 * build both the agf and the agfl for an agno given both
 * btree cursors.
 *
 * XXX: yet more common code that can be shared with mkfs/growfs.
 */
static void
build_agf_agfl(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	struct bt_rebuild	*btr_bno,	/* rebuilt bnobt state */
	struct bt_rebuild	*btr_cnt,	/* rebuilt cntbt state */
	struct bt_rebuild	*btr_rmap,	/* rebuilt rmapbt state */
	struct bt_rebuild	*btr_refc,	/* rebuilt refcountbt state */
	struct bitmap		*lost_blocks)	/* not used in this function */
{
	struct extent_tree_node	*ext_ptr;
	struct xfs_buf		*agf_buf, *agfl_buf;
	unsigned int		agfl_idx;
	struct xfs_agfl		*agfl;
	struct xfs_agf		*agf;
	__be32			*freelist;
	int			error;

	/* grab an uncached buffer covering the AGF sector */
	error = -libxfs_buf_get(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			mp->m_sb.sb_sectsize / BBSIZE, &agf_buf);
	if (error)
		do_error(_("Cannot grab AG %u AGF buffer, err=%d"),
				agno, error);
	agf_buf->b_ops = &xfs_agf_buf_ops;
	agf = agf_buf->b_addr;
	memset(agf, 0, mp->m_sb.sb_sectsize);

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "agf = %p, agf_buf->b_addr = %p\n",
		agf, agf_buf->b_addr);
#endif

	/*
	 * set up fixed part of agf
	 */
	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
	agf->agf_seqno = cpu_to_be32(agno);

	/* the last AG may be shorter than sb_agblocks */
	if (agno < mp->m_sb.sb_agcount - 1)
		agf->agf_length = cpu_to_be32(mp->m_sb.sb_agblocks);
	else
		agf->agf_length = cpu_to_be32(mp->m_sb.sb_dblocks -
			(xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno);

	/* free space btree roots/levels from the staged rebuilds */
	agf->agf_bno_root =
			cpu_to_be32(btr_bno->newbt.afake.af_root);
	agf->agf_bno_level =
			cpu_to_be32(btr_bno->newbt.afake.af_levels);
	agf->agf_cnt_root =
			cpu_to_be32(btr_cnt->newbt.afake.af_root);
	agf->agf_cnt_level =
			cpu_to_be32(btr_cnt->newbt.afake.af_levels);
	agf->agf_freeblks = cpu_to_be32(btr_bno->freeblks);

	if (xfs_has_rmapbt(mp)) {
		agf->agf_rmap_root =
				cpu_to_be32(btr_rmap->newbt.afake.af_root);
		agf->agf_rmap_level =
				cpu_to_be32(btr_rmap->newbt.afake.af_levels);
		agf->agf_rmap_blocks =
				cpu_to_be32(btr_rmap->newbt.afake.af_blocks);
	}

	if (xfs_has_reflink(mp)) {
		agf->agf_refcount_root =
				cpu_to_be32(btr_refc->newbt.afake.af_root);
		agf->agf_refcount_level =
				cpu_to_be32(btr_refc->newbt.afake.af_levels);
		agf->agf_refcount_blocks =
				cpu_to_be32(btr_refc->newbt.afake.af_blocks);
	}

	/*
	 * Count and record the number of btree blocks consumed if required.
	 */
	if (xfs_has_lazysbcount(mp)) {
		unsigned int	blks;
		/*
		 * Don't count the root blocks as they are already
		 * accounted for.
		 */
		blks = btr_bno->newbt.afake.af_blocks +
			btr_cnt->newbt.afake.af_blocks - 2;
		if (xfs_has_rmapbt(mp))
			blks += btr_rmap->newbt.afake.af_blocks - 1;
		agf->agf_btreeblks = cpu_to_be32(blks);
#ifdef XR_BLD_FREE_TRACE
		fprintf(stderr, "agf->agf_btreeblks = %u\n",
			be32_to_cpu(agf->agf_btreeblks));
#endif
	}

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "bno root = %u, bcnt root = %u\n",
		be32_to_cpu(agf->agf_bno_root),
		be32_to_cpu(agf->agf_cnt_root));
#endif

	if (xfs_has_crc(mp))
		platform_uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);

	/* initialise the AGFL, then fill it if there are blocks left over. */
	error = -libxfs_buf_get(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			mp->m_sb.sb_sectsize / BBSIZE, &agfl_buf);
	if (error)
		do_error(_("Cannot grab AG %u AGFL buffer, err=%d"),
				agno, error);
	agfl_buf->b_ops = &xfs_agfl_buf_ops;
	agfl = XFS_BUF_TO_AGFL(agfl_buf);

	/* setting to 0xff results in initialisation to NULLAGBLOCK */
	memset(agfl, 0xff, mp->m_sb.sb_sectsize);
	freelist = xfs_buf_to_agfl_bno(agfl_buf);
	if (xfs_has_crc(mp)) {
		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
		agfl->agfl_seqno = cpu_to_be32(agno);
		platform_uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
		for (agfl_idx = 0; agfl_idx < libxfs_agfl_size(mp); agfl_idx++)
			freelist[agfl_idx] = cpu_to_be32(NULLAGBLOCK);
	}

	/* Fill the AGFL with leftover blocks or save them for later. */
	agfl_idx = 0;
	freelist = xfs_buf_to_agfl_bno(agfl_buf);
	fill_agfl(btr_bno, freelist, &agfl_idx);
	fill_agfl(btr_cnt, freelist, &agfl_idx);
	if (xfs_has_rmapbt(mp))
		fill_agfl(btr_rmap, freelist, &agfl_idx);

	/* Set the AGF counters for the AGFL. */
	if (agfl_idx > 0) {
		agf->agf_flfirst = 0;
		agf->agf_fllast = cpu_to_be32(agfl_idx - 1);
		agf->agf_flcount = cpu_to_be32(agfl_idx);
		rmap_store_agflcount(mp, agno, agfl_idx);

#ifdef XR_BLD_FREE_TRACE
		fprintf(stderr, "writing agfl for ag %u\n", agno);
#endif

	} else {
		/* empty freelist: fllast wraps to just before flfirst */
		agf->agf_flfirst = 0;
		agf->agf_fllast = cpu_to_be32(libxfs_agfl_size(mp) - 1);
		agf->agf_flcount = 0;
	}

	libxfs_buf_mark_dirty(agfl_buf);
	libxfs_buf_relse(agfl_buf);

	/* longest free extent comes from the incore by-count tree */
	ext_ptr = findbiggest_bcnt_extent(agno);
	agf->agf_longest = cpu_to_be32((ext_ptr != NULL) ?
						ext_ptr->ex_blockcount : 0);

	/* the three btree roots must all live in distinct blocks */
	ASSERT(be32_to_cpu(agf->agf_bno_root) !=
		be32_to_cpu(agf->agf_cnt_root));
	ASSERT(be32_to_cpu(agf->agf_refcount_root) !=
		be32_to_cpu(agf->agf_bno_root));
	ASSERT(be32_to_cpu(agf->agf_refcount_root) !=
		be32_to_cpu(agf->agf_cnt_root));

	libxfs_buf_mark_dirty(agf_buf);
	libxfs_buf_relse(agf_buf);

	/*
	 * now fix up the free list appropriately
	 */
	fix_freelist(mp, agno, true);

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "wrote agf for ag %u\n", agno);
#endif
}
388
389 /*
390 * update the superblock counters, sync the sb version numbers and
391 * feature bits to the filesystem, and sync up the on-disk superblock
392 * to match the incore superblock.
393 */
394 static void
395 sync_sb(xfs_mount_t *mp)
396 {
397 struct xfs_buf *bp;
398
399 bp = libxfs_getsb(mp);
400 if (!bp)
401 do_error(_("couldn't get superblock\n"));
402
403 mp->m_sb.sb_icount = sb_icount;
404 mp->m_sb.sb_ifree = sb_ifree;
405 mp->m_sb.sb_fdblocks = sb_fdblocks;
406 mp->m_sb.sb_frextents = sb_frextents;
407
408 update_sb_version(mp);
409
410 libxfs_sb_to_disk(bp->b_addr, &mp->m_sb);
411 libxfs_buf_mark_dirty(bp);
412 libxfs_buf_relse(bp);
413 }
414
415 /*
416 * make sure the root and realtime inodes show up allocated
417 * even if they've been freed. they get reinitialized in phase6.
418 */
419 static void
420 keep_fsinos(xfs_mount_t *mp)
421 {
422 ino_tree_node_t *irec;
423 unsigned int inuse = xfs_rootrec_inodes_inuse(mp), i;
424
425 irec = find_inode_rec(mp, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
426 XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
427
428 for (i = 0; i < inuse; i++) {
429 set_inode_used(irec, i);
430
431 /* Everything after the root dir is metadata */
432 if (i)
433 set_inode_is_meta(irec, i);
434 }
435 }
436
/*
 * Rebuild all per-AG metadata for one AG: incore free space trees, then
 * the on-disk inode, finobt, rmap, refcount, bno and cnt btrees, then
 * the AGF/AGFL/AGI headers that point at them.  Accumulates per-AG
 * counter totals into the sb_*_ag[] arrays.
 */
static void
phase5_func(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	struct bitmap		*lost_blocks)	/* collects over-reserved blocks */
{
	struct repair_ctx	sc = { .mp = mp, };
	struct bt_rebuild	btr_bno;
	struct bt_rebuild	btr_cnt;
	struct bt_rebuild	btr_ino;
	struct bt_rebuild	btr_fino;
	struct bt_rebuild	btr_rmap;
	struct bt_rebuild	btr_refc;
	xfs_agnumber_t		agno = pag_agno(pag);
	int			extra_blocks = 0;
	uint			num_freeblocks;
	xfs_agblock_t		num_extents;
	unsigned int		est_agfreeblocks = 0;
	unsigned int		total_btblocks;

	if (verbose)
		do_log(_(" - agno = %d\n"), agno);

	/*
	 * build up incore bno and bcnt extent btrees
	 */
	num_extents = mk_incore_fstree(mp, agno, &num_freeblocks);

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "# of bno extents is %d\n", count_bno_extents(agno));
#endif

	if (num_extents == 0) {
		/*
		 * XXX - what we probably should do here is pick an inode for
		 * a regular file in the allocation group that has space
		 * allocated and shoot it by traversing the bmap list and
		 * putting all its extents on the incore freespace trees,
		 * clearing the inode, and clearing the in-use bit in the
		 * incore inode tree. Then try mk_incore_fstree() again.
		 */
		do_error(
_("unable to rebuild AG %u. Not enough free space in on-disk AG.\n"),
			agno);
	}

	/*
	 * Estimate the number of free blocks in this AG after rebuilding
	 * all btrees, unless we already decided that we need to pack all
	 * btree blocks maximally.
	 */
	if (!need_packed_btrees) {
		total_btblocks = estimate_agbtree_blocks(pag, num_extents);
		if (num_freeblocks > total_btblocks)
			est_agfreeblocks = num_freeblocks - total_btblocks;
	}

	/* stage the inode btrees and collect per-AG inode counts */
	init_ino_cursors(&sc, pag, est_agfreeblocks, &sb_icount_ag[agno],
			&sb_ifree_ag[agno], &btr_ino, &btr_fino);

	init_rmapbt_cursor(&sc, pag, est_agfreeblocks, &btr_rmap);

	init_refc_cursor(&sc, pag, est_agfreeblocks, &btr_refc);

	num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
	/*
	 * lose two blocks per AG -- the space tree roots are counted as
	 * allocated since the space trees always have roots
	 */
	sb_fdblocks_ag[agno] += num_freeblocks - 2;

	if (num_extents == 0) {
		/*
		 * XXX - what we probably should do here is pick an inode for
		 * a regular file in the allocation group that has space
		 * allocated and shoot it by traversing the bmap list and
		 * putting all its extents on the incore freespace trees,
		 * clearing the inode, and clearing the in-use bit in the
		 * incore inode tree. Then try mk_incore_fstree() again.
		 */
		do_error(_("unable to rebuild AG %u. No free space.\n"), agno);
	}

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "# of bno extents is %d\n", num_extents);
#endif

	/*
	 * track blocks that we might really lose
	 */
	init_freespace_cursors(&sc, pag, est_agfreeblocks, &num_extents,
			&extra_blocks, &btr_bno, &btr_cnt);

	/*
	 * freespace btrees live in the "free space" but the filesystem treats
	 * AGFL blocks as allocated since they aren't described by the
	 * freespace trees
	 */

	/*
	 * see if we can fit all the extra blocks into the AGFL
	 */
	extra_blocks = (extra_blocks - libxfs_agfl_size(mp) > 0) ?
			extra_blocks - libxfs_agfl_size(mp) : 0;

	if (extra_blocks > 0)
		sb_fdblocks_ag[agno] -= extra_blocks;

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "# of bno extents is %d\n", count_bno_extents(agno));
	fprintf(stderr, "# of bcnt extents is %d\n", count_bcnt_extents(agno));
#endif

	build_freespace_btrees(&sc, agno, &btr_bno, &btr_cnt);

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "# of free blocks == %d/%d\n", btr_bno.freeblks,
			btr_cnt.freeblks);
#endif
	/* both free space trees must describe the same set of blocks */
	ASSERT(btr_bno.freeblks == btr_cnt.freeblks);

	if (xfs_has_rmapbt(mp)) {
		build_rmap_tree(&sc, agno, &btr_rmap);
		/* don't count the rmapbt root, it's always allocated */
		sb_fdblocks_ag[agno] += btr_rmap.newbt.afake.af_blocks - 1;
	}

	if (xfs_has_reflink(mp))
		build_refcount_tree(&sc, agno, &btr_refc);

	/*
	 * set up agf and agfl
	 */
	build_agf_agfl(mp, agno, &btr_bno, &btr_cnt, &btr_rmap, &btr_refc,
			lost_blocks);

	build_inode_btrees(&sc, agno, &btr_ino, &btr_fino);

	/* build the agi */
	build_agi(mp, agno, &btr_ino, &btr_fino);

	/*
	 * tear down cursors
	 */
	finish_rebuild(mp, &btr_bno, lost_blocks);
	finish_rebuild(mp, &btr_cnt, lost_blocks);
	finish_rebuild(mp, &btr_ino, lost_blocks);
	if (xfs_has_finobt(mp))
		finish_rebuild(mp, &btr_fino, lost_blocks);
	if (xfs_has_rmapbt(mp))
		finish_rebuild(mp, &btr_rmap, lost_blocks);
	if (xfs_has_reflink(mp))
		finish_rebuild(mp, &btr_refc, lost_blocks);

	/*
	 * release the incore per-AG bno/bcnt trees so the extent nodes
	 * can be recycled
	 */
	release_agbno_extent_tree(agno);
	release_agbcnt_extent_tree(agno);
	PROG_RPT_INC(prog_rpt_done[agno], 1);
}
598
599 /* Inject this unused space back into the filesystem. */
600 static int
601 inject_lost_extent(
602 uint64_t start,
603 uint64_t length,
604 void *arg)
605 {
606 struct xfs_mount *mp = arg;
607 struct xfs_trans *tp;
608 struct xfs_perag *pag;
609 xfs_agnumber_t agno;
610 xfs_agblock_t agbno;
611 int error;
612
613 error = -libxfs_trans_alloc_rollable(mp, 16, &tp);
614 if (error)
615 return error;
616
617 agno = XFS_FSB_TO_AGNO(mp, start);
618 agbno = XFS_FSB_TO_AGBNO(mp, start);
619 pag = libxfs_perag_get(mp, agno);
620 error = -libxfs_free_extent(tp, pag, agbno, length,
621 &XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE);
622 libxfs_perag_put(pag);
623
624 if (error)
625 return error;
626
627 return -libxfs_trans_commit(tp);
628 }
629
/* Verify the realtime metadata against what phases 2-4 observed. */
void
check_rtmetadata(
	struct xfs_mount	*mp)
{
	if (xfs_has_zoned(mp)) {
		/* zoned rt devices have their own checker */
		check_zones(mp);
	} else {
		/* regenerate incore rt info, then verify bitmap + summary */
		generate_rtinfo(mp);
		check_rtbitmap(mp);
		check_rtsummary(mp);
	}
}
643
644 /*
645 * Estimate the amount of free space used by the perag metadata without
646 * building the incore tree. This is only necessary if realtime btrees are
647 * enabled.
648 */
649 static xfs_extlen_t
650 estimate_agbtree_blocks_early(
651 struct xfs_perag *pag,
652 unsigned int *num_freeblocks)
653 {
654 struct xfs_mount *mp = pag_mount(pag);
655 xfs_agblock_t agbno;
656 xfs_agblock_t ag_end;
657 xfs_extlen_t extent_len;
658 xfs_extlen_t blen;
659 unsigned int num_extents = 0;
660 int bstate;
661 bool in_extent = false;
662
663 /* Find the number of free space extents. */
664 ag_end = libxfs_ag_block_count(mp, pag_agno(pag));
665 for (agbno = 0; agbno < ag_end; agbno += blen) {
666 bstate = get_bmap_ext(pag_agno(pag), agbno, ag_end, &blen,
667 false);
668 if (bstate < XR_E_INUSE) {
669 if (!in_extent) {
670 /*
671 * found the start of a free extent
672 */
673 in_extent = true;
674 num_extents++;
675 extent_len = blen;
676 } else {
677 extent_len += blen;
678 }
679 } else {
680 if (in_extent) {
681 /*
682 * free extent ends here
683 */
684 in_extent = false;
685 *num_freeblocks += extent_len;
686 }
687 }
688 }
689 if (in_extent)
690 *num_freeblocks += extent_len;
691
692 return estimate_agbtree_blocks(pag, num_extents);
693 }
694
/*
 * Decide if we need to pack every new btree block completely full to conserve
 * disk space.  Normally we rebuild btree blocks to be 75% full, but we don't
 * want to start rebuilding AG btrees that way only to discover that there
 * isn't enough space left in the data volume to rebuild inode-based btrees.
 */
static bool
are_packed_btrees_needed(
	struct xfs_mount	*mp)
{
	struct xfs_perag	*pag = NULL;
	struct xfs_rtgroup	*rtg = NULL;
	unsigned long long	metadata_blocks = 0;
	unsigned long long	fdblocks = 0;

	/*
	 * If we don't have inode-based metadata, we can let the AG btrees
	 * pack as needed; there are no global space concerns here.
	 */
	if (!xfs_has_rtrmapbt(mp) && !xfs_has_rtreflink(mp))
		return false;

	/* sum estimated AG btree usage and free space over all AGs */
	while ((pag = xfs_perag_next(mp, pag))) {
		unsigned int	ag_fdblocks = 0;

		metadata_blocks += estimate_agbtree_blocks_early(pag,
				&ag_fdblocks);
		fdblocks += ag_fdblocks;
	}

	/* and the realtime rmap/refcount btrees over all rt groups */
	while ((rtg = xfs_rtgroup_next(mp, rtg))) {
		metadata_blocks += estimate_rtrmapbt_blocks(rtg);
		metadata_blocks += estimate_rtrefcountbt_blocks(rtg);
	}

	/*
	 * If we think we'll have more metadata blocks than free space, then
	 * pack the btree blocks.
	 */
	if (metadata_blocks > fdblocks)
		return true;

	/*
	 * If the amount of free space after building btrees is less than
	 * 3/32 (~9.4%) of the data volume, pack the btree blocks.
	 */
	fdblocks -= metadata_blocks;
	if (fdblocks < ((mp->m_sb.sb_dblocks * 3) >> 5))
		return true;
	return false;
}
746
/*
 * Phase 5: rebuild every AG's btrees and headers from the incore state
 * gathered in phases 2-4, then reset the superblock counters and return
 * any over-reserved rebuild blocks to the free space.
 */
void
phase5(xfs_mount_t *mp)
{
	struct bitmap		*lost_blocks = NULL;
	struct xfs_perag	*pag = NULL;
	xfs_agnumber_t		agno;
	int			error;

	do_log(_("Phase 5 - rebuild AG headers and trees...\n"));
	set_progress_msg(PROG_FMT_REBUILD_AG, (uint64_t)glob_agcount);

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "inobt level 1, maxrec = %d, minrec = %d\n",
		libxfs_inobt_maxrecs(mp, mp->m_sb.sb_blocksize, false),
		libxfs_inobt_maxrecs(mp, mp->m_sb.sb_blocksize, false) / 2);
	fprintf(stderr, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n",
		libxfs_inobt_maxrecs(mp, mp->m_sb.sb_blocksize, true),
		libxfs_inobt_maxrecs(mp, mp->m_sb.sb_blocksize, true) / 2);
	fprintf(stderr, "xr inobt level 0 (leaf), maxrec = %d\n",
		XR_INOBT_BLOCK_MAXRECS(mp, 0));
	fprintf(stderr, "xr inobt level 1 (int), maxrec = %d\n",
		XR_INOBT_BLOCK_MAXRECS(mp, 1));
	fprintf(stderr, "bnobt level 1, maxrec = %d, minrec = %d\n",
		libxfs_allocbt_maxrecs(mp, mp->m_sb.sb_blocksize, false),
		libxfs_allocbt_maxrecs(mp, mp->m_sb.sb_blocksize, false) / 2);
	fprintf(stderr, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n",
		libxfs_allocbt_maxrecs(mp, mp->m_sb.sb_blocksize, true),
		libxfs_allocbt_maxrecs(mp, mp->m_sb.sb_blocksize, true) / 2);
#endif
	/*
	 * make sure the root and realtime inodes show up allocated
	 */
	keep_fsinos(mp);

	/* allocate per ag counters */
	sb_icount_ag = calloc(mp->m_sb.sb_agcount, sizeof(uint64_t));
	if (sb_icount_ag == NULL)
		do_error(_("cannot alloc sb_icount_ag buffers\n"));

	sb_ifree_ag = calloc(mp->m_sb.sb_agcount, sizeof(uint64_t));
	if (sb_ifree_ag == NULL)
		do_error(_("cannot alloc sb_ifree_ag buffers\n"));

	sb_fdblocks_ag = calloc(mp->m_sb.sb_agcount, sizeof(uint64_t));
	if (sb_fdblocks_ag == NULL)
		do_error(_("cannot alloc sb_fdblocks_ag buffers\n"));

	error = bitmap_alloc(&lost_blocks);
	if (error)
		do_error(_("cannot alloc lost block bitmap\n"));

	/* decide once, globally, whether AG btrees must be packed full */
	need_packed_btrees = are_packed_btrees_needed(mp);

	while ((pag = xfs_perag_next(mp, pag)))
		phase5_func(mp, pag, lost_blocks);

	print_final_rpt();

	/* aggregate per ag counters */
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		sb_icount += sb_icount_ag[agno];
		sb_ifree += sb_ifree_ag[agno];
		sb_fdblocks += sb_fdblocks_ag[agno];
	}
	free(sb_icount_ag);
	free(sb_ifree_ag);
	free(sb_fdblocks_ag);

	do_log(_(" - reset superblock...\n"));

	/*
	 * sync superblock counter and set version bits correctly
	 */
	sync_sb(mp);

	/*
	 * Put the per-AG btree rmap data into the rmapbt now that we've reset
	 * the superblock counters.
	 */
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		error = rmap_commit_agbtree_mappings(mp, agno);
		if (error)
			do_error(
_("unable to add AG %u reverse-mapping data to btree.\n"), agno);
	}

	/*
	 * Put blocks that were unnecessarily reserved for btree
	 * reconstruction back into the filesystem free space data.
	 */
	error = bitmap_iterate(lost_blocks, inject_lost_extent, mp);
	if (error)
		do_error(_("Unable to reinsert lost blocks into filesystem.\n"));
	bitmap_free(&lost_blocks);

	/* phase 5 rebuilt the inode btrees, so they are no longer bad */
	bad_ino_btree = 0;

}