2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "err_protos.h"
35 * we maintain the current slice (path from root to leaf)
36 * of the btree incore. when we need a new block, we ask
37 * the block allocator for the address of a block on that
38 * level, map the block in, and set up the appropriate
39 * pointers (child, sibling, etc.) and keys that should
40 * point to the new block.
/*
 * NOTE(review): extraction-garbled struct fragment — lines are split
 * mid-declaration and the original closing "} ..._t;" line is missing
 * from this chunk.  Code bytes left untouched; comments only.
 *
 * Per-level state for the incore "slice" (root-to-leaf path) maintained
 * while rebuilding a btree — see the block comment above this typedef.
 */
42 typedef struct bt_stat_level
{
44 * set in setup_cursor routine and maintained in the tree-building
47 xfs_buf_t
*buf_p
; /* 2 buffer pointers to ... */
48 xfs_buf_t
*prev_buf_p
;
49 xfs_agblock_t agbno
; /* current block being filled */
50 xfs_agblock_t prev_agbno
; /* previous block */
/*
 * geometry of this level, computed before the build starts.
 */
52 * set in calculate/init cursor routines for each btree level
54 int num_recs_tot
; /* # tree recs in level */
55 int num_blocks
; /* # tree blocks in level */
56 int num_recs_pb
; /* num_recs_tot / num_blocks */
57 int modulo
; /* num_recs_tot % num_blocks */
/*
 * NOTE(review): extraction-garbled struct fragment — the original
 * closing "} ..._t;" line is missing from this chunk.  Code bytes left
 * untouched; comments only.
 *
 * Whole-tree build cursor: geometry totals, the pre-allocated block
 * list handed out by get_next_blockaddr(), and per-level state.
 */
60 typedef struct bt_status
{
61 int init
; /* cursor set up once? */
62 int num_levels
; /* # of levels in btree */
63 xfs_extlen_t num_tot_blocks
; /* # blocks alloc'ed for tree */
64 xfs_extlen_t num_free_blocks
;/* # blocks currently unused */
66 xfs_agblock_t root
; /* root block */
68 * list of blocks to be used to set up this tree
69 * and pointer to the first unused block on the list
71 xfs_agblock_t
*btree_blocks
; /* block list */
72 xfs_agblock_t
*free_btree_blocks
; /* first unused block */
74 * per-level status info
76 bt_stat_level_t level
[XFS_BTREE_MAXLEVELS
];
77 uint64_t owner
; /* owner */
/*
 * NOTE(review): fragment — these fields presumably belong to the
 * agi_stat struct (its opening/closing lines are missing from this
 * chunk).  build_agi() below reads agi_stat->first_agino into
 * agi_newino and agi_stat->freecount into agi_freecount.
 */
81 * extra metadata for the agi
84 xfs_agino_t first_agino
;
86 xfs_agino_t freecount
;
/*
 * Per-AG running totals; presumably accumulated during this phase and
 * rolled up into the superblock icount/ifree/fdblocks counters —
 * allocation and use sites are not visible in this chunk, so TODO
 * confirm against the full file.
 */
89 static __uint64_t
*sb_icount_ag
; /* allocated inodes per ag */
90 static __uint64_t
*sb_ifree_ag
; /* free inodes per ag */
91 static __uint64_t
*sb_fdblocks_ag
; /* free data blocks per ag */
94 mk_incore_fstree(xfs_mount_t
*mp
, xfs_agnumber_t agno
)
98 xfs_agblock_t extent_start
;
99 xfs_extlen_t extent_len
;
101 xfs_agblock_t ag_end
;
107 * scan the bitmap for the ag looking for continuous
108 * extents of free blocks. At this point, we know
109 * that blocks in the bitmap are either set to an
110 * "in use" state or set to unknown (0) since the
111 * bmaps were zero'ed in phase 4 and only blocks
112 * being used by inodes, inode bmaps, ag headers,
113 * and the files themselves were put into the bitmap.
116 ASSERT(agno
< mp
->m_sb
.sb_agcount
);
118 extent_start
= extent_len
= 0;
120 num_extents
= free_blocks
= 0;
122 if (agno
< mp
->m_sb
.sb_agcount
- 1)
123 ag_end
= mp
->m_sb
.sb_agblocks
;
125 ag_end
= mp
->m_sb
.sb_dblocks
-
126 (xfs_rfsblock_t
)mp
->m_sb
.sb_agblocks
*
127 (mp
->m_sb
.sb_agcount
- 1);
130 * ok, now find the number of extents, keep track of the
133 for (agbno
= 0; agbno
< ag_end
; agbno
+= blen
) {
134 bstate
= get_bmap_ext(agno
, agbno
, ag_end
, &blen
);
135 if (bstate
< XR_E_INUSE
) {
137 if (in_extent
== 0) {
139 * found the start of a free extent
143 extent_start
= agbno
;
151 * free extent ends here, add extent to the
152 * 2 incore extent (avl-to-be-B+) trees
155 #if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT)
156 fprintf(stderr
, "adding extent %u [%u %u]\n",
157 agno
, extent_start
, extent_len
);
159 add_bno_extent(agno
, extent_start
, extent_len
);
160 add_bcnt_extent(agno
, extent_start
, extent_len
);
166 * free extent ends here
168 #if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT)
169 fprintf(stderr
, "adding extent %u [%u %u]\n",
170 agno
, extent_start
, extent_len
);
172 add_bno_extent(agno
, extent_start
, extent_len
);
173 add_bcnt_extent(agno
, extent_start
, extent_len
);
/*
 * NOTE(review): extraction-garbled fragment of get_next_blockaddr() —
 * the return-type line, braces, and blank lines are missing from this
 * chunk.  Code bytes left untouched; comments only.
 *
 * Hand out the next pre-allocated AG block number from the cursor's
 * free-block list: assert the list pointer is still inside the
 * allocated array and unused blocks remain, then consume one entry
 * (decrement num_free_blocks, post-increment free_btree_blocks).
 * The agno/level parameters are unused in the visible statements.
 */
180 get_next_blockaddr(xfs_agnumber_t agno
, int level
, bt_status_t
*curs
)
182 ASSERT(curs
->free_btree_blocks
< curs
->btree_blocks
+
183 curs
->num_tot_blocks
);
184 ASSERT(curs
->num_free_blocks
> 0);
186 curs
->num_free_blocks
--;
187 return(*curs
->free_btree_blocks
++);
191 * set up the dynamically allocated block allocation data in the btree
192 * cursor that depends on the info in the static portion of the cursor.
193 * allocates space from the incore bno/bcnt extent trees and sets up
194 * the first path up the left side of the tree. Also sets up the
195 * cursor pointer to the btree root. called by init_freespace_cursor()
196 * and init_ino_cursor()
199 setup_cursor(xfs_mount_t
*mp
, xfs_agnumber_t agno
, bt_status_t
*curs
)
203 xfs_extlen_t big_extent_len
;
204 xfs_agblock_t big_extent_start
;
205 extent_tree_node_t
*ext_ptr
;
206 extent_tree_node_t
*bno_ext_ptr
;
207 xfs_extlen_t blocks_allocated
;
208 xfs_agblock_t
*agb_ptr
;
212 * get the number of blocks we need to allocate, then
213 * set up block number array, set the free block pointer
214 * to the first block in the array, and null the array
216 big_extent_len
= curs
->num_tot_blocks
;
217 blocks_allocated
= 0;
219 ASSERT(big_extent_len
> 0);
221 if ((curs
->btree_blocks
= malloc(sizeof(xfs_agblock_t
)
222 * big_extent_len
)) == NULL
)
223 do_error(_("could not set up btree block array\n"));
225 agb_ptr
= curs
->free_btree_blocks
= curs
->btree_blocks
;
227 for (j
= 0; j
< curs
->num_free_blocks
; j
++, agb_ptr
++)
228 *agb_ptr
= NULLAGBLOCK
;
231 * grab the smallest extent and use it up, then get the
232 * next smallest. This mimics the init_*_cursor code.
234 ext_ptr
= findfirst_bcnt_extent(agno
);
236 agb_ptr
= curs
->btree_blocks
;
239 * set up the free block array
241 while (blocks_allocated
< big_extent_len
) {
244 _("error - not enough free space in filesystem\n"));
246 * use up the extent we've got
248 for (u
= 0; u
< ext_ptr
->ex_blockcount
&&
249 blocks_allocated
< big_extent_len
; u
++) {
250 ASSERT(agb_ptr
< curs
->btree_blocks
251 + curs
->num_tot_blocks
);
252 *agb_ptr
++ = ext_ptr
->ex_startblock
+ u
;
256 error
= rmap_add_ag_rec(mp
, agno
, ext_ptr
->ex_startblock
, u
,
259 do_error(_("could not set up btree rmaps: %s\n"),
263 * if we only used part of this last extent, then we
264 * need only to reset the extent in the extent
265 * trees and we're done
267 if (u
< ext_ptr
->ex_blockcount
) {
268 big_extent_start
= ext_ptr
->ex_startblock
+ u
;
269 big_extent_len
= ext_ptr
->ex_blockcount
- u
;
271 ASSERT(big_extent_len
> 0);
273 bno_ext_ptr
= find_bno_extent(agno
,
274 ext_ptr
->ex_startblock
);
275 ASSERT(bno_ext_ptr
!= NULL
);
276 get_bno_extent(agno
, bno_ext_ptr
);
277 release_extent_tree_node(bno_ext_ptr
);
279 ext_ptr
= get_bcnt_extent(agno
, ext_ptr
->ex_startblock
,
280 ext_ptr
->ex_blockcount
);
281 release_extent_tree_node(ext_ptr
);
282 #ifdef XR_BLD_FREE_TRACE
283 fprintf(stderr
, "releasing extent: %u [%u %u]\n",
284 agno
, ext_ptr
->ex_startblock
,
285 ext_ptr
->ex_blockcount
);
286 fprintf(stderr
, "blocks_allocated = %d\n",
290 add_bno_extent(agno
, big_extent_start
, big_extent_len
);
291 add_bcnt_extent(agno
, big_extent_start
, big_extent_len
);
296 * delete the used-up extent from both extent trees and
297 * find next biggest extent
299 #ifdef XR_BLD_FREE_TRACE
300 fprintf(stderr
, "releasing extent: %u [%u %u]\n",
301 agno
, ext_ptr
->ex_startblock
, ext_ptr
->ex_blockcount
);
303 bno_ext_ptr
= find_bno_extent(agno
, ext_ptr
->ex_startblock
);
304 ASSERT(bno_ext_ptr
!= NULL
);
305 get_bno_extent(agno
, bno_ext_ptr
);
306 release_extent_tree_node(bno_ext_ptr
);
308 ext_ptr
= get_bcnt_extent(agno
, ext_ptr
->ex_startblock
,
309 ext_ptr
->ex_blockcount
);
310 ASSERT(ext_ptr
!= NULL
);
311 release_extent_tree_node(ext_ptr
);
313 ext_ptr
= findfirst_bcnt_extent(agno
);
315 #ifdef XR_BLD_FREE_TRACE
316 fprintf(stderr
, "blocks_allocated = %d\n",
/*
 * NOTE(review): extraction-garbled fragment of write_cursor() — the
 * return-type line, braces, and the declaration of 'i' are missing
 * from this chunk.  Code bytes left untouched; comments only.
 *
 * Flush a finished tree-build cursor to disk: for each level, write
 * out the trailing "previous" buffer if one is still held, then the
 * current buffer.
 */
322 write_cursor(bt_status_t
*curs
)
326 for (i
= 0; i
< curs
->num_levels
; i
++) {
327 #if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE)
328 fprintf(stderr
, "writing bt block %u\n", curs
->level
[i
].agbno
);
/* a held prev buffer implies its agbno was recorded when it was filled */
330 if (curs
->level
[i
].prev_buf_p
!= NULL
) {
331 ASSERT(curs
->level
[i
].prev_agbno
!= NULLAGBLOCK
);
332 #if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE)
333 fprintf(stderr
, "writing bt prev block %u\n",
334 curs
->level
[i
].prev_agbno
);
336 libxfs_writebuf(curs
->level
[i
].prev_buf_p
, 0);
338 libxfs_writebuf(curs
->level
[i
].buf_p
, 0);
/*
 * NOTE(review): extraction-garbled fragment of finish_cursor() — the
 * return-type line and braces are missing from this chunk.  Code bytes
 * left untouched; comments only.
 *
 * Tear down a tree-build cursor: every pre-allocated block must have
 * been consumed by the build (num_free_blocks == 0) before the block
 * number array is released.
 */
343 finish_cursor(bt_status_t
*curs
)
345 ASSERT(curs
->num_free_blocks
== 0);
346 free(curs
->btree_blocks
);
350 * We need to leave some free records in the tree for the corner case of
351 * setting up the AGFL. This may require allocation of blocks, and as
352 * such can require insertion of new records into the tree (e.g. moving
353 * a record in the by-count tree when a long extent is shortened). If we
354 * pack the records into the leaves with no slack space, this requires a
355 * leaf split to occur and a block to be allocated from the free list.
356 * If we don't have any blocks on the free list (because we are setting
357 * it up!), then we fail, and the filesystem will fail with the same
358 * failure at runtime. Hence leave a couple of records slack space in
359 * each block to allow immediate modification of the tree without
360 * requiring splits to be done.
362 * XXX(hch): any reason we don't just look at mp->m_alloc_mxr?
364 #define XR_ALLOC_BLOCK_MAXRECS(mp, level) \
365 (libxfs_allocbt_maxrecs((mp), (mp)->m_sb.sb_blocksize, (level) == 0) - 2)
368 * this calculates a freespace cursor for an ag.
369 * btree_curs is an in/out. returns the number of
370 * blocks that will show up in the AGFL.
373 calculate_freespace_cursor(xfs_mount_t
*mp
, xfs_agnumber_t agno
,
374 xfs_agblock_t
*extents
, bt_status_t
*btree_curs
)
376 xfs_extlen_t blocks_needed
; /* a running count */
377 xfs_extlen_t blocks_allocated_pt
; /* per tree */
378 xfs_extlen_t blocks_allocated_total
; /* for both trees */
379 xfs_agblock_t num_extents
;
383 bt_stat_level_t
*lptr
;
384 bt_stat_level_t
*p_lptr
;
385 extent_tree_node_t
*ext_ptr
;
388 num_extents
= *extents
;
391 ASSERT(num_extents
!= 0);
393 lptr
= &btree_curs
->level
[0];
394 btree_curs
->init
= 1;
397 * figure out how much space we need for the leaf level
398 * of the tree and set up the cursor for the leaf level
399 * (note that the same code is duplicated further down)
401 lptr
->num_blocks
= howmany(num_extents
, XR_ALLOC_BLOCK_MAXRECS(mp
, 0));
402 lptr
->num_recs_pb
= num_extents
/ lptr
->num_blocks
;
403 lptr
->modulo
= num_extents
% lptr
->num_blocks
;
404 lptr
->num_recs_tot
= num_extents
;
407 #ifdef XR_BLD_FREE_TRACE
408 fprintf(stderr
, "%s 0 %d %d %d %d\n", __func__
,
415 * if we need more levels, set them up. # of records
416 * per level is the # of blocks in the level below it
418 if (lptr
->num_blocks
> 1) {
419 for (; btree_curs
->level
[level
- 1].num_blocks
> 1
420 && level
< XFS_BTREE_MAXLEVELS
;
422 lptr
= &btree_curs
->level
[level
];
423 p_lptr
= &btree_curs
->level
[level
- 1];
424 lptr
->num_blocks
= howmany(p_lptr
->num_blocks
,
425 XR_ALLOC_BLOCK_MAXRECS(mp
, level
));
426 lptr
->modulo
= p_lptr
->num_blocks
428 lptr
->num_recs_pb
= p_lptr
->num_blocks
430 lptr
->num_recs_tot
= p_lptr
->num_blocks
;
431 #ifdef XR_BLD_FREE_TRACE
432 fprintf(stderr
, "%s %d %d %d %d %d\n", __func__
,
442 ASSERT(lptr
->num_blocks
== 1);
443 btree_curs
->num_levels
= level
;
446 * ok, now we have a hypothetical cursor that
447 * will work for both the bno and bcnt trees.
448 * now figure out if using up blocks to set up the
449 * trees will perturb the shape of the freespace tree.
450 * if so, we've over-allocated. the freespace trees
451 * as they will be *after* accounting for the free space
452 * we've used up will need fewer blocks to represent
453 * than we've allocated. We can use the AGFL to hold
454 * XFS_AGFL_SIZE (sector/xfs_agfl_t) blocks but that's it.
455 * Thus we limit things to XFS_AGFL_SIZE/2 for each of the 2 btrees.
456 * if the number of extra blocks is more than that,
457 * we'll have to be called again.
459 for (blocks_needed
= 0, i
= 0; i
< level
; i
++) {
460 blocks_needed
+= btree_curs
->level
[i
].num_blocks
;
464 * record the # of blocks we've allocated
466 blocks_allocated_pt
= blocks_needed
;
468 blocks_allocated_total
= blocks_needed
;
471 * figure out how many free extents will be used up by
472 * our space allocation
474 if ((ext_ptr
= findfirst_bcnt_extent(agno
)) == NULL
)
475 do_error(_("can't rebuild fs trees -- not enough free space "
476 "on ag %u\n"), agno
);
478 while (ext_ptr
!= NULL
&& blocks_needed
> 0) {
479 if (ext_ptr
->ex_blockcount
<= blocks_needed
) {
480 blocks_needed
-= ext_ptr
->ex_blockcount
;
486 ext_ptr
= findnext_bcnt_extent(agno
, ext_ptr
);
488 #ifdef XR_BLD_FREE_TRACE
489 if (ext_ptr
!= NULL
) {
490 fprintf(stderr
, "got next extent [%u %u]\n",
491 ext_ptr
->ex_startblock
, ext_ptr
->ex_blockcount
);
493 fprintf(stderr
, "out of extents\n");
497 if (blocks_needed
> 0)
498 do_error(_("ag %u - not enough free space to build freespace "
501 ASSERT(num_extents
>= extents_used
);
503 num_extents
-= extents_used
;
506 * see if the number of leaf blocks will change as a result
507 * of the number of extents changing
509 if (howmany(num_extents
, XR_ALLOC_BLOCK_MAXRECS(mp
, 0))
510 != btree_curs
->level
[0].num_blocks
) {
512 * yes -- recalculate the cursor. If the number of
513 * excess (overallocated) blocks is < XFS_AGFL_SIZE/2, we're ok.
514 * we can put those into the AGFL. we don't try
515 * and get things to converge exactly (reach a
516 * state with zero excess blocks) because there
517 * exist pathological cases which will never
518 * converge. first, check for the zero-case.
520 if (num_extents
== 0) {
522 * ok, we've used up all the free blocks
523 * trying to lay out the leaf level. go
524 * to a one block (empty) btree and put the
525 * already allocated blocks into the AGFL
527 if (btree_curs
->level
[0].num_blocks
!= 1) {
529 * we really needed more blocks because
530 * the old tree had more than one level.
533 do_warn(_("not enough free blocks left to "
534 "describe all free blocks in AG "
537 #ifdef XR_BLD_FREE_TRACE
539 "ag %u -- no free extents, alloc'ed %d\n",
540 agno
, blocks_allocated_pt
);
542 lptr
->num_blocks
= 1;
544 lptr
->num_recs_pb
= 0;
545 lptr
->num_recs_tot
= 0;
547 btree_curs
->num_levels
= 1;
550 * don't reset the allocation stats, assume
551 * they're all extra blocks
552 * don't forget to return the total block count
553 * not the per-tree block count. these are the
554 * extras that will go into the AGFL. subtract
555 * two for the root blocks.
557 btree_curs
->num_tot_blocks
= blocks_allocated_pt
;
558 btree_curs
->num_free_blocks
= blocks_allocated_pt
;
562 return(blocks_allocated_total
- 2);
565 lptr
= &btree_curs
->level
[0];
566 lptr
->num_blocks
= howmany(num_extents
,
567 XR_ALLOC_BLOCK_MAXRECS(mp
, 0));
568 lptr
->num_recs_pb
= num_extents
/ lptr
->num_blocks
;
569 lptr
->modulo
= num_extents
% lptr
->num_blocks
;
570 lptr
->num_recs_tot
= num_extents
;
574 * if we need more levels, set them up
576 if (lptr
->num_blocks
> 1) {
577 for (level
= 1; btree_curs
->level
[level
-1].num_blocks
578 > 1 && level
< XFS_BTREE_MAXLEVELS
;
580 lptr
= &btree_curs
->level
[level
];
581 p_lptr
= &btree_curs
->level
[level
-1];
582 lptr
->num_blocks
= howmany(p_lptr
->num_blocks
,
583 XR_ALLOC_BLOCK_MAXRECS(mp
, level
));
584 lptr
->modulo
= p_lptr
->num_blocks
586 lptr
->num_recs_pb
= p_lptr
->num_blocks
588 lptr
->num_recs_tot
= p_lptr
->num_blocks
;
591 ASSERT(lptr
->num_blocks
== 1);
592 btree_curs
->num_levels
= level
;
595 * now figure out the number of excess blocks
597 for (blocks_needed
= 0, i
= 0; i
< level
; i
++) {
598 blocks_needed
+= btree_curs
->level
[i
].num_blocks
;
602 ASSERT(blocks_allocated_total
>= blocks_needed
);
603 extra_blocks
= blocks_allocated_total
- blocks_needed
;
605 if (extents_used
> 0) {
607 * reset the leaf level geometry to account
608 * for consumed extents. we can leave the
609 * rest of the cursor alone since the number
610 * of leaf blocks hasn't changed.
612 lptr
= &btree_curs
->level
[0];
614 lptr
->num_recs_pb
= num_extents
/ lptr
->num_blocks
;
615 lptr
->modulo
= num_extents
% lptr
->num_blocks
;
616 lptr
->num_recs_tot
= num_extents
;
622 btree_curs
->num_tot_blocks
= blocks_allocated_pt
;
623 btree_curs
->num_free_blocks
= blocks_allocated_pt
;
625 *extents
= num_extents
;
627 return(extra_blocks
);
631 prop_freespace_cursor(xfs_mount_t
*mp
, xfs_agnumber_t agno
,
632 bt_status_t
*btree_curs
, xfs_agblock_t startblock
,
633 xfs_extlen_t blockcount
, int level
, __uint32_t magic
)
635 struct xfs_btree_block
*bt_hdr
;
636 xfs_alloc_key_t
*bt_key
;
637 xfs_alloc_ptr_t
*bt_ptr
;
639 bt_stat_level_t
*lptr
;
640 __uint32_t crc_magic
;
642 if (magic
== XFS_ABTB_MAGIC
)
643 crc_magic
= XFS_ABTB_CRC_MAGIC
;
645 crc_magic
= XFS_ABTC_CRC_MAGIC
;
649 if (level
>= btree_curs
->num_levels
)
652 lptr
= &btree_curs
->level
[level
];
653 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
655 if (be16_to_cpu(bt_hdr
->bb_numrecs
) == 0) {
657 * only happens once when initializing the
658 * left-hand side of the tree.
660 prop_freespace_cursor(mp
, agno
, btree_curs
, startblock
,
661 blockcount
, level
, magic
);
664 if (be16_to_cpu(bt_hdr
->bb_numrecs
) ==
665 lptr
->num_recs_pb
+ (lptr
->modulo
> 0)) {
667 * write out current prev block, grab us a new block,
668 * and set the rightsib pointer of current block
670 #ifdef XR_BLD_FREE_TRACE
671 fprintf(stderr
, " %d ", lptr
->prev_agbno
);
673 if (lptr
->prev_agbno
!= NULLAGBLOCK
) {
674 ASSERT(lptr
->prev_buf_p
!= NULL
);
675 libxfs_writebuf(lptr
->prev_buf_p
, 0);
677 lptr
->prev_agbno
= lptr
->agbno
;;
678 lptr
->prev_buf_p
= lptr
->buf_p
;
679 agbno
= get_next_blockaddr(agno
, level
, btree_curs
);
681 bt_hdr
->bb_u
.s
.bb_rightsib
= cpu_to_be32(agbno
);
683 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
684 XFS_AGB_TO_DADDR(mp
, agno
, agbno
),
685 XFS_FSB_TO_BB(mp
, 1));
692 * initialize block header
694 lptr
->buf_p
->b_ops
= &xfs_allocbt_buf_ops
;
695 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
696 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
697 if (xfs_sb_version_hascrc(&mp
->m_sb
))
698 libxfs_btree_init_block(mp
, lptr
->buf_p
, crc_magic
, level
,
699 0, agno
, XFS_BTREE_CRC_BLOCKS
);
701 libxfs_btree_init_block(mp
, lptr
->buf_p
, magic
, level
,
704 bt_hdr
->bb_u
.s
.bb_leftsib
= cpu_to_be32(lptr
->prev_agbno
);
707 * propagate extent record for first extent in new block up
709 prop_freespace_cursor(mp
, agno
, btree_curs
, startblock
,
710 blockcount
, level
, magic
);
713 * add extent info to current block
715 be16_add_cpu(&bt_hdr
->bb_numrecs
, 1);
717 bt_key
= XFS_ALLOC_KEY_ADDR(mp
, bt_hdr
,
718 be16_to_cpu(bt_hdr
->bb_numrecs
));
719 bt_ptr
= XFS_ALLOC_PTR_ADDR(mp
, bt_hdr
,
720 be16_to_cpu(bt_hdr
->bb_numrecs
),
723 bt_key
->ar_startblock
= cpu_to_be32(startblock
);
724 bt_key
->ar_blockcount
= cpu_to_be32(blockcount
);
725 *bt_ptr
= cpu_to_be32(btree_curs
->level
[level
-1].agbno
);
729 * rebuilds a freespace tree given a cursor and magic number of type
730 * of tree to build (bno or bcnt). returns the number of free blocks
731 * represented by the tree.
734 build_freespace_tree(xfs_mount_t
*mp
, xfs_agnumber_t agno
,
735 bt_status_t
*btree_curs
, __uint32_t magic
)
739 struct xfs_btree_block
*bt_hdr
;
740 xfs_alloc_rec_t
*bt_rec
;
743 extent_tree_node_t
*ext_ptr
;
744 bt_stat_level_t
*lptr
;
745 xfs_extlen_t freeblks
;
746 __uint32_t crc_magic
;
748 #ifdef XR_BLD_FREE_TRACE
749 fprintf(stderr
, "in build_freespace_tree, agno = %d\n", agno
);
751 level
= btree_curs
->num_levels
;
755 if (magic
== XFS_ABTB_MAGIC
)
756 crc_magic
= XFS_ABTB_CRC_MAGIC
;
758 crc_magic
= XFS_ABTC_CRC_MAGIC
;
761 * initialize the first block on each btree level
763 for (i
= 0; i
< level
; i
++) {
764 lptr
= &btree_curs
->level
[i
];
766 agbno
= get_next_blockaddr(agno
, i
, btree_curs
);
767 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
768 XFS_AGB_TO_DADDR(mp
, agno
, agbno
),
769 XFS_FSB_TO_BB(mp
, 1));
771 if (i
== btree_curs
->num_levels
- 1)
772 btree_curs
->root
= agbno
;
775 lptr
->prev_agbno
= NULLAGBLOCK
;
776 lptr
->prev_buf_p
= NULL
;
778 * initialize block header
780 lptr
->buf_p
->b_ops
= &xfs_allocbt_buf_ops
;
781 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
782 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
783 if (xfs_sb_version_hascrc(&mp
->m_sb
))
784 libxfs_btree_init_block(mp
, lptr
->buf_p
, crc_magic
, i
,
785 0, agno
, XFS_BTREE_CRC_BLOCKS
);
787 libxfs_btree_init_block(mp
, lptr
->buf_p
, magic
, i
,
791 * run along leaf, setting up records. as we have to switch
792 * blocks, call the prop_freespace_cursor routine to set up the new
793 * pointers for the parent. that can recurse up to the root
794 * if required. set the sibling pointers for leaf level here.
796 if (magic
== XFS_ABTB_MAGIC
)
797 ext_ptr
= findfirst_bno_extent(agno
);
799 ext_ptr
= findfirst_bcnt_extent(agno
);
801 #ifdef XR_BLD_FREE_TRACE
802 fprintf(stderr
, "bft, agno = %d, start = %u, count = %u\n",
803 agno
, ext_ptr
->ex_startblock
, ext_ptr
->ex_blockcount
);
806 lptr
= &btree_curs
->level
[0];
808 for (i
= 0; i
< btree_curs
->level
[0].num_blocks
; i
++) {
810 * block initialization, lay in block header
812 lptr
->buf_p
->b_ops
= &xfs_allocbt_buf_ops
;
813 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
814 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
815 if (xfs_sb_version_hascrc(&mp
->m_sb
))
816 libxfs_btree_init_block(mp
, lptr
->buf_p
, crc_magic
, 0,
817 0, agno
, XFS_BTREE_CRC_BLOCKS
);
819 libxfs_btree_init_block(mp
, lptr
->buf_p
, magic
, 0,
822 bt_hdr
->bb_u
.s
.bb_leftsib
= cpu_to_be32(lptr
->prev_agbno
);
823 bt_hdr
->bb_numrecs
= cpu_to_be16(lptr
->num_recs_pb
+
825 #ifdef XR_BLD_FREE_TRACE
826 fprintf(stderr
, "bft, bb_numrecs = %d\n",
827 be16_to_cpu(bt_hdr
->bb_numrecs
));
830 if (lptr
->modulo
> 0)
834 * initialize values in the path up to the root if
835 * this is a multi-level btree
837 if (btree_curs
->num_levels
> 1)
838 prop_freespace_cursor(mp
, agno
, btree_curs
,
839 ext_ptr
->ex_startblock
,
840 ext_ptr
->ex_blockcount
,
843 bt_rec
= (xfs_alloc_rec_t
*)
844 ((char *)bt_hdr
+ XFS_ALLOC_BLOCK_LEN(mp
));
845 for (j
= 0; j
< be16_to_cpu(bt_hdr
->bb_numrecs
); j
++) {
846 ASSERT(ext_ptr
!= NULL
);
847 bt_rec
[j
].ar_startblock
= cpu_to_be32(
848 ext_ptr
->ex_startblock
);
849 bt_rec
[j
].ar_blockcount
= cpu_to_be32(
850 ext_ptr
->ex_blockcount
);
851 freeblks
+= ext_ptr
->ex_blockcount
;
852 if (magic
== XFS_ABTB_MAGIC
)
853 ext_ptr
= findnext_bno_extent(ext_ptr
);
855 ext_ptr
= findnext_bcnt_extent(agno
, ext_ptr
);
857 #ifdef XR_BLD_FREE_TRACE
859 fprintf(stderr
, "null extent pointer, j = %d\n",
863 "bft, agno = %d, start = %u, count = %u\n",
864 agno
, ext_ptr
->ex_startblock
,
865 ext_ptr
->ex_blockcount
);
870 if (ext_ptr
!= NULL
) {
872 * get next leaf level block
874 if (lptr
->prev_buf_p
!= NULL
) {
875 #ifdef XR_BLD_FREE_TRACE
876 fprintf(stderr
, " writing fst agbno %u\n",
879 ASSERT(lptr
->prev_agbno
!= NULLAGBLOCK
);
880 libxfs_writebuf(lptr
->prev_buf_p
, 0);
882 lptr
->prev_buf_p
= lptr
->buf_p
;
883 lptr
->prev_agbno
= lptr
->agbno
;
884 lptr
->agbno
= get_next_blockaddr(agno
, 0, btree_curs
);
885 bt_hdr
->bb_u
.s
.bb_rightsib
= cpu_to_be32(lptr
->agbno
);
887 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
888 XFS_AGB_TO_DADDR(mp
, agno
, lptr
->agbno
),
889 XFS_FSB_TO_BB(mp
, 1));
897 * XXX(hch): any reason we don't just look at mp->m_inobt_mxr?
899 #define XR_INOBT_BLOCK_MAXRECS(mp, level) \
900 libxfs_inobt_maxrecs((mp), (mp)->m_sb.sb_blocksize, \
904 * we don't have to worry here about how chewing up free extents
905 * may perturb things because inode tree building happens before
906 * freespace tree building.
909 init_ino_cursor(xfs_mount_t
*mp
, xfs_agnumber_t agno
, bt_status_t
*btree_curs
,
910 __uint64_t
*num_inos
, __uint64_t
*num_free_inos
, int finobt
)
916 ino_tree_node_t
*ino_rec
;
919 bt_stat_level_t
*lptr
;
920 bt_stat_level_t
*p_lptr
;
921 xfs_extlen_t blocks_allocated
;
924 *num_inos
= *num_free_inos
= 0;
927 lptr
= &btree_curs
->level
[0];
928 btree_curs
->init
= 1;
929 btree_curs
->owner
= XFS_RMAP_OWN_INOBT
;
932 * build up statistics
934 ino_rec
= findfirst_inode_rec(agno
);
935 for (num_recs
= 0; ino_rec
!= NULL
; ino_rec
= next_ino_rec(ino_rec
)) {
938 for (i
= 0; i
< XFS_INODES_PER_CHUNK
; i
++) {
939 ASSERT(is_inode_confirmed(ino_rec
, i
));
941 * sparse inodes are not factored into superblock (free)
944 if (is_inode_sparse(ino_rec
, i
))
946 if (is_inode_free(ino_rec
, i
))
952 * finobt only considers records with free inodes
954 if (finobt
&& !rec_nfinos
)
957 nfinos
+= rec_nfinos
;
964 * easy corner-case -- no inode records
966 lptr
->num_blocks
= 1;
968 lptr
->num_recs_pb
= 0;
969 lptr
->num_recs_tot
= 0;
971 btree_curs
->num_levels
= 1;
972 btree_curs
->num_tot_blocks
= btree_curs
->num_free_blocks
= 1;
974 setup_cursor(mp
, agno
, btree_curs
);
979 blocks_allocated
= lptr
->num_blocks
= howmany(num_recs
,
980 XR_INOBT_BLOCK_MAXRECS(mp
, 0));
982 lptr
->modulo
= num_recs
% lptr
->num_blocks
;
983 lptr
->num_recs_pb
= num_recs
/ lptr
->num_blocks
;
984 lptr
->num_recs_tot
= num_recs
;
987 if (lptr
->num_blocks
> 1) {
988 for (; btree_curs
->level
[level
-1].num_blocks
> 1
989 && level
< XFS_BTREE_MAXLEVELS
;
991 lptr
= &btree_curs
->level
[level
];
992 p_lptr
= &btree_curs
->level
[level
- 1];
993 lptr
->num_blocks
= howmany(p_lptr
->num_blocks
,
994 XR_INOBT_BLOCK_MAXRECS(mp
, level
));
995 lptr
->modulo
= p_lptr
->num_blocks
% lptr
->num_blocks
;
996 lptr
->num_recs_pb
= p_lptr
->num_blocks
998 lptr
->num_recs_tot
= p_lptr
->num_blocks
;
1000 blocks_allocated
+= lptr
->num_blocks
;
1003 ASSERT(lptr
->num_blocks
== 1);
1004 btree_curs
->num_levels
= level
;
1006 btree_curs
->num_tot_blocks
= btree_curs
->num_free_blocks
1009 setup_cursor(mp
, agno
, btree_curs
);
1012 *num_free_inos
= nfinos
;
1018 prop_ino_cursor(xfs_mount_t
*mp
, xfs_agnumber_t agno
, bt_status_t
*btree_curs
,
1019 xfs_agino_t startino
, int level
)
1021 struct xfs_btree_block
*bt_hdr
;
1022 xfs_inobt_key_t
*bt_key
;
1023 xfs_inobt_ptr_t
*bt_ptr
;
1024 xfs_agblock_t agbno
;
1025 bt_stat_level_t
*lptr
;
1029 if (level
>= btree_curs
->num_levels
)
1032 lptr
= &btree_curs
->level
[level
];
1033 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1035 if (be16_to_cpu(bt_hdr
->bb_numrecs
) == 0) {
1037 * this only happens once to initialize the
1038 * first path up the left side of the tree
1039 * where the agbno's are already set up
1041 prop_ino_cursor(mp
, agno
, btree_curs
, startino
, level
);
1044 if (be16_to_cpu(bt_hdr
->bb_numrecs
) ==
1045 lptr
->num_recs_pb
+ (lptr
->modulo
> 0)) {
1047 * write out current prev block, grab us a new block,
1048 * and set the rightsib pointer of current block
1050 #ifdef XR_BLD_INO_TRACE
1051 fprintf(stderr
, " ino prop agbno %d ", lptr
->prev_agbno
);
1053 if (lptr
->prev_agbno
!= NULLAGBLOCK
) {
1054 ASSERT(lptr
->prev_buf_p
!= NULL
);
1055 libxfs_writebuf(lptr
->prev_buf_p
, 0);
1057 lptr
->prev_agbno
= lptr
->agbno
;;
1058 lptr
->prev_buf_p
= lptr
->buf_p
;
1059 agbno
= get_next_blockaddr(agno
, level
, btree_curs
);
1061 bt_hdr
->bb_u
.s
.bb_rightsib
= cpu_to_be32(agbno
);
1063 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1064 XFS_AGB_TO_DADDR(mp
, agno
, agbno
),
1065 XFS_FSB_TO_BB(mp
, 1));
1066 lptr
->agbno
= agbno
;
1072 * initialize block header
1074 lptr
->buf_p
->b_ops
= &xfs_inobt_buf_ops
;
1075 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1076 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1077 if (xfs_sb_version_hascrc(&mp
->m_sb
))
1078 libxfs_btree_init_block(mp
, lptr
->buf_p
, XFS_IBT_CRC_MAGIC
,
1080 XFS_BTREE_CRC_BLOCKS
);
1082 libxfs_btree_init_block(mp
, lptr
->buf_p
, XFS_IBT_MAGIC
,
1085 bt_hdr
->bb_u
.s
.bb_leftsib
= cpu_to_be32(lptr
->prev_agbno
);
1088 * propagate extent record for first extent in new block up
1090 prop_ino_cursor(mp
, agno
, btree_curs
, startino
, level
);
1093 * add inode info to current block
1095 be16_add_cpu(&bt_hdr
->bb_numrecs
, 1);
1097 bt_key
= XFS_INOBT_KEY_ADDR(mp
, bt_hdr
,
1098 be16_to_cpu(bt_hdr
->bb_numrecs
));
1099 bt_ptr
= XFS_INOBT_PTR_ADDR(mp
, bt_hdr
,
1100 be16_to_cpu(bt_hdr
->bb_numrecs
),
1101 mp
->m_inobt_mxr
[1]);
1103 bt_key
->ir_startino
= cpu_to_be32(startino
);
1104 *bt_ptr
= cpu_to_be32(btree_curs
->level
[level
-1].agbno
);
1108 * XXX: yet more code that can be shared with mkfs, growfs.
1111 build_agi(xfs_mount_t
*mp
, xfs_agnumber_t agno
, bt_status_t
*btree_curs
,
1112 bt_status_t
*finobt_curs
, struct agi_stat
*agi_stat
)
1118 agi_buf
= libxfs_getbuf(mp
->m_dev
,
1119 XFS_AG_DADDR(mp
, agno
, XFS_AGI_DADDR(mp
)),
1120 mp
->m_sb
.sb_sectsize
/BBSIZE
);
1121 agi_buf
->b_ops
= &xfs_agi_buf_ops
;
1122 agi
= XFS_BUF_TO_AGI(agi_buf
);
1123 memset(agi
, 0, mp
->m_sb
.sb_sectsize
);
1125 agi
->agi_magicnum
= cpu_to_be32(XFS_AGI_MAGIC
);
1126 agi
->agi_versionnum
= cpu_to_be32(XFS_AGI_VERSION
);
1127 agi
->agi_seqno
= cpu_to_be32(agno
);
1128 if (agno
< mp
->m_sb
.sb_agcount
- 1)
1129 agi
->agi_length
= cpu_to_be32(mp
->m_sb
.sb_agblocks
);
1131 agi
->agi_length
= cpu_to_be32(mp
->m_sb
.sb_dblocks
-
1132 (xfs_rfsblock_t
) mp
->m_sb
.sb_agblocks
* agno
);
1133 agi
->agi_count
= cpu_to_be32(agi_stat
->count
);
1134 agi
->agi_root
= cpu_to_be32(btree_curs
->root
);
1135 agi
->agi_level
= cpu_to_be32(btree_curs
->num_levels
);
1136 agi
->agi_freecount
= cpu_to_be32(agi_stat
->freecount
);
1137 agi
->agi_newino
= cpu_to_be32(agi_stat
->first_agino
);
1138 agi
->agi_dirino
= cpu_to_be32(NULLAGINO
);
1140 for (i
= 0; i
< XFS_AGI_UNLINKED_BUCKETS
; i
++)
1141 agi
->agi_unlinked
[i
] = cpu_to_be32(NULLAGINO
);
1143 if (xfs_sb_version_hascrc(&mp
->m_sb
))
1144 platform_uuid_copy(&agi
->agi_uuid
, &mp
->m_sb
.sb_meta_uuid
);
1146 if (xfs_sb_version_hasfinobt(&mp
->m_sb
)) {
1147 agi
->agi_free_root
= cpu_to_be32(finobt_curs
->root
);
1148 agi
->agi_free_level
= cpu_to_be32(finobt_curs
->num_levels
);
1151 libxfs_writebuf(agi_buf
, 0);
1155 * rebuilds an inode tree given a cursor. We're lazy here and call
1156 * the routine that builds the agi
1159 build_ino_tree(xfs_mount_t
*mp
, xfs_agnumber_t agno
,
1160 bt_status_t
*btree_curs
, __uint32_t magic
,
1161 struct agi_stat
*agi_stat
, int finobt
)
1165 xfs_agblock_t agbno
;
1166 xfs_agino_t first_agino
;
1167 struct xfs_btree_block
*bt_hdr
;
1168 xfs_inobt_rec_t
*bt_rec
;
1169 ino_tree_node_t
*ino_rec
;
1170 bt_stat_level_t
*lptr
;
1171 xfs_agino_t count
= 0;
1172 xfs_agino_t freecount
= 0;
1176 int level
= btree_curs
->num_levels
;
1181 for (i
= 0; i
< level
; i
++) {
1182 lptr
= &btree_curs
->level
[i
];
1184 agbno
= get_next_blockaddr(agno
, i
, btree_curs
);
1185 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1186 XFS_AGB_TO_DADDR(mp
, agno
, agbno
),
1187 XFS_FSB_TO_BB(mp
, 1));
1189 if (i
== btree_curs
->num_levels
- 1)
1190 btree_curs
->root
= agbno
;
1192 lptr
->agbno
= agbno
;
1193 lptr
->prev_agbno
= NULLAGBLOCK
;
1194 lptr
->prev_buf_p
= NULL
;
1196 * initialize block header
1199 lptr
->buf_p
->b_ops
= &xfs_inobt_buf_ops
;
1200 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1201 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1202 if (xfs_sb_version_hascrc(&mp
->m_sb
))
1203 libxfs_btree_init_block(mp
, lptr
->buf_p
, magic
,
1205 XFS_BTREE_CRC_BLOCKS
);
1207 libxfs_btree_init_block(mp
, lptr
->buf_p
, magic
,
1212 * run along leaf, setting up records. as we have to switch
1213 * blocks, call the prop_ino_cursor routine to set up the new
1214 * pointers for the parent. that can recurse up to the root
1215 * if required. set the sibling pointers for leaf level here.
1218 ino_rec
= findfirst_free_inode_rec(agno
);
1220 ino_rec
= findfirst_inode_rec(agno
);
1222 if (ino_rec
!= NULL
)
1223 first_agino
= ino_rec
->ino_startnum
;
1225 first_agino
= NULLAGINO
;
1227 lptr
= &btree_curs
->level
[0];
1229 for (i
= 0; i
< lptr
->num_blocks
; i
++) {
1231 * block initialization, lay in block header
1233 lptr
->buf_p
->b_ops
= &xfs_inobt_buf_ops
;
1234 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1235 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1236 if (xfs_sb_version_hascrc(&mp
->m_sb
))
1237 libxfs_btree_init_block(mp
, lptr
->buf_p
, magic
,
1239 XFS_BTREE_CRC_BLOCKS
);
1241 libxfs_btree_init_block(mp
, lptr
->buf_p
, magic
,
1244 bt_hdr
->bb_u
.s
.bb_leftsib
= cpu_to_be32(lptr
->prev_agbno
);
1245 bt_hdr
->bb_numrecs
= cpu_to_be16(lptr
->num_recs_pb
+
1246 (lptr
->modulo
> 0));
1248 if (lptr
->modulo
> 0)
1251 if (lptr
->num_recs_pb
> 0)
1252 prop_ino_cursor(mp
, agno
, btree_curs
,
1253 ino_rec
->ino_startnum
, 0);
1255 bt_rec
= (xfs_inobt_rec_t
*)
1256 ((char *)bt_hdr
+ XFS_INOBT_BLOCK_LEN(mp
));
1257 for (j
= 0; j
< be16_to_cpu(bt_hdr
->bb_numrecs
); j
++) {
1258 ASSERT(ino_rec
!= NULL
);
1259 bt_rec
[j
].ir_startino
=
1260 cpu_to_be32(ino_rec
->ino_startnum
);
1261 bt_rec
[j
].ir_free
= cpu_to_be64(ino_rec
->ir_free
);
1263 inocnt
= finocnt
= 0;
1264 for (k
= 0; k
< sizeof(xfs_inofree_t
)*NBBY
; k
++) {
1265 ASSERT(is_inode_confirmed(ino_rec
, k
));
1267 if (is_inode_sparse(ino_rec
, k
))
1269 if (is_inode_free(ino_rec
, k
))
1275 * Set the freecount and check whether we need to update
1276 * the sparse format fields. Otherwise, skip to the next
1279 inorec_set_freecount(mp
, &bt_rec
[j
], finocnt
);
1280 if (!xfs_sb_version_hassparseinodes(&mp
->m_sb
))
1284 * Convert the 64-bit in-core sparse inode state to the
1285 * 16-bit on-disk holemask.
1288 spmask
= (1 << XFS_INODES_PER_HOLEMASK_BIT
) - 1;
1289 sparse
= ino_rec
->ir_sparse
;
1290 for (k
= 0; k
< XFS_INOBT_HOLEMASK_BITS
; k
++) {
1291 if (sparse
& spmask
) {
1292 ASSERT((sparse
& spmask
) == spmask
);
1293 holemask
|= (1 << k
);
1295 ASSERT((sparse
& spmask
) == 0);
1296 sparse
>>= XFS_INODES_PER_HOLEMASK_BIT
;
1299 bt_rec
[j
].ir_u
.sp
.ir_count
= inocnt
;
1300 bt_rec
[j
].ir_u
.sp
.ir_holemask
= cpu_to_be16(holemask
);
1303 freecount
+= finocnt
;
1307 ino_rec
= next_free_ino_rec(ino_rec
);
1309 ino_rec
= next_ino_rec(ino_rec
);
1312 if (ino_rec
!= NULL
) {
1314 * get next leaf level block
1316 if (lptr
->prev_buf_p
!= NULL
) {
1317 #ifdef XR_BLD_INO_TRACE
1318 fprintf(stderr
, "writing inobt agbno %u\n",
1321 ASSERT(lptr
->prev_agbno
!= NULLAGBLOCK
);
1322 libxfs_writebuf(lptr
->prev_buf_p
, 0);
1324 lptr
->prev_buf_p
= lptr
->buf_p
;
1325 lptr
->prev_agbno
= lptr
->agbno
;
1326 lptr
->agbno
= get_next_blockaddr(agno
, 0, btree_curs
);
1327 bt_hdr
->bb_u
.s
.bb_rightsib
= cpu_to_be32(lptr
->agbno
);
1329 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1330 XFS_AGB_TO_DADDR(mp
, agno
, lptr
->agbno
),
1331 XFS_FSB_TO_BB(mp
, 1));
1336 agi_stat
->first_agino
= first_agino
;
1337 agi_stat
->count
= count
;
1338 agi_stat
->freecount
= freecount
;
1342 /* rebuild the rmap tree */
1345 * we don't have to worry here about how chewing up free extents
1346 * may perturb things because rmap tree building happens before
1347 * freespace tree building.
1351 struct xfs_mount
*mp
,
1352 xfs_agnumber_t agno
,
1353 struct bt_status
*btree_curs
)
1357 struct bt_stat_level
*lptr
;
1358 struct bt_stat_level
*p_lptr
;
1359 xfs_extlen_t blocks_allocated
;
1362 if (!xfs_sb_version_hasrmapbt(&mp
->m_sb
)) {
1363 memset(btree_curs
, 0, sizeof(struct bt_status
));
1367 lptr
= &btree_curs
->level
[0];
1368 btree_curs
->init
= 1;
1369 btree_curs
->owner
= XFS_RMAP_OWN_AG
;
1372 * build up statistics
1374 num_recs
= rmap_record_count(mp
, agno
);
1375 if (num_recs
== 0) {
1377 * easy corner-case -- no rmap records
1379 lptr
->num_blocks
= 1;
1381 lptr
->num_recs_pb
= 0;
1382 lptr
->num_recs_tot
= 0;
1384 btree_curs
->num_levels
= 1;
1385 btree_curs
->num_tot_blocks
= btree_curs
->num_free_blocks
= 1;
1387 setup_cursor(mp
, agno
, btree_curs
);
1393 * Leave enough slack in the rmapbt that we can insert the
1394 * metadata AG entries without too many splits.
1396 maxrecs
= mp
->m_rmap_mxr
[0];
1397 if (num_recs
> maxrecs
)
1399 blocks_allocated
= lptr
->num_blocks
= howmany(num_recs
, maxrecs
);
1401 lptr
->modulo
= num_recs
% lptr
->num_blocks
;
1402 lptr
->num_recs_pb
= num_recs
/ lptr
->num_blocks
;
1403 lptr
->num_recs_tot
= num_recs
;
1406 if (lptr
->num_blocks
> 1) {
1407 for (; btree_curs
->level
[level
-1].num_blocks
> 1
1408 && level
< XFS_BTREE_MAXLEVELS
;
1410 lptr
= &btree_curs
->level
[level
];
1411 p_lptr
= &btree_curs
->level
[level
- 1];
1412 lptr
->num_blocks
= howmany(p_lptr
->num_blocks
,
1414 lptr
->modulo
= p_lptr
->num_blocks
% lptr
->num_blocks
;
1415 lptr
->num_recs_pb
= p_lptr
->num_blocks
1417 lptr
->num_recs_tot
= p_lptr
->num_blocks
;
1419 blocks_allocated
+= lptr
->num_blocks
;
1422 ASSERT(lptr
->num_blocks
== 1);
1423 btree_curs
->num_levels
= level
;
1425 btree_curs
->num_tot_blocks
= btree_curs
->num_free_blocks
1428 setup_cursor(mp
, agno
, btree_curs
);
1433 struct xfs_mount
*mp
,
1434 xfs_agnumber_t agno
,
1435 struct bt_status
*btree_curs
,
1436 struct xfs_rmap_irec
*rm_rec
,
1439 struct xfs_btree_block
*bt_hdr
;
1440 struct xfs_rmap_key
*bt_key
;
1441 xfs_rmap_ptr_t
*bt_ptr
;
1442 xfs_agblock_t agbno
;
1443 struct bt_stat_level
*lptr
;
1447 if (level
>= btree_curs
->num_levels
)
1450 lptr
= &btree_curs
->level
[level
];
1451 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1453 if (be16_to_cpu(bt_hdr
->bb_numrecs
) == 0) {
1455 * this only happens once to initialize the
1456 * first path up the left side of the tree
1457 * where the agbno's are already set up
1459 prop_rmap_cursor(mp
, agno
, btree_curs
, rm_rec
, level
);
1462 if (be16_to_cpu(bt_hdr
->bb_numrecs
) ==
1463 lptr
->num_recs_pb
+ (lptr
->modulo
> 0)) {
1465 * write out current prev block, grab us a new block,
1466 * and set the rightsib pointer of current block
1468 #ifdef XR_BLD_INO_TRACE
1469 fprintf(stderr
, " rmap prop agbno %d ", lptr
->prev_agbno
);
1471 if (lptr
->prev_agbno
!= NULLAGBLOCK
) {
1472 ASSERT(lptr
->prev_buf_p
!= NULL
);
1473 libxfs_writebuf(lptr
->prev_buf_p
, 0);
1475 lptr
->prev_agbno
= lptr
->agbno
;
1476 lptr
->prev_buf_p
= lptr
->buf_p
;
1477 agbno
= get_next_blockaddr(agno
, level
, btree_curs
);
1479 bt_hdr
->bb_u
.s
.bb_rightsib
= cpu_to_be32(agbno
);
1481 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1482 XFS_AGB_TO_DADDR(mp
, agno
, agbno
),
1483 XFS_FSB_TO_BB(mp
, 1));
1484 lptr
->agbno
= agbno
;
1490 * initialize block header
1492 lptr
->buf_p
->b_ops
= &xfs_rmapbt_buf_ops
;
1493 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1494 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1495 libxfs_btree_init_block(mp
, lptr
->buf_p
, XFS_RMAP_CRC_MAGIC
,
1497 XFS_BTREE_CRC_BLOCKS
);
1499 bt_hdr
->bb_u
.s
.bb_leftsib
= cpu_to_be32(lptr
->prev_agbno
);
1502 * propagate extent record for first extent in new block up
1504 prop_rmap_cursor(mp
, agno
, btree_curs
, rm_rec
, level
);
1507 * add rmap info to current block
1509 be16_add_cpu(&bt_hdr
->bb_numrecs
, 1);
1511 bt_key
= XFS_RMAP_KEY_ADDR(bt_hdr
,
1512 be16_to_cpu(bt_hdr
->bb_numrecs
));
1513 bt_ptr
= XFS_RMAP_PTR_ADDR(bt_hdr
,
1514 be16_to_cpu(bt_hdr
->bb_numrecs
),
1517 bt_key
->rm_startblock
= cpu_to_be32(rm_rec
->rm_startblock
);
1518 bt_key
->rm_owner
= cpu_to_be64(rm_rec
->rm_owner
);
1519 bt_key
->rm_offset
= cpu_to_be64(rm_rec
->rm_offset
);
1521 *bt_ptr
= cpu_to_be32(btree_curs
->level
[level
-1].agbno
);
1526 struct xfs_mount
*mp
,
1527 xfs_agnumber_t agno
,
1528 struct bt_status
*btree_curs
,
1529 struct xfs_rmap_irec
*rm_highkey
)
1531 struct xfs_btree_block
*bt_hdr
;
1532 struct xfs_rmap_key
*bt_key
;
1533 struct bt_stat_level
*lptr
;
1534 struct xfs_rmap_irec key
= {0};
1535 struct xfs_rmap_irec high_key
;
1540 high_key
= *rm_highkey
;
1541 for (level
= 1; level
< btree_curs
->num_levels
; level
++) {
1542 lptr
= &btree_curs
->level
[level
];
1543 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1544 numrecs
= be16_to_cpu(bt_hdr
->bb_numrecs
);
1545 bt_key
= XFS_RMAP_HIGH_KEY_ADDR(bt_hdr
, numrecs
);
1547 bt_key
->rm_startblock
= cpu_to_be32(high_key
.rm_startblock
);
1548 bt_key
->rm_owner
= cpu_to_be64(high_key
.rm_owner
);
1549 bt_key
->rm_offset
= cpu_to_be64(
1550 libxfs_rmap_irec_offset_pack(&high_key
));
1552 for (i
= 1; i
< numrecs
- 1; i
++) {
1553 bt_key
= XFS_RMAP_HIGH_KEY_ADDR(bt_hdr
, i
);
1554 key
.rm_startblock
= be32_to_cpu(bt_key
->rm_startblock
);
1555 key
.rm_owner
= be64_to_cpu(bt_key
->rm_owner
);
1556 key
.rm_offset
= be64_to_cpu(bt_key
->rm_offset
);
1557 if (rmap_diffkeys(&key
, &high_key
) > 0)
1564 * rebuilds a rmap btree given a cursor.
1568 struct xfs_mount
*mp
,
1569 xfs_agnumber_t agno
,
1570 struct bt_status
*btree_curs
)
1574 xfs_agblock_t agbno
;
1575 struct xfs_btree_block
*bt_hdr
;
1576 struct xfs_rmap_irec
*rm_rec
;
1577 struct xfs_slab_cursor
*rmap_cur
;
1578 struct xfs_rmap_rec
*bt_rec
;
1579 struct xfs_rmap_irec highest_key
= {0};
1580 struct xfs_rmap_irec hi_key
= {0};
1581 struct bt_stat_level
*lptr
;
1582 int level
= btree_curs
->num_levels
;
1585 highest_key
.rm_flags
= 0;
1586 for (i
= 0; i
< level
; i
++) {
1587 lptr
= &btree_curs
->level
[i
];
1589 agbno
= get_next_blockaddr(agno
, i
, btree_curs
);
1590 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1591 XFS_AGB_TO_DADDR(mp
, agno
, agbno
),
1592 XFS_FSB_TO_BB(mp
, 1));
1594 if (i
== btree_curs
->num_levels
- 1)
1595 btree_curs
->root
= agbno
;
1597 lptr
->agbno
= agbno
;
1598 lptr
->prev_agbno
= NULLAGBLOCK
;
1599 lptr
->prev_buf_p
= NULL
;
1601 * initialize block header
1604 lptr
->buf_p
->b_ops
= &xfs_rmapbt_buf_ops
;
1605 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1606 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1607 libxfs_btree_init_block(mp
, lptr
->buf_p
, XFS_RMAP_CRC_MAGIC
,
1609 XFS_BTREE_CRC_BLOCKS
);
1613 * run along leaf, setting up records. as we have to switch
1614 * blocks, call the prop_rmap_cursor routine to set up the new
1615 * pointers for the parent. that can recurse up to the root
1616 * if required. set the sibling pointers for leaf level here.
1618 error
= rmap_init_cursor(agno
, &rmap_cur
);
1621 _("Insufficient memory to construct reverse-map cursor."));
1622 rm_rec
= pop_slab_cursor(rmap_cur
);
1623 lptr
= &btree_curs
->level
[0];
1625 for (i
= 0; i
< lptr
->num_blocks
&& rm_rec
!= NULL
; i
++) {
1627 * block initialization, lay in block header
1629 lptr
->buf_p
->b_ops
= &xfs_rmapbt_buf_ops
;
1630 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1631 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1632 libxfs_btree_init_block(mp
, lptr
->buf_p
, XFS_RMAP_CRC_MAGIC
,
1634 XFS_BTREE_CRC_BLOCKS
);
1636 bt_hdr
->bb_u
.s
.bb_leftsib
= cpu_to_be32(lptr
->prev_agbno
);
1637 bt_hdr
->bb_numrecs
= cpu_to_be16(lptr
->num_recs_pb
+
1638 (lptr
->modulo
> 0));
1640 if (lptr
->modulo
> 0)
1643 if (lptr
->num_recs_pb
> 0) {
1644 ASSERT(rm_rec
!= NULL
);
1645 prop_rmap_cursor(mp
, agno
, btree_curs
, rm_rec
, 0);
1648 bt_rec
= (struct xfs_rmap_rec
*)
1649 ((char *)bt_hdr
+ XFS_RMAP_BLOCK_LEN
);
1650 highest_key
.rm_startblock
= 0;
1651 highest_key
.rm_owner
= 0;
1652 highest_key
.rm_offset
= 0;
1653 for (j
= 0; j
< be16_to_cpu(bt_hdr
->bb_numrecs
); j
++) {
1654 ASSERT(rm_rec
!= NULL
);
1655 bt_rec
[j
].rm_startblock
=
1656 cpu_to_be32(rm_rec
->rm_startblock
);
1657 bt_rec
[j
].rm_blockcount
=
1658 cpu_to_be32(rm_rec
->rm_blockcount
);
1659 bt_rec
[j
].rm_owner
= cpu_to_be64(rm_rec
->rm_owner
);
1660 bt_rec
[j
].rm_offset
= cpu_to_be64(
1661 libxfs_rmap_irec_offset_pack(rm_rec
));
1662 rmap_high_key_from_rec(rm_rec
, &hi_key
);
1663 if (rmap_diffkeys(&hi_key
, &highest_key
) > 0)
1664 highest_key
= hi_key
;
1666 rm_rec
= pop_slab_cursor(rmap_cur
);
1669 /* Now go set the parent key */
1670 prop_rmap_highkey(mp
, agno
, btree_curs
, &highest_key
);
1672 if (rm_rec
!= NULL
) {
1674 * get next leaf level block
1676 if (lptr
->prev_buf_p
!= NULL
) {
1677 #ifdef XR_BLD_RL_TRACE
1678 fprintf(stderr
, "writing rmapbt agbno %u\n",
1681 ASSERT(lptr
->prev_agbno
!= NULLAGBLOCK
);
1682 libxfs_writebuf(lptr
->prev_buf_p
, 0);
1684 lptr
->prev_buf_p
= lptr
->buf_p
;
1685 lptr
->prev_agbno
= lptr
->agbno
;
1686 lptr
->agbno
= get_next_blockaddr(agno
, 0, btree_curs
);
1687 bt_hdr
->bb_u
.s
.bb_rightsib
= cpu_to_be32(lptr
->agbno
);
1689 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1690 XFS_AGB_TO_DADDR(mp
, agno
, lptr
->agbno
),
1691 XFS_FSB_TO_BB(mp
, 1));
1694 free_slab_cursor(&rmap_cur
);
1697 /* rebuild the refcount tree */
1700 * we don't have to worry here about how chewing up free extents
1701 * may perturb things because reflink tree building happens before
1702 * freespace tree building.
1706 struct xfs_mount
*mp
,
1707 xfs_agnumber_t agno
,
1708 struct bt_status
*btree_curs
)
1712 struct bt_stat_level
*lptr
;
1713 struct bt_stat_level
*p_lptr
;
1714 xfs_extlen_t blocks_allocated
;
1716 if (!xfs_sb_version_hasreflink(&mp
->m_sb
)) {
1717 memset(btree_curs
, 0, sizeof(struct bt_status
));
1721 lptr
= &btree_curs
->level
[0];
1722 btree_curs
->init
= 1;
1723 btree_curs
->owner
= XFS_RMAP_OWN_REFC
;
1726 * build up statistics
1728 num_recs
= refcount_record_count(mp
, agno
);
1729 if (num_recs
== 0) {
1731 * easy corner-case -- no refcount records
1733 lptr
->num_blocks
= 1;
1735 lptr
->num_recs_pb
= 0;
1736 lptr
->num_recs_tot
= 0;
1738 btree_curs
->num_levels
= 1;
1739 btree_curs
->num_tot_blocks
= btree_curs
->num_free_blocks
= 1;
1741 setup_cursor(mp
, agno
, btree_curs
);
1746 blocks_allocated
= lptr
->num_blocks
= howmany(num_recs
,
1749 lptr
->modulo
= num_recs
% lptr
->num_blocks
;
1750 lptr
->num_recs_pb
= num_recs
/ lptr
->num_blocks
;
1751 lptr
->num_recs_tot
= num_recs
;
1754 if (lptr
->num_blocks
> 1) {
1755 for (; btree_curs
->level
[level
-1].num_blocks
> 1
1756 && level
< XFS_BTREE_MAXLEVELS
;
1758 lptr
= &btree_curs
->level
[level
];
1759 p_lptr
= &btree_curs
->level
[level
- 1];
1760 lptr
->num_blocks
= howmany(p_lptr
->num_blocks
,
1762 lptr
->modulo
= p_lptr
->num_blocks
% lptr
->num_blocks
;
1763 lptr
->num_recs_pb
= p_lptr
->num_blocks
1765 lptr
->num_recs_tot
= p_lptr
->num_blocks
;
1767 blocks_allocated
+= lptr
->num_blocks
;
1770 ASSERT(lptr
->num_blocks
== 1);
1771 btree_curs
->num_levels
= level
;
1773 btree_curs
->num_tot_blocks
= btree_curs
->num_free_blocks
1776 setup_cursor(mp
, agno
, btree_curs
);
1781 struct xfs_mount
*mp
,
1782 xfs_agnumber_t agno
,
1783 struct bt_status
*btree_curs
,
1784 xfs_agblock_t startbno
,
1787 struct xfs_btree_block
*bt_hdr
;
1788 struct xfs_refcount_key
*bt_key
;
1789 xfs_refcount_ptr_t
*bt_ptr
;
1790 xfs_agblock_t agbno
;
1791 struct bt_stat_level
*lptr
;
1795 if (level
>= btree_curs
->num_levels
)
1798 lptr
= &btree_curs
->level
[level
];
1799 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1801 if (be16_to_cpu(bt_hdr
->bb_numrecs
) == 0) {
1803 * this only happens once to initialize the
1804 * first path up the left side of the tree
1805 * where the agbno's are already set up
1807 prop_refc_cursor(mp
, agno
, btree_curs
, startbno
, level
);
1810 if (be16_to_cpu(bt_hdr
->bb_numrecs
) ==
1811 lptr
->num_recs_pb
+ (lptr
->modulo
> 0)) {
1813 * write out current prev block, grab us a new block,
1814 * and set the rightsib pointer of current block
1816 #ifdef XR_BLD_INO_TRACE
1817 fprintf(stderr
, " ino prop agbno %d ", lptr
->prev_agbno
);
1819 if (lptr
->prev_agbno
!= NULLAGBLOCK
) {
1820 ASSERT(lptr
->prev_buf_p
!= NULL
);
1821 libxfs_writebuf(lptr
->prev_buf_p
, 0);
1823 lptr
->prev_agbno
= lptr
->agbno
;
1824 lptr
->prev_buf_p
= lptr
->buf_p
;
1825 agbno
= get_next_blockaddr(agno
, level
, btree_curs
);
1827 bt_hdr
->bb_u
.s
.bb_rightsib
= cpu_to_be32(agbno
);
1829 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1830 XFS_AGB_TO_DADDR(mp
, agno
, agbno
),
1831 XFS_FSB_TO_BB(mp
, 1));
1832 lptr
->agbno
= agbno
;
1838 * initialize block header
1840 lptr
->buf_p
->b_ops
= &xfs_refcountbt_buf_ops
;
1841 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1842 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1843 libxfs_btree_init_block(mp
, lptr
->buf_p
, XFS_REFC_CRC_MAGIC
,
1845 XFS_BTREE_CRC_BLOCKS
);
1847 bt_hdr
->bb_u
.s
.bb_leftsib
= cpu_to_be32(lptr
->prev_agbno
);
1850 * propagate extent record for first extent in new block up
1852 prop_refc_cursor(mp
, agno
, btree_curs
, startbno
, level
);
1855 * add inode info to current block
1857 be16_add_cpu(&bt_hdr
->bb_numrecs
, 1);
1859 bt_key
= XFS_REFCOUNT_KEY_ADDR(bt_hdr
,
1860 be16_to_cpu(bt_hdr
->bb_numrecs
));
1861 bt_ptr
= XFS_REFCOUNT_PTR_ADDR(bt_hdr
,
1862 be16_to_cpu(bt_hdr
->bb_numrecs
),
1865 bt_key
->rc_startblock
= cpu_to_be32(startbno
);
1866 *bt_ptr
= cpu_to_be32(btree_curs
->level
[level
-1].agbno
);
1870 * rebuilds a refcount btree given a cursor.
1873 build_refcount_tree(
1874 struct xfs_mount
*mp
,
1875 xfs_agnumber_t agno
,
1876 struct bt_status
*btree_curs
)
1880 xfs_agblock_t agbno
;
1881 struct xfs_btree_block
*bt_hdr
;
1882 struct xfs_refcount_irec
*refc_rec
;
1883 struct xfs_slab_cursor
*refc_cur
;
1884 struct xfs_refcount_rec
*bt_rec
;
1885 struct bt_stat_level
*lptr
;
1886 int level
= btree_curs
->num_levels
;
1889 for (i
= 0; i
< level
; i
++) {
1890 lptr
= &btree_curs
->level
[i
];
1892 agbno
= get_next_blockaddr(agno
, i
, btree_curs
);
1893 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1894 XFS_AGB_TO_DADDR(mp
, agno
, agbno
),
1895 XFS_FSB_TO_BB(mp
, 1));
1897 if (i
== btree_curs
->num_levels
- 1)
1898 btree_curs
->root
= agbno
;
1900 lptr
->agbno
= agbno
;
1901 lptr
->prev_agbno
= NULLAGBLOCK
;
1902 lptr
->prev_buf_p
= NULL
;
1904 * initialize block header
1907 lptr
->buf_p
->b_ops
= &xfs_refcountbt_buf_ops
;
1908 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1909 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1910 libxfs_btree_init_block(mp
, lptr
->buf_p
, XFS_REFC_CRC_MAGIC
,
1912 XFS_BTREE_CRC_BLOCKS
);
1916 * run along leaf, setting up records. as we have to switch
1917 * blocks, call the prop_refc_cursor routine to set up the new
1918 * pointers for the parent. that can recurse up to the root
1919 * if required. set the sibling pointers for leaf level here.
1921 error
= init_refcount_cursor(agno
, &refc_cur
);
1924 _("Insufficient memory to construct refcount cursor."));
1925 refc_rec
= pop_slab_cursor(refc_cur
);
1926 lptr
= &btree_curs
->level
[0];
1928 for (i
= 0; i
< lptr
->num_blocks
; i
++) {
1930 * block initialization, lay in block header
1932 lptr
->buf_p
->b_ops
= &xfs_refcountbt_buf_ops
;
1933 bt_hdr
= XFS_BUF_TO_BLOCK(lptr
->buf_p
);
1934 memset(bt_hdr
, 0, mp
->m_sb
.sb_blocksize
);
1935 libxfs_btree_init_block(mp
, lptr
->buf_p
, XFS_REFC_CRC_MAGIC
,
1937 XFS_BTREE_CRC_BLOCKS
);
1939 bt_hdr
->bb_u
.s
.bb_leftsib
= cpu_to_be32(lptr
->prev_agbno
);
1940 bt_hdr
->bb_numrecs
= cpu_to_be16(lptr
->num_recs_pb
+
1941 (lptr
->modulo
> 0));
1943 if (lptr
->modulo
> 0)
1946 if (lptr
->num_recs_pb
> 0)
1947 prop_refc_cursor(mp
, agno
, btree_curs
,
1948 refc_rec
->rc_startblock
, 0);
1950 bt_rec
= (struct xfs_refcount_rec
*)
1951 ((char *)bt_hdr
+ XFS_REFCOUNT_BLOCK_LEN
);
1952 for (j
= 0; j
< be16_to_cpu(bt_hdr
->bb_numrecs
); j
++) {
1953 ASSERT(refc_rec
!= NULL
);
1954 bt_rec
[j
].rc_startblock
=
1955 cpu_to_be32(refc_rec
->rc_startblock
);
1956 bt_rec
[j
].rc_blockcount
=
1957 cpu_to_be32(refc_rec
->rc_blockcount
);
1958 bt_rec
[j
].rc_refcount
= cpu_to_be32(refc_rec
->rc_refcount
);
1960 refc_rec
= pop_slab_cursor(refc_cur
);
1963 if (refc_rec
!= NULL
) {
1965 * get next leaf level block
1967 if (lptr
->prev_buf_p
!= NULL
) {
1968 #ifdef XR_BLD_RL_TRACE
1969 fprintf(stderr
, "writing refcntbt agbno %u\n",
1972 ASSERT(lptr
->prev_agbno
!= NULLAGBLOCK
);
1973 libxfs_writebuf(lptr
->prev_buf_p
, 0);
1975 lptr
->prev_buf_p
= lptr
->buf_p
;
1976 lptr
->prev_agbno
= lptr
->agbno
;
1977 lptr
->agbno
= get_next_blockaddr(agno
, 0, btree_curs
);
1978 bt_hdr
->bb_u
.s
.bb_rightsib
= cpu_to_be32(lptr
->agbno
);
1980 lptr
->buf_p
= libxfs_getbuf(mp
->m_dev
,
1981 XFS_AGB_TO_DADDR(mp
, agno
, lptr
->agbno
),
1982 XFS_FSB_TO_BB(mp
, 1));
1985 free_slab_cursor(&refc_cur
);
1989 * build both the agf and the agfl for an agno given both
1992 * XXX: yet more common code that can be shared with mkfs/growfs.
1996 struct xfs_mount
*mp
,
1997 xfs_agnumber_t agno
,
1998 struct bt_status
*bno_bt
,
1999 struct bt_status
*bcnt_bt
,
2000 xfs_extlen_t freeblks
, /* # free blocks in tree */
2001 int lostblocks
, /* # blocks that will be lost */
2002 struct bt_status
*rmap_bt
,
2003 struct bt_status
*refcnt_bt
,
2004 struct xfs_slab
*lost_fsb
)
2006 struct extent_tree_node
*ext_ptr
;
2007 struct xfs_buf
*agf_buf
, *agfl_buf
;
2009 struct xfs_agfl
*agfl
;
2010 struct xfs_agf
*agf
;
2015 agf_buf
= libxfs_getbuf(mp
->m_dev
,
2016 XFS_AG_DADDR(mp
, agno
, XFS_AGF_DADDR(mp
)),
2017 mp
->m_sb
.sb_sectsize
/BBSIZE
);
2018 agf_buf
->b_ops
= &xfs_agf_buf_ops
;
2019 agf
= XFS_BUF_TO_AGF(agf_buf
);
2020 memset(agf
, 0, mp
->m_sb
.sb_sectsize
);
2022 #ifdef XR_BLD_FREE_TRACE
2023 fprintf(stderr
, "agf = 0x%p, agf_buf->b_addr = 0x%p\n",
2024 agf
, agf_buf
->b_addr
);
2028 * set up fixed part of agf
2030 agf
->agf_magicnum
= cpu_to_be32(XFS_AGF_MAGIC
);
2031 agf
->agf_versionnum
= cpu_to_be32(XFS_AGF_VERSION
);
2032 agf
->agf_seqno
= cpu_to_be32(agno
);
2034 if (agno
< mp
->m_sb
.sb_agcount
- 1)
2035 agf
->agf_length
= cpu_to_be32(mp
->m_sb
.sb_agblocks
);
2037 agf
->agf_length
= cpu_to_be32(mp
->m_sb
.sb_dblocks
-
2038 (xfs_rfsblock_t
) mp
->m_sb
.sb_agblocks
* agno
);
2040 agf
->agf_roots
[XFS_BTNUM_BNO
] = cpu_to_be32(bno_bt
->root
);
2041 agf
->agf_levels
[XFS_BTNUM_BNO
] = cpu_to_be32(bno_bt
->num_levels
);
2042 agf
->agf_roots
[XFS_BTNUM_CNT
] = cpu_to_be32(bcnt_bt
->root
);
2043 agf
->agf_levels
[XFS_BTNUM_CNT
] = cpu_to_be32(bcnt_bt
->num_levels
);
2044 agf
->agf_roots
[XFS_BTNUM_RMAP
] = cpu_to_be32(rmap_bt
->root
);
2045 agf
->agf_levels
[XFS_BTNUM_RMAP
] = cpu_to_be32(rmap_bt
->num_levels
);
2046 agf
->agf_freeblks
= cpu_to_be32(freeblks
);
2047 agf
->agf_rmap_blocks
= cpu_to_be32(rmap_bt
->num_tot_blocks
-
2048 rmap_bt
->num_free_blocks
);
2049 agf
->agf_refcount_root
= cpu_to_be32(refcnt_bt
->root
);
2050 agf
->agf_refcount_level
= cpu_to_be32(refcnt_bt
->num_levels
);
2051 agf
->agf_refcount_blocks
= cpu_to_be32(refcnt_bt
->num_tot_blocks
-
2052 refcnt_bt
->num_free_blocks
);
2055 * Count and record the number of btree blocks consumed if required.
2057 if (xfs_sb_version_haslazysbcount(&mp
->m_sb
)) {
2060 * Don't count the root blocks as they are already
2063 blks
= (bno_bt
->num_tot_blocks
- bno_bt
->num_free_blocks
) +
2064 (bcnt_bt
->num_tot_blocks
- bcnt_bt
->num_free_blocks
) -
2066 if (xfs_sb_version_hasrmapbt(&mp
->m_sb
))
2067 blks
+= rmap_bt
->num_tot_blocks
- rmap_bt
->num_free_blocks
- 1;
2068 agf
->agf_btreeblks
= cpu_to_be32(blks
);
2069 #ifdef XR_BLD_FREE_TRACE
2070 fprintf(stderr
, "agf->agf_btreeblks = %u\n",
2071 be32_to_cpu(agf
->agf_btreeblks
));
2075 #ifdef XR_BLD_FREE_TRACE
2076 fprintf(stderr
, "bno root = %u, bcnt root = %u, indices = %u %u\n",
2077 be32_to_cpu(agf
->agf_roots
[XFS_BTNUM_BNO
]),
2078 be32_to_cpu(agf
->agf_roots
[XFS_BTNUM_CNT
]),
2083 if (xfs_sb_version_hascrc(&mp
->m_sb
))
2084 platform_uuid_copy(&agf
->agf_uuid
, &mp
->m_sb
.sb_meta_uuid
);
2086 /* initialise the AGFL, then fill it if there are blocks left over. */
2087 agfl_buf
= libxfs_getbuf(mp
->m_dev
,
2088 XFS_AG_DADDR(mp
, agno
, XFS_AGFL_DADDR(mp
)),
2089 mp
->m_sb
.sb_sectsize
/BBSIZE
);
2090 agfl_buf
->b_ops
= &xfs_agfl_buf_ops
;
2091 agfl
= XFS_BUF_TO_AGFL(agfl_buf
);
2093 /* setting to 0xff results in initialisation to NULLAGBLOCK */
2094 memset(agfl
, 0xff, mp
->m_sb
.sb_sectsize
);
2095 if (xfs_sb_version_hascrc(&mp
->m_sb
)) {
2096 agfl
->agfl_magicnum
= cpu_to_be32(XFS_AGFL_MAGIC
);
2097 agfl
->agfl_seqno
= cpu_to_be32(agno
);
2098 platform_uuid_copy(&agfl
->agfl_uuid
, &mp
->m_sb
.sb_meta_uuid
);
2099 for (i
= 0; i
< XFS_AGFL_SIZE(mp
); i
++)
2100 agfl
->agfl_bno
[i
] = cpu_to_be32(NULLAGBLOCK
);
2102 freelist
= XFS_BUF_TO_AGFL_BNO(mp
, agfl_buf
);
2105 * do we have left-over blocks in the btree cursors that should
2106 * be used to fill the AGFL?
2108 if (bno_bt
->num_free_blocks
> 0 || bcnt_bt
->num_free_blocks
> 0) {
2110 * yes, now grab as many blocks as we can
2113 while (bno_bt
->num_free_blocks
> 0 && i
< XFS_AGFL_SIZE(mp
)) {
2114 freelist
[i
] = cpu_to_be32(
2115 get_next_blockaddr(agno
, 0, bno_bt
));
2119 while (bcnt_bt
->num_free_blocks
> 0 && i
< XFS_AGFL_SIZE(mp
)) {
2120 freelist
[i
] = cpu_to_be32(
2121 get_next_blockaddr(agno
, 0, bcnt_bt
));
2125 * now throw the rest of the blocks away and complain
2127 while (bno_bt
->num_free_blocks
> 0) {
2128 fsb
= XFS_AGB_TO_FSB(mp
, agno
,
2129 get_next_blockaddr(agno
, 0, bno_bt
));
2130 error
= slab_add(lost_fsb
, &fsb
);
2133 _("Insufficient memory saving lost blocks.\n"));
2135 while (bcnt_bt
->num_free_blocks
> 0) {
2136 fsb
= XFS_AGB_TO_FSB(mp
, agno
,
2137 get_next_blockaddr(agno
, 0, bcnt_bt
));
2138 error
= slab_add(lost_fsb
, &fsb
);
2141 _("Insufficient memory saving lost blocks.\n"));
2144 agf
->agf_flfirst
= 0;
2145 agf
->agf_fllast
= cpu_to_be32(i
- 1);
2146 agf
->agf_flcount
= cpu_to_be32(i
);
2147 rmap_store_agflcount(mp
, agno
, i
);
2149 #ifdef XR_BLD_FREE_TRACE
2150 fprintf(stderr
, "writing agfl for ag %u\n", agno
);
2154 agf
->agf_flfirst
= 0;
2155 agf
->agf_fllast
= cpu_to_be32(XFS_AGFL_SIZE(mp
) - 1);
2156 agf
->agf_flcount
= 0;
2159 libxfs_writebuf(agfl_buf
, 0);
2161 ext_ptr
= findbiggest_bcnt_extent(agno
);
2162 agf
->agf_longest
= cpu_to_be32((ext_ptr
!= NULL
) ?
2163 ext_ptr
->ex_blockcount
: 0);
2165 ASSERT(be32_to_cpu(agf
->agf_roots
[XFS_BTNUM_BNOi
]) !=
2166 be32_to_cpu(agf
->agf_roots
[XFS_BTNUM_CNTi
]));
2167 ASSERT(be32_to_cpu(agf
->agf_refcount_root
) !=
2168 be32_to_cpu(agf
->agf_roots
[XFS_BTNUM_BNOi
]));
2169 ASSERT(be32_to_cpu(agf
->agf_refcount_root
) !=
2170 be32_to_cpu(agf
->agf_roots
[XFS_BTNUM_CNTi
]));
2172 libxfs_writebuf(agf_buf
, 0);
2175 * now fix up the free list appropriately
2177 fix_freelist(mp
, agno
, true);
2179 #ifdef XR_BLD_FREE_TRACE
2180 fprintf(stderr
, "wrote agf for ag %u\n", agno
);
2185 * update the superblock counters, sync the sb version numbers and
2186 * feature bits to the filesystem, and sync up the on-disk superblock
2187 * to match the incore superblock.
2190 sync_sb(xfs_mount_t
*mp
)
2194 bp
= libxfs_getsb(mp
, 0);
2196 do_error(_("couldn't get superblock\n"));
2198 mp
->m_sb
.sb_icount
= sb_icount
;
2199 mp
->m_sb
.sb_ifree
= sb_ifree
;
2200 mp
->m_sb
.sb_fdblocks
= sb_fdblocks
;
2201 mp
->m_sb
.sb_frextents
= sb_frextents
;
2203 update_sb_version(mp
);
2205 libxfs_sb_to_disk(XFS_BUF_TO_SBP(bp
), &mp
->m_sb
);
2206 libxfs_writebuf(bp
, 0);
2210 * make sure the root and realtime inodes show up allocated
2211 * even if they've been freed. they get reinitialized in phase6.
2214 keep_fsinos(xfs_mount_t
*mp
)
2216 ino_tree_node_t
*irec
;
2219 irec
= find_inode_rec(mp
, XFS_INO_TO_AGNO(mp
, mp
->m_sb
.sb_rootino
),
2220 XFS_INO_TO_AGINO(mp
, mp
->m_sb
.sb_rootino
));
2222 for (i
= 0; i
< 3; i
++)
2223 set_inode_used(irec
, i
);
2229 xfs_agnumber_t agno
,
2230 struct xfs_slab
*lost_fsb
)
2232 __uint64_t num_inos
;
2233 __uint64_t num_free_inos
;
2234 __uint64_t finobt_num_inos
;
2235 __uint64_t finobt_num_free_inos
;
2236 bt_status_t bno_btree_curs
;
2237 bt_status_t bcnt_btree_curs
;
2238 bt_status_t ino_btree_curs
;
2239 bt_status_t fino_btree_curs
;
2240 bt_status_t rmap_btree_curs
;
2241 bt_status_t refcnt_btree_curs
;
2242 int extra_blocks
= 0;
2243 uint num_freeblocks
;
2244 xfs_extlen_t freeblks1
;
2246 xfs_extlen_t freeblks2
;
2248 xfs_agblock_t num_extents
;
2250 struct agi_stat agi_stat
= {0,};
2254 do_log(_(" - agno = %d\n"), agno
);
2258 * build up incore bno and bcnt extent btrees
2260 num_extents
= mk_incore_fstree(mp
, agno
);
2262 #ifdef XR_BLD_FREE_TRACE
2263 fprintf(stderr
, "# of bno extents is %d\n",
2264 count_bno_extents(agno
));
2267 if (num_extents
== 0) {
2269 * XXX - what we probably should do here is pick an
2270 * inode for a regular file in the allocation group
2271 * that has space allocated and shoot it by traversing
2272 * the bmap list and putting all its extents on the
2273 * incore freespace trees, clearing the inode,
2274 * and clearing the in-use bit in the incore inode
2275 * tree. Then try mk_incore_fstree() again.
2277 do_error(_("unable to rebuild AG %u. "
2278 "Not enough free space in on-disk AG.\n"),
2283 * ok, now set up the btree cursors for the
2284 * on-disk btrees (includs pre-allocating all
2285 * required blocks for the trees themselves)
2287 init_ino_cursor(mp
, agno
, &ino_btree_curs
, &num_inos
,
2290 if (xfs_sb_version_hasfinobt(&mp
->m_sb
))
2291 init_ino_cursor(mp
, agno
, &fino_btree_curs
,
2292 &finobt_num_inos
, &finobt_num_free_inos
,
2295 sb_icount_ag
[agno
] += num_inos
;
2296 sb_ifree_ag
[agno
] += num_free_inos
;
2299 * Set up the btree cursors for the on-disk rmap btrees,
2300 * which includes pre-allocating all required blocks.
2302 init_rmapbt_cursor(mp
, agno
, &rmap_btree_curs
);
2305 * Set up the btree cursors for the on-disk refcount btrees,
2306 * which includes pre-allocating all required blocks.
2308 init_refc_cursor(mp
, agno
, &refcnt_btree_curs
);
2310 num_extents
= count_bno_extents_blocks(agno
, &num_freeblocks
);
2312 * lose two blocks per AG -- the space tree roots
2313 * are counted as allocated since the space trees
2316 sb_fdblocks_ag
[agno
] += num_freeblocks
- 2;
2318 if (num_extents
== 0) {
2320 * XXX - what we probably should do here is pick an
2321 * inode for a regular file in the allocation group
2322 * that has space allocated and shoot it by traversing
2323 * the bmap list and putting all its extents on the
2324 * incore freespace trees, clearing the inode,
2325 * and clearing the in-use bit in the incore inode
2326 * tree. Then try mk_incore_fstree() again.
2329 _("unable to rebuild AG %u. No free space.\n"), agno
);
2332 #ifdef XR_BLD_FREE_TRACE
2333 fprintf(stderr
, "# of bno extents is %d\n", num_extents
);
2337 * track blocks that we might really lose
2339 extra_blocks
= calculate_freespace_cursor(mp
, agno
,
2340 &num_extents
, &bno_btree_curs
);
2343 * freespace btrees live in the "free space" but
2344 * the filesystem treats AGFL blocks as allocated
2345 * since they aren't described by the freespace trees
2349 * see if we can fit all the extra blocks into the AGFL
2351 extra_blocks
= (extra_blocks
- XFS_AGFL_SIZE(mp
) > 0)
2352 ? extra_blocks
- XFS_AGFL_SIZE(mp
)
2355 if (extra_blocks
> 0)
2356 sb_fdblocks_ag
[agno
] -= extra_blocks
;
2358 bcnt_btree_curs
= bno_btree_curs
;
2360 bno_btree_curs
.owner
= XFS_RMAP_OWN_AG
;
2361 bcnt_btree_curs
.owner
= XFS_RMAP_OWN_AG
;
2362 setup_cursor(mp
, agno
, &bno_btree_curs
);
2363 setup_cursor(mp
, agno
, &bcnt_btree_curs
);
2365 #ifdef XR_BLD_FREE_TRACE
2366 fprintf(stderr
, "# of bno extents is %d\n",
2367 count_bno_extents(agno
));
2368 fprintf(stderr
, "# of bcnt extents is %d\n",
2369 count_bcnt_extents(agno
));
2373 * now rebuild the freespace trees
2375 freeblks1
= build_freespace_tree(mp
, agno
,
2376 &bno_btree_curs
, XFS_ABTB_MAGIC
);
2377 #ifdef XR_BLD_FREE_TRACE
2378 fprintf(stderr
, "# of free blocks == %d\n", freeblks1
);
2380 write_cursor(&bno_btree_curs
);
2383 freeblks2
= build_freespace_tree(mp
, agno
,
2384 &bcnt_btree_curs
, XFS_ABTC_MAGIC
);
2386 (void) build_freespace_tree(mp
, agno
,
2387 &bcnt_btree_curs
, XFS_ABTC_MAGIC
);
2389 write_cursor(&bcnt_btree_curs
);
2391 ASSERT(freeblks1
== freeblks2
);
2393 if (xfs_sb_version_hasrmapbt(&mp
->m_sb
)) {
2394 build_rmap_tree(mp
, agno
, &rmap_btree_curs
);
2395 write_cursor(&rmap_btree_curs
);
2396 sb_fdblocks_ag
[agno
] += (rmap_btree_curs
.num_tot_blocks
-
2397 rmap_btree_curs
.num_free_blocks
) - 1;
2400 if (xfs_sb_version_hasreflink(&mp
->m_sb
)) {
2401 build_refcount_tree(mp
, agno
, &refcnt_btree_curs
);
2402 write_cursor(&refcnt_btree_curs
);
2406 * set up agf and agfl
2408 build_agf_agfl(mp
, agno
, &bno_btree_curs
,
2409 &bcnt_btree_curs
, freeblks1
, extra_blocks
,
2410 &rmap_btree_curs
, &refcnt_btree_curs
, lost_fsb
);
2412 * build inode allocation tree.
2414 magic
= xfs_sb_version_hascrc(&mp
->m_sb
) ?
2415 XFS_IBT_CRC_MAGIC
: XFS_IBT_MAGIC
;
2416 build_ino_tree(mp
, agno
, &ino_btree_curs
, magic
, &agi_stat
, 0);
2417 write_cursor(&ino_btree_curs
);
2420 * build free inode tree
2422 if (xfs_sb_version_hasfinobt(&mp
->m_sb
)) {
2423 magic
= xfs_sb_version_hascrc(&mp
->m_sb
) ?
2424 XFS_FIBT_CRC_MAGIC
: XFS_FIBT_MAGIC
;
2425 build_ino_tree(mp
, agno
, &fino_btree_curs
, magic
,
2427 write_cursor(&fino_btree_curs
);
2431 build_agi(mp
, agno
, &ino_btree_curs
, &fino_btree_curs
,
2437 finish_cursor(&bno_btree_curs
);
2438 finish_cursor(&ino_btree_curs
);
2439 if (xfs_sb_version_hasrmapbt(&mp
->m_sb
))
2440 finish_cursor(&rmap_btree_curs
);
2441 if (xfs_sb_version_hasreflink(&mp
->m_sb
))
2442 finish_cursor(&refcnt_btree_curs
);
2443 if (xfs_sb_version_hasfinobt(&mp
->m_sb
))
2444 finish_cursor(&fino_btree_curs
);
2445 finish_cursor(&bcnt_btree_curs
);
2448 * Put the per-AG btree rmap data into the rmapbt
2450 error
= rmap_store_ag_btree_rec(mp
, agno
);
2453 _("unable to add AG %u reverse-mapping data to btree.\n"), agno
);
2456 * release the incore per-AG bno/bcnt trees so
2457 * the extent nodes can be recycled
2459 release_agbno_extent_tree(agno
);
2460 release_agbcnt_extent_tree(agno
);
2462 PROG_RPT_INC(prog_rpt_done
[agno
], 1);
/*
 * Inject lost blocks back into the filesystem.
 *
 * During the phase-5 btree rebuild, some blocks that were reserved for the
 * new btrees may end up unused ("lost"); their fsblock numbers were
 * collected into the lost_fsbs slab.  Walk that slab and hand each block
 * back to the free-space allocator, one transaction per block, crediting
 * the freed space to the AG-metadata rmap owner.
 *
 * Returns 0 on success or a positive errno-style error code.
 *
 * NOTE(review): the source text was mangled by extraction; a few dropped
 * lines (declarations, error guards, the cleanup label) were reconstructed
 * from upstream xfsprogs — verify against the original file.
 */
static int
inject_lost_blocks(
	struct xfs_mount	*mp,
	struct xfs_slab		*lost_fsbs)
{
	struct xfs_trans	*tp = NULL;	/* transaction in flight, if any */
	struct xfs_slab_cursor	*cur = NULL;	/* iterator over lost_fsbs */
	xfs_fsblock_t		*fsb;		/* current lost block number */
	struct xfs_trans_res	tres = {0};
	struct xfs_owner_info	oinfo;
	int			error;

	/* Freed blocks belong to AG metadata as far as the rmapbt is concerned. */
	libxfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
	error = init_slab_cursor(lost_fsbs, NULL, &cur);
	if (error)
		return error;

	/* Free each lost block in its own small transaction. */
	while ((fsb = pop_slab_cursor(cur)) != NULL) {
		/* 16 blocks reserved; presumably enough for a single-extent free — TODO confirm */
		error = -libxfs_trans_alloc(mp, &tres, 16, 0, 0, &tp);
		if (error)
			goto out_cancel;

		error = -libxfs_free_extent(tp, *fsb, 1, &oinfo,
				XFS_AG_RESV_NONE);
		if (error)
			goto out_cancel;

		error = -libxfs_trans_commit(tp);
		if (error)
			goto out_cancel;
		tp = NULL;	/* committed: nothing left to cancel */
	}

out_cancel:
	if (tp)
		libxfs_trans_cancel(tp);	/* abort the partially-built transaction */
	free_slab_cursor(&cur);
	return error;
}
2507 phase5(xfs_mount_t
*mp
)
2509 struct xfs_slab
*lost_fsb
;
2510 xfs_agnumber_t agno
;
2513 do_log(_("Phase 5 - rebuild AG headers and trees...\n"));
2514 set_progress_msg(PROG_FMT_REBUILD_AG
, (__uint64_t
)glob_agcount
);
2516 #ifdef XR_BLD_FREE_TRACE
2517 fprintf(stderr
, "inobt level 1, maxrec = %d, minrec = %d\n",
2518 libxfs_inobt_maxrecs(mp
, mp
->m_sb
.sb_blocksize
, 0),
2519 libxfs_inobt_maxrecs(mp
, mp
->m_sb
.sb_blocksize
, 0) / 2);
2520 fprintf(stderr
, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n",
2521 libxfs_inobt_maxrecs(mp
, mp
->m_sb
.sb_blocksize
, 1),
2522 libxfs_inobt_maxrecs(mp
, mp
->m_sb
.sb_blocksize
, 1) / 2);
2523 fprintf(stderr
, "xr inobt level 0 (leaf), maxrec = %d\n",
2524 XR_INOBT_BLOCK_MAXRECS(mp
, 0));
2525 fprintf(stderr
, "xr inobt level 1 (int), maxrec = %d\n",
2526 XR_INOBT_BLOCK_MAXRECS(mp
, 1));
2527 fprintf(stderr
, "bnobt level 1, maxrec = %d, minrec = %d\n",
2528 libxfs_allocbt_maxrecs(mp
, mp
->m_sb
.sb_blocksize
, 0),
2529 libxfs_allocbt_maxrecs(mp
, mp
->m_sb
.sb_blocksize
, 0) / 2);
2530 fprintf(stderr
, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n",
2531 libxfs_allocbt_maxrecs(mp
, mp
->m_sb
.sb_blocksize
, 1),
2532 libxfs_allocbt_maxrecs(mp
, mp
->m_sb
.sb_blocksize
, 1) / 2);
2535 * make sure the root and realtime inodes show up allocated
2539 /* allocate per ag counters */
2540 sb_icount_ag
= calloc(mp
->m_sb
.sb_agcount
, sizeof(__uint64_t
));
2541 if (sb_icount_ag
== NULL
)
2542 do_error(_("cannot alloc sb_icount_ag buffers\n"));
2544 sb_ifree_ag
= calloc(mp
->m_sb
.sb_agcount
, sizeof(__uint64_t
));
2545 if (sb_ifree_ag
== NULL
)
2546 do_error(_("cannot alloc sb_ifree_ag buffers\n"));
2548 sb_fdblocks_ag
= calloc(mp
->m_sb
.sb_agcount
, sizeof(__uint64_t
));
2549 if (sb_fdblocks_ag
== NULL
)
2550 do_error(_("cannot alloc sb_fdblocks_ag buffers\n"));
2552 error
= init_slab(&lost_fsb
, sizeof(xfs_fsblock_t
));
2554 do_error(_("cannot alloc lost block slab\n"));
2556 for (agno
= 0; agno
< mp
->m_sb
.sb_agcount
; agno
++)
2557 phase5_func(mp
, agno
, lost_fsb
);
2561 /* aggregate per ag counters */
2562 for (agno
= 0; agno
< mp
->m_sb
.sb_agcount
; agno
++) {
2563 sb_icount
+= sb_icount_ag
[agno
];
2564 sb_ifree
+= sb_ifree_ag
[agno
];
2565 sb_fdblocks
+= sb_fdblocks_ag
[agno
];
2569 free(sb_fdblocks_ag
);
2571 if (mp
->m_sb
.sb_rblocks
) {
2573 _(" - generate realtime summary info and bitmap...\n"));
2575 generate_rtinfo(mp
, btmcompute
, sumcompute
);
2578 do_log(_(" - reset superblock...\n"));
2581 * sync superblock counter and set version bits correctly
2585 error
= inject_lost_blocks(mp
, lost_fsb
);
2587 do_error(_("Unable to reinsert lost blocks into filesystem.\n"));
2588 free_slab(&lost_fsb
);