]>
git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/scan.c
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "err_protos.h"
33 static xfs_mount_t
*mp
= NULL
;
36 * Variables to validate AG header values against the manual count
37 * from the btree traversal.
41 xfs_extlen_t agffreeblks
;
42 xfs_extlen_t agflongest
;
43 __uint64_t agfbtreeblks
;
45 __uint32_t agifreecount
;
48 __uint64_t ifreecount
;
52 set_mp(xfs_mount_t
*mpp
)
54 libxfs_bcache_purge();
64 void (*func
)(struct xfs_btree_block
*block
,
75 const struct xfs_buf_ops
*ops
)
79 bp
= libxfs_readbuf(mp
->m_dev
, XFS_AGB_TO_DADDR(mp
, agno
, root
),
80 XFS_FSB_TO_BB(mp
, 1), 0, ops
);
82 do_error(_("can't read btree block %d/%d\n"), agno
, root
);
85 (*func
)(XFS_BUF_TO_BLOCK(bp
), nlevels
- 1, root
, agno
, suspect
,
91 * returns 1 on bad news (inode needs to be cleared), 0 on good
97 int (*func
)(struct xfs_btree_block
*block
,
106 bmap_cursor_t
*bm_cursor
,
117 bmap_cursor_t
*bm_cursor
,
121 const struct xfs_buf_ops
*ops
)
127 bp
= libxfs_readbuf(mp
->m_dev
, XFS_FSB_TO_DADDR(mp
, root
),
128 XFS_FSB_TO_BB(mp
, 1), 0, ops
);
130 do_error(_("can't read btree block %d/%d\n"),
131 XFS_FSB_TO_AGNO(mp
, root
),
132 XFS_FSB_TO_AGBNO(mp
, root
));
135 err
= (*func
)(XFS_BUF_TO_BLOCK(bp
), nlevels
- 1,
136 type
, whichfork
, root
, ino
, tot
, nex
, blkmapp
,
137 bm_cursor
, isroot
, check_dups
, &dirty
,
140 ASSERT(dirty
== 0 || (dirty
&& !no_modify
));
142 if (dirty
&& !no_modify
)
143 libxfs_writebuf(bp
, 0);
152 struct xfs_btree_block
*block
,
161 bmap_cursor_t
*bm_cursor
,
170 xfs_bmbt_key_t
*pkey
;
172 xfs_dfiloff_t first_key
;
173 xfs_dfiloff_t last_key
;
174 char *forkname
= get_forkname(whichfork
);
181 * unlike the ag freeblock btrees, if anything looks wrong
182 * in an inode bmap tree, just bail. it's possible that
183 * we'll miss a case where the to-be-toasted inode and
184 * another inode are claiming the same block but that's
187 if (be32_to_cpu(block
->bb_magic
) != magic
) {
189 _("bad magic # %#x in inode %" PRIu64
" (%s fork) bmbt block %" PRIu64
"\n"),
190 be32_to_cpu(block
->bb_magic
), ino
, forkname
, bno
);
193 if (be16_to_cpu(block
->bb_level
) != level
) {
195 _("expected level %d got %d in inode %" PRIu64
", (%s fork) bmbt block %" PRIu64
"\n"),
196 level
, be16_to_cpu(block
->bb_level
),
201 if (magic
== XFS_BMAP_CRC_MAGIC
) {
203 if (be64_to_cpu(block
->bb_u
.l
.bb_owner
) != ino
) {
205 _("expected owner inode %" PRIu64
", got %llu, bmbt block %" PRIu64
"\n"),
206 ino
, be64_to_cpu(block
->bb_u
.l
.bb_owner
), bno
);
211 if (check_dups
== 0) {
213 * check sibling pointers. if bad we have a conflict
214 * between the sibling pointers and the child pointers
215 * in the parent block. blow out the inode if that happens
217 if (bm_cursor
->level
[level
].fsbno
!= NULLDFSBNO
) {
219 * this is not the first block on this level
220 * so the cursor for this level has recorded the
221 * values for this block's left-sibling.
223 if (bno
!= bm_cursor
->level
[level
].right_fsbno
) {
225 _("bad fwd (right) sibling pointer (saw %" PRIu64
" parent block says %" PRIu64
")\n"
226 "\tin inode %" PRIu64
" (%s fork) bmap btree block %" PRIu64
"\n"),
227 bm_cursor
->level
[level
].right_fsbno
,
229 bm_cursor
->level
[level
].fsbno
);
232 if (be64_to_cpu(block
->bb_u
.l
.bb_leftsib
) !=
233 bm_cursor
->level
[level
].fsbno
) {
235 _("bad back (left) sibling pointer (saw %llu parent block says %" PRIu64
")\n"
236 "\tin inode %" PRIu64
" (%s fork) bmap btree block %" PRIu64
"\n"),
238 be64_to_cpu(block
->bb_u
.l
.bb_leftsib
),
239 bm_cursor
->level
[level
].fsbno
,
245 * This is the first or only block on this level.
246 * Check that the left sibling pointer is NULL
248 if (be64_to_cpu(block
->bb_u
.l
.bb_leftsib
) != NULLDFSBNO
) {
250 _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
251 "\tin inode %" PRIu64
" (%s fork) bmap btree block %" PRIu64
"\n"),
253 be64_to_cpu(block
->bb_u
.l
.bb_leftsib
),
260 * update cursor block pointers to reflect this block
262 bm_cursor
->level
[level
].fsbno
= bno
;
263 bm_cursor
->level
[level
].left_fsbno
=
264 be64_to_cpu(block
->bb_u
.l
.bb_leftsib
);
265 bm_cursor
->level
[level
].right_fsbno
=
266 be64_to_cpu(block
->bb_u
.l
.bb_rightsib
);
268 agno
= XFS_FSB_TO_AGNO(mp
, bno
);
269 agbno
= XFS_FSB_TO_AGBNO(mp
, bno
);
271 pthread_mutex_lock(&ag_locks
[agno
].lock
);
272 state
= get_bmap(agno
, agbno
);
277 set_bmap(agno
, agbno
, XR_E_INUSE
);
282 * we'll try and continue searching here since
283 * the block looks like it's been claimed by file
284 * to store user data, a directory to store directory
285 * data, or the space allocation btrees but since
286 * we made it here, the block probably
287 * contains btree data.
289 set_bmap(agno
, agbno
, XR_E_MULT
);
291 _("inode 0x%" PRIx64
"bmap block 0x%" PRIx64
" claimed, state is %d\n"),
296 set_bmap(agno
, agbno
, XR_E_MULT
);
298 _("inode 0x%" PRIx64
" bmap block 0x%" PRIx64
" claimed, state is %d\n"),
301 * if we made it to here, this is probably a bmap block
302 * that is being used by *another* file as a bmap block
303 * so the block will be valid. Both files should be
304 * trashed along with any other file that impinges on
305 * any blocks referenced by either file. So we
306 * continue searching down this btree to mark all
313 _("bad state %d, inode %" PRIu64
" bmap block 0x%" PRIx64
"\n"),
317 pthread_mutex_unlock(&ag_locks
[agno
].lock
);
320 * attribute fork for realtime files is in the regular
323 if (type
!= XR_INO_RTDATA
|| whichfork
!= XFS_DATA_FORK
) {
324 if (search_dup_extent(XFS_FSB_TO_AGNO(mp
, bno
),
325 XFS_FSB_TO_AGBNO(mp
, bno
),
326 XFS_FSB_TO_AGBNO(mp
, bno
) + 1))
329 if (search_rt_dup_extent(mp
, bno
))
334 numrecs
= be16_to_cpu(block
->bb_numrecs
);
337 if (numrecs
> mp
->m_bmap_dmxr
[0] || (isroot
== 0 && numrecs
<
338 mp
->m_bmap_dmnr
[0])) {
340 _("inode %" PRIu64
" bad # of bmap records (%u, min - %u, max - %u)\n"),
341 ino
, numrecs
, mp
->m_bmap_dmnr
[0],
345 rp
= XFS_BMBT_REC_ADDR(mp
, block
, 1);
348 * XXX - if we were going to fix up the btree record,
349 * we'd do it right here. For now, if there's a problem,
350 * we'll bail out and presumably clear the inode.
352 if (check_dups
== 0) {
353 err
= process_bmbt_reclist(mp
, rp
, &numrecs
, type
, ino
,
354 tot
, blkmapp
, &first_key
,
355 &last_key
, whichfork
);
360 * check that key ordering is monotonically increasing.
361 * if the last_key value in the cursor is set to
362 * NULLDFILOFF, then we know this is the first block
363 * on the leaf level and we shouldn't check the
366 if (first_key
<= bm_cursor
->level
[level
].last_key
&&
367 bm_cursor
->level
[level
].last_key
!=
370 _("out-of-order bmap key (file offset) in inode %" PRIu64
", %s fork, fsbno %" PRIu64
"\n"),
375 * update cursor keys to reflect this block.
376 * don't have to check if last_key is > first_key
377 * since that gets checked by process_bmbt_reclist.
379 bm_cursor
->level
[level
].first_key
= first_key
;
380 bm_cursor
->level
[level
].last_key
= last_key
;
384 return scan_bmbt_reclist(mp
, rp
, &numrecs
, type
, ino
,
388 if (numrecs
> mp
->m_bmap_dmxr
[1] || (isroot
== 0 && numrecs
<
389 mp
->m_bmap_dmnr
[1])) {
391 _("inode %" PRIu64
" bad # of bmap records (%u, min - %u, max - %u)\n"),
392 ino
, numrecs
, mp
->m_bmap_dmnr
[1], mp
->m_bmap_dmxr
[1]);
395 pp
= XFS_BMBT_PTR_ADDR(mp
, block
, 1, mp
->m_bmap_dmxr
[1]);
396 pkey
= XFS_BMBT_KEY_ADDR(mp
, block
, 1);
398 last_key
= NULLDFILOFF
;
400 for (i
= 0, err
= 0; i
< numrecs
; i
++) {
402 * XXX - if we were going to fix up the interior btree nodes,
403 * we'd do it right here. For now, if there's a problem,
404 * we'll bail out and presumably clear the inode.
406 if (!verify_dfsbno(mp
, be64_to_cpu(pp
[i
]))) {
408 _("bad bmap btree ptr 0x%llx in ino %" PRIu64
"\n"),
409 (unsigned long long) be64_to_cpu(pp
[i
]), ino
);
413 err
= scan_lbtree(be64_to_cpu(pp
[i
]), level
, scan_bmapbt
,
414 type
, whichfork
, ino
, tot
, nex
, blkmapp
,
415 bm_cursor
, 0, check_dups
, magic
,
421 * fix key (offset) mismatches between the first key
422 * in the child block (as recorded in the cursor) and the
423 * key in the interior node referencing the child block.
425 * fixes cases where entries have been shifted between
426 * child blocks but the parent hasn't been updated. We
427 * don't have to worry about the key values in the cursor
428 * not being set since we only look at the key values of
429 * our child and those are guaranteed to be set by the
430 * call to scan_lbtree() above.
432 if (check_dups
== 0 && be64_to_cpu(pkey
[i
].br_startoff
) !=
433 bm_cursor
->level
[level
-1].first_key
) {
436 _("correcting bt key (was %llu, now %" PRIu64
") in inode %" PRIu64
"\n"
437 "\t\t%s fork, btree block %" PRIu64
"\n"),
439 be64_to_cpu(pkey
[i
].br_startoff
),
440 bm_cursor
->level
[level
-1].first_key
,
444 pkey
[i
].br_startoff
= cpu_to_be64(
445 bm_cursor
->level
[level
-1].first_key
);
448 _("bad btree key (is %llu, should be %" PRIu64
") in inode %" PRIu64
"\n"
449 "\t\t%s fork, btree block %" PRIu64
"\n"),
451 be64_to_cpu(pkey
[i
].br_startoff
),
452 bm_cursor
->level
[level
-1].first_key
,
459 * If we're the last node at our level, check that the last child
460 * block's forward sibling pointer is NULL.
462 if (check_dups
== 0 &&
463 bm_cursor
->level
[level
].right_fsbno
== NULLDFSBNO
&&
464 bm_cursor
->level
[level
- 1].right_fsbno
!= NULLDFSBNO
) {
466 _("bad fwd (right) sibling pointer (saw %" PRIu64
" should be NULLDFSBNO)\n"
467 "\tin inode %" PRIu64
" (%s fork) bmap btree block %" PRIu64
"\n"),
468 bm_cursor
->level
[level
- 1].right_fsbno
,
469 ino
, forkname
, bm_cursor
->level
[level
- 1].fsbno
);
474 * update cursor keys to reflect this block
476 if (check_dups
== 0) {
477 bm_cursor
->level
[level
].first_key
=
478 be64_to_cpu(pkey
[0].br_startoff
);
479 bm_cursor
->level
[level
].last_key
=
480 be64_to_cpu(pkey
[numrecs
- 1].br_startoff
);
488 struct xfs_btree_block
*block
,
497 struct aghdr_cnts
*agcnts
= priv
;
505 xfs_extlen_t lastcount
= 0;
506 xfs_agblock_t lastblock
= 0;
509 case XFS_ABTB_CRC_MAGIC
:
513 case XFS_ABTC_CRC_MAGIC
:
523 if (be32_to_cpu(block
->bb_magic
) != magic
) {
524 do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
525 be32_to_cpu(block
->bb_magic
), name
, agno
, bno
);
532 * All freespace btree blocks except the roots are freed for a
533 * fully used filesystem, thus they are counted towards the
534 * free data block counter.
537 agcnts
->agfbtreeblks
++;
541 if (be16_to_cpu(block
->bb_level
) != level
) {
542 do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
543 level
, be16_to_cpu(block
->bb_level
), name
, agno
, bno
);
550 * check for btree blocks multiply claimed
552 state
= get_bmap(agno
, bno
);
553 if (state
!= XR_E_UNKNOWN
) {
554 set_bmap(agno
, bno
, XR_E_MULT
);
556 _("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
557 name
, state
, agno
, bno
, suspect
);
560 set_bmap(agno
, bno
, XR_E_FS_MAP
);
562 numrecs
= be16_to_cpu(block
->bb_numrecs
);
565 if (numrecs
> mp
->m_alloc_mxr
[0]) {
566 numrecs
= mp
->m_alloc_mxr
[0];
569 if (isroot
== 0 && numrecs
< mp
->m_alloc_mnr
[0]) {
570 numrecs
= mp
->m_alloc_mnr
[0];
576 _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
577 be16_to_cpu(block
->bb_numrecs
),
578 mp
->m_alloc_mnr
[0], mp
->m_alloc_mxr
[0],
583 rp
= XFS_ALLOC_REC_ADDR(mp
, block
, 1);
584 for (i
= 0; i
< numrecs
; i
++) {
585 xfs_agblock_t b
, end
;
586 xfs_extlen_t len
, blen
;
588 b
= be32_to_cpu(rp
[i
].ar_startblock
);
589 len
= be32_to_cpu(rp
[i
].ar_blockcount
);
592 if (b
== 0 || !verify_agbno(mp
, agno
, b
)) {
594 _("invalid start block %u in record %u of %s btree block %u/%u\n"),
595 b
, i
, name
, agno
, bno
);
598 if (len
== 0 || !verify_agbno(mp
, agno
, end
- 1)) {
600 _("invalid length %u in record %u of %s btree block %u/%u\n"),
601 len
, i
, name
, agno
, bno
);
605 if (magic
== XFS_ABTB_MAGIC
||
606 magic
== XFS_ABTB_CRC_MAGIC
) {
607 if (b
<= lastblock
) {
609 "out-of-order bno btree record %d (%u %u) block %u/%u\n"),
610 i
, b
, len
, agno
, bno
);
615 agcnts
->fdblocks
+= len
;
616 agcnts
->agffreeblks
+= len
;
617 if (len
> agcnts
->agflongest
)
618 agcnts
->agflongest
= len
;
619 if (len
< lastcount
) {
621 "out-of-order cnt btree record %d (%u %u) block %u/%u\n"),
622 i
, b
, len
, agno
, bno
);
628 for ( ; b
< end
; b
+= blen
) {
629 state
= get_bmap_ext(agno
, b
, end
, &blen
);
632 set_bmap(agno
, b
, XR_E_FREE1
);
636 * no warning messages -- we'll catch
639 if (magic
== XFS_ABTC_MAGIC
||
640 magic
== XFS_ABTC_CRC_MAGIC
) {
641 set_bmap_ext(agno
, b
, blen
,
647 _("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
648 agno
, b
, b
+ blen
- 1,
660 pp
= XFS_ALLOC_PTR_ADDR(mp
, block
, 1, mp
->m_alloc_mxr
[1]);
662 if (numrecs
> mp
->m_alloc_mxr
[1]) {
663 numrecs
= mp
->m_alloc_mxr
[1];
666 if (isroot
== 0 && numrecs
< mp
->m_alloc_mnr
[1]) {
667 numrecs
= mp
->m_alloc_mnr
[1];
672 * don't pass bogus tree flag down further if this block
673 * looked ok. bail out if two levels in a row look bad.
677 _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
678 be16_to_cpu(block
->bb_numrecs
),
679 mp
->m_alloc_mnr
[1], mp
->m_alloc_mxr
[1],
684 } else if (suspect
) {
688 for (i
= 0; i
< numrecs
; i
++) {
689 xfs_agblock_t bno
= be32_to_cpu(pp
[i
]);
692 * XXX - put sibling detection right here.
693 * we know our sibling chain is good. So as we go,
694 * we check the entry before and after each entry.
695 * If either of the entries references a different block,
696 * check the sibling pointer. If there's a sibling
697 * pointer mismatch, try and extract as much data
700 if (bno
!= 0 && verify_agbno(mp
, agno
, bno
)) {
702 case XFS_ABTB_CRC_MAGIC
:
704 scan_sbtree(bno
, level
, agno
, suspect
,
705 scan_allocbt
, 0, magic
, priv
,
706 &xfs_allocbt_buf_ops
);
708 case XFS_ABTC_CRC_MAGIC
:
710 scan_sbtree(bno
, level
, agno
, suspect
,
711 scan_allocbt
, 0, magic
, priv
,
712 &xfs_allocbt_buf_ops
);
720 scan_single_ino_chunk(
732 ino_tree_node_t
*ino_rec
, *first_rec
, *last_rec
;
734 ino
= be32_to_cpu(rp
->ir_startino
);
735 off
= XFS_AGINO_TO_OFFSET(mp
, ino
);
736 agbno
= XFS_AGINO_TO_AGBNO(mp
, ino
);
737 lino
= XFS_AGINO_TO_INO(mp
, agno
, ino
);
740 * on multi-block chunks, all chunks start
741 * at the beginning of the block. with multi-chunk
742 * blocks, all chunks must start on 64-inode boundaries
743 * since each block can hold N complete chunks. if
744 * fs has aligned inodes, all chunks must start
745 * at a fs_ino_alignment*N'th agbno. skip recs
746 * with badly aligned starting inodes.
749 (inodes_per_block
<= XFS_INODES_PER_CHUNK
&& off
!= 0) ||
750 (inodes_per_block
> XFS_INODES_PER_CHUNK
&&
751 off
% XFS_INODES_PER_CHUNK
!= 0) ||
752 (fs_aligned_inodes
&& agbno
% fs_ino_alignment
!= 0)) {
754 _("badly aligned inode rec (starting inode = %" PRIu64
")\n"),
760 * verify numeric validity of inode chunk first
761 * before inserting into a tree. don't have to
762 * worry about the overflow case because the
763 * starting ino number of a chunk can only get
764 * within 255 inodes of max (NULLAGINO). if it
765 * gets closer, the agino number will be illegal
766 * as the agbno will be too large.
768 if (verify_aginum(mp
, agno
, ino
)) {
770 _("bad starting inode # (%" PRIu64
" (0x%x 0x%x)) in ino rec, skipping rec\n"),
775 if (verify_aginum(mp
, agno
,
776 ino
+ XFS_INODES_PER_CHUNK
- 1)) {
778 _("bad ending inode # (%" PRIu64
" (0x%x 0x%zx)) in ino rec, skipping rec\n"),
779 lino
+ XFS_INODES_PER_CHUNK
- 1,
781 ino
+ XFS_INODES_PER_CHUNK
- 1);
786 * set state of each block containing inodes
788 if (off
== 0 && !suspect
) {
790 j
< XFS_INODES_PER_CHUNK
;
791 j
+= mp
->m_sb
.sb_inopblock
) {
792 agbno
= XFS_AGINO_TO_AGBNO(mp
, ino
+ j
);
794 state
= get_bmap(agno
, agbno
);
795 if (state
== XR_E_UNKNOWN
) {
796 set_bmap(agno
, agbno
, XR_E_INO
);
797 } else if (state
== XR_E_INUSE_FS
&& agno
== 0 &&
798 ino
+ j
>= first_prealloc_ino
&&
799 ino
+ j
< last_prealloc_ino
) {
800 set_bmap(agno
, agbno
, XR_E_INO
);
803 _("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
804 agno
, agbno
, mp
->m_sb
.sb_inopblock
);
806 * XXX - maybe should mark
815 * ensure only one avl entry per chunk
817 find_inode_rec_range(mp
, agno
, ino
, ino
+ XFS_INODES_PER_CHUNK
,
818 &first_rec
, &last_rec
);
819 if (first_rec
!= NULL
) {
821 * this chunk overlaps with one (or more)
822 * already in the tree
825 _("inode rec for ino %" PRIu64
" (%d/%d) overlaps existing rec (start %d/%d)\n"),
826 lino
, agno
, ino
, agno
, first_rec
->ino_startnum
);
830 * if the 2 chunks start at the same place,
831 * then we don't have to put this one
832 * in the uncertain list. go to the next one.
834 if (first_rec
->ino_startnum
== ino
)
841 * now mark all the inodes as existing and free or used.
842 * if the tree is suspect, put them into the uncertain
846 if (XFS_INOBT_IS_FREE_DISK(rp
, 0)) {
848 ino_rec
= set_inode_free_alloc(mp
, agno
, ino
);
850 ino_rec
= set_inode_used_alloc(mp
, agno
, ino
);
852 for (j
= 1; j
< XFS_INODES_PER_CHUNK
; j
++) {
853 if (XFS_INOBT_IS_FREE_DISK(rp
, j
)) {
855 set_inode_free(ino_rec
, j
);
857 set_inode_used(ino_rec
, j
);
861 for (j
= 0; j
< XFS_INODES_PER_CHUNK
; j
++) {
862 if (XFS_INOBT_IS_FREE_DISK(rp
, j
)) {
864 add_aginode_uncertain(mp
, agno
, ino
+ j
, 1);
866 add_aginode_uncertain(mp
, agno
, ino
+ j
, 0);
871 if (nfree
!= be32_to_cpu(rp
->ir_freecount
)) {
872 do_warn(_("ir_freecount/free mismatch, inode "
873 "chunk %d/%u, freecount %d nfree %d\n"),
874 agno
, ino
, be32_to_cpu(rp
->ir_freecount
), nfree
);
882 * this one walks the inode btrees sucking the info there into
883 * the incore avl tree. We try and rescue corrupted btree records
884 * to minimize our chances of losing inodes. Inode info from potentially
885 * corrupt sources could be bogus so rather than put the info straight
886 * into the tree, instead we put it on a list and try and verify the
887 * info in the next phase by examining what's on disk. At that point,
888 * we'll be able to figure out what's what and stick the corrected info
889 * into the tree. We do bail out at some point and give up on a subtree
890 * so as to avoid walking randomly all over the ag.
892 * Note that it's also ok if the free/inuse info is wrong, we can correct
893 * that when we examine the on-disk inode. The important thing is to
894 * get the start and alignment of the inode chunks right. Those chunks
895 * that we aren't sure about go into the uncertain list.
899 struct xfs_btree_block
*block
,
908 struct aghdr_cnts
*agcnts
= priv
;
918 if (be32_to_cpu(block
->bb_magic
) != magic
) {
919 do_warn(_("bad magic # %#x in inobt block %d/%d\n"),
920 be32_to_cpu(block
->bb_magic
), agno
, bno
);
926 if (be16_to_cpu(block
->bb_level
) != level
) {
927 do_warn(_("expected level %d got %d in inobt block %d/%d\n"),
928 level
, be16_to_cpu(block
->bb_level
), agno
, bno
);
936 * check for btree blocks multiply claimed, any unknown/free state
937 * is ok in the bitmap block.
939 state
= get_bmap(agno
, bno
);
944 set_bmap(agno
, bno
, XR_E_FS_MAP
);
947 set_bmap(agno
, bno
, XR_E_MULT
);
949 _("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
950 state
, agno
, bno
, suspect
);
953 numrecs
= be16_to_cpu(block
->bb_numrecs
);
956 * leaf record in btree
959 /* check for trashed btree block */
961 if (numrecs
> mp
->m_inobt_mxr
[0]) {
962 numrecs
= mp
->m_inobt_mxr
[0];
965 if (isroot
== 0 && numrecs
< mp
->m_inobt_mnr
[0]) {
966 numrecs
= mp
->m_inobt_mnr
[0];
972 do_warn(_("dubious inode btree block header %d/%d\n"),
977 rp
= XFS_INOBT_REC_ADDR(mp
, block
, 1);
980 * step through the records, each record points to
981 * a chunk of inodes. The start of inode chunks should
982 * be block-aligned. Each inode btree rec should point
983 * to the start of a block of inodes or the start of a group
984 * of INODES_PER_CHUNK (64) inodes. off is the offset into
985 * the block. skip processing of bogus records.
987 for (i
= 0; i
< numrecs
; i
++) {
988 agcnts
->agicount
+= XFS_INODES_PER_CHUNK
;
989 agcnts
->icount
+= XFS_INODES_PER_CHUNK
;
990 agcnts
->agifreecount
+= be32_to_cpu(rp
[i
].ir_freecount
);
991 agcnts
->ifreecount
+= be32_to_cpu(rp
[i
].ir_freecount
);
993 suspect
= scan_single_ino_chunk(agno
, &rp
[i
], suspect
);
1003 * interior record, continue on
1005 if (numrecs
> mp
->m_inobt_mxr
[1]) {
1006 numrecs
= mp
->m_inobt_mxr
[1];
1009 if (isroot
== 0 && numrecs
< mp
->m_inobt_mnr
[1]) {
1010 numrecs
= mp
->m_inobt_mnr
[1];
1014 pp
= XFS_INOBT_PTR_ADDR(mp
, block
, 1, mp
->m_inobt_mxr
[1]);
1017 * don't pass bogus tree flag down further if this block
1018 * looked ok. bail out if two levels in a row look bad.
1021 if (suspect
&& !hdr_errors
)
1031 for (i
= 0; i
< numrecs
; i
++) {
1032 if (be32_to_cpu(pp
[i
]) != 0 && verify_agbno(mp
, agno
,
1033 be32_to_cpu(pp
[i
])))
1034 scan_sbtree(be32_to_cpu(pp
[i
]), level
, agno
,
1035 suspect
, scan_inobt
, 0, magic
, priv
,
1036 &xfs_inobt_buf_ops
);
1043 struct aghdr_cnts
*agcnts
)
1046 xfs_agnumber_t agno
;
1052 agno
= be32_to_cpu(agf
->agf_seqno
);
1054 if (XFS_SB_BLOCK(mp
) != XFS_AGFL_BLOCK(mp
) &&
1055 XFS_AGF_BLOCK(mp
) != XFS_AGFL_BLOCK(mp
) &&
1056 XFS_AGI_BLOCK(mp
) != XFS_AGFL_BLOCK(mp
))
1057 set_bmap(agno
, XFS_AGFL_BLOCK(mp
), XR_E_FS_MAP
);
1059 if (be32_to_cpu(agf
->agf_flcount
) == 0)
1062 agflbuf
= libxfs_readbuf(mp
->m_dev
,
1063 XFS_AG_DADDR(mp
, agno
, XFS_AGFL_DADDR(mp
)),
1064 XFS_FSS_TO_BB(mp
, 1), 0, &xfs_agfl_buf_ops
);
1066 do_abort(_("can't read agfl block for ag %d\n"), agno
);
1069 freelist
= XFS_BUF_TO_AGFL_BNO(mp
, agflbuf
);
1070 i
= be32_to_cpu(agf
->agf_flfirst
);
1073 /* agf values not fixed in verify_set_agf, so recheck */
1074 if (be32_to_cpu(agf
->agf_flfirst
) >= XFS_AGFL_SIZE(mp
) ||
1075 be32_to_cpu(agf
->agf_fllast
) >= XFS_AGFL_SIZE(mp
)) {
1076 do_warn(_("agf %d freelist blocks bad, skipping "
1077 "freelist scan\n"), i
);
1084 bno
= be32_to_cpu(freelist
[i
]);
1085 if (verify_agbno(mp
, agno
, bno
))
1086 set_bmap(agno
, bno
, XR_E_FREE
);
1088 do_warn(_("bad agbno %u in agfl, agno %d\n"),
1091 if (i
== be32_to_cpu(agf
->agf_fllast
))
1093 if (++i
== XFS_AGFL_SIZE(mp
))
1096 if (count
!= be32_to_cpu(agf
->agf_flcount
)) {
1097 do_warn(_("freeblk count %d != flcount %d in ag %d\n"), count
,
1098 be32_to_cpu(agf
->agf_flcount
), agno
);
1101 agcnts
->fdblocks
+= count
;
1103 libxfs_putbuf(agflbuf
);
1108 struct xfs_agf
*agf
,
1109 xfs_agnumber_t agno
,
1110 struct aghdr_cnts
*agcnts
)
1115 bno
= be32_to_cpu(agf
->agf_roots
[XFS_BTNUM_BNO
]);
1116 if (bno
!= 0 && verify_agbno(mp
, agno
, bno
)) {
1117 magic
= xfs_sb_version_hascrc(&mp
->m_sb
) ? XFS_ABTB_CRC_MAGIC
1119 scan_sbtree(bno
, be32_to_cpu(agf
->agf_levels
[XFS_BTNUM_BNO
]),
1120 agno
, 0, scan_allocbt
, 1, magic
, agcnts
,
1121 &xfs_allocbt_buf_ops
);
1123 do_warn(_("bad agbno %u for btbno root, agno %d\n"),
1127 bno
= be32_to_cpu(agf
->agf_roots
[XFS_BTNUM_CNT
]);
1128 if (bno
!= 0 && verify_agbno(mp
, agno
, bno
)) {
1129 magic
= xfs_sb_version_hascrc(&mp
->m_sb
) ? XFS_ABTC_CRC_MAGIC
1131 scan_sbtree(bno
, be32_to_cpu(agf
->agf_levels
[XFS_BTNUM_CNT
]),
1132 agno
, 0, scan_allocbt
, 1, magic
, agcnts
,
1133 &xfs_allocbt_buf_ops
);
1135 do_warn(_("bad agbno %u for btbcnt root, agno %d\n"),
1139 if (be32_to_cpu(agf
->agf_freeblks
) != agcnts
->agffreeblks
) {
1140 do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
1141 be32_to_cpu(agf
->agf_freeblks
), agcnts
->agffreeblks
, agno
);
1144 if (be32_to_cpu(agf
->agf_longest
) != agcnts
->agflongest
) {
1145 do_warn(_("agf_longest %u, counted %u in ag %u\n"),
1146 be32_to_cpu(agf
->agf_longest
), agcnts
->agflongest
, agno
);
1149 if (xfs_sb_version_haslazysbcount(&mp
->m_sb
) &&
1150 be32_to_cpu(agf
->agf_btreeblks
) != agcnts
->agfbtreeblks
) {
1151 do_warn(_("agf_btreeblks %u, counted %" PRIu64
" in ag %u\n"),
1152 be32_to_cpu(agf
->agf_btreeblks
), agcnts
->agfbtreeblks
, agno
);
1158 struct xfs_agi
*agi
,
1159 xfs_agnumber_t agno
,
1160 struct aghdr_cnts
*agcnts
)
1166 bno
= be32_to_cpu(agi
->agi_root
);
1167 if (bno
!= 0 && verify_agbno(mp
, agno
, bno
)) {
1168 magic
= xfs_sb_version_hascrc(&mp
->m_sb
) ? XFS_IBT_CRC_MAGIC
1170 scan_sbtree(bno
, be32_to_cpu(agi
->agi_level
),
1171 agno
, 0, scan_inobt
, 1, magic
, agcnts
,
1172 &xfs_inobt_buf_ops
);
1174 do_warn(_("bad agbno %u for inobt root, agno %d\n"),
1175 be32_to_cpu(agi
->agi_root
), agno
);
1178 if (be32_to_cpu(agi
->agi_count
) != agcnts
->agicount
) {
1179 do_warn(_("agi_count %u, counted %u in ag %u\n"),
1180 be32_to_cpu(agi
->agi_count
), agcnts
->agicount
, agno
);
1183 if (be32_to_cpu(agi
->agi_freecount
) != agcnts
->agifreecount
) {
1184 do_warn(_("agi_freecount %u, counted %u in ag %u\n"),
1185 be32_to_cpu(agi
->agi_freecount
), agcnts
->agifreecount
, agno
);
1188 for (i
= 0; i
< XFS_AGI_UNLINKED_BUCKETS
; i
++) {
1189 xfs_agino_t agino
= be32_to_cpu(agi
->agi_unlinked
[i
]);
1191 if (agino
!= NULLAGINO
) {
1193 _("agi unlinked bucket %d is %u in ag %u (inode=%" PRIu64
")\n"),
1195 XFS_AGINO_TO_INO(mp
, agno
, agino
));
1201 * Scan an AG for obvious corruption.
1206 xfs_agnumber_t agno
,
1209 struct aghdr_cnts
*agcnts
= arg
;
1221 sbbuf
= libxfs_readbuf(mp
->m_dev
, XFS_AG_DADDR(mp
, agno
, XFS_SB_DADDR
),
1222 XFS_FSS_TO_BB(mp
, 1), 0, &xfs_sb_buf_ops
);
1224 do_error(_("can't get root superblock for ag %d\n"), agno
);
1228 sb
= (xfs_sb_t
*)calloc(BBSIZE
, 1);
1230 do_error(_("can't allocate memory for superblock\n"));
1231 libxfs_putbuf(sbbuf
);
1234 libxfs_sb_from_disk(sb
, XFS_BUF_TO_SBP(sbbuf
));
1236 agfbuf
= libxfs_readbuf(mp
->m_dev
,
1237 XFS_AG_DADDR(mp
, agno
, XFS_AGF_DADDR(mp
)),
1238 XFS_FSS_TO_BB(mp
, 1), 0, &xfs_agf_buf_ops
);
1240 do_error(_("can't read agf block for ag %d\n"), agno
);
1241 libxfs_putbuf(sbbuf
);
1245 agf
= XFS_BUF_TO_AGF(agfbuf
);
1247 agibuf
= libxfs_readbuf(mp
->m_dev
,
1248 XFS_AG_DADDR(mp
, agno
, XFS_AGI_DADDR(mp
)),
1249 XFS_FSS_TO_BB(mp
, 1), 0, &xfs_agi_buf_ops
);
1251 do_error(_("can't read agi block for ag %d\n"), agno
);
1252 libxfs_putbuf(agfbuf
);
1253 libxfs_putbuf(sbbuf
);
1257 agi
= XFS_BUF_TO_AGI(agibuf
);
1259 /* fix up bad ag headers */
1261 status
= verify_set_agheader(mp
, sbbuf
, sb
, agf
, agi
, agno
);
1263 if (status
& XR_AG_SB_SEC
) {
1267 * clear bad sector bit because we don't want
1268 * to skip further processing. we just want to
1269 * ensure that we write out the modified sb buffer.
1271 status
&= ~XR_AG_SB_SEC
;
1273 if (status
& XR_AG_SB
) {
1275 do_warn(_("reset bad sb for ag %d\n"), agno
);
1278 do_warn(_("would reset bad sb for ag %d\n"), agno
);
1281 if (status
& XR_AG_AGF
) {
1283 do_warn(_("reset bad agf for ag %d\n"), agno
);
1286 do_warn(_("would reset bad agf for ag %d\n"), agno
);
1289 if (status
& XR_AG_AGI
) {
1291 do_warn(_("reset bad agi for ag %d\n"), agno
);
1294 do_warn(_("would reset bad agi for ag %d\n"), agno
);
1298 if (status
&& no_modify
) {
1299 libxfs_putbuf(agibuf
);
1300 libxfs_putbuf(agfbuf
);
1301 libxfs_putbuf(sbbuf
);
1304 do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
1310 scan_freelist(agf
, agcnts
);
1312 validate_agf(agf
, agno
, agcnts
);
1313 validate_agi(agi
, agno
, agcnts
);
1315 ASSERT(agi_dirty
== 0 || (agi_dirty
&& !no_modify
));
1317 if (agi_dirty
&& !no_modify
)
1318 libxfs_writebuf(agibuf
, 0);
1320 libxfs_putbuf(agibuf
);
1322 ASSERT(agf_dirty
== 0 || (agf_dirty
&& !no_modify
));
1324 if (agf_dirty
&& !no_modify
)
1325 libxfs_writebuf(agfbuf
, 0);
1327 libxfs_putbuf(agfbuf
);
1329 ASSERT(sb_dirty
== 0 || (sb_dirty
&& !no_modify
));
1331 if (sb_dirty
&& !no_modify
) {
1333 memcpy(&mp
->m_sb
, sb
, sizeof(xfs_sb_t
));
1334 libxfs_sb_to_disk(XFS_BUF_TO_SBP(sbbuf
), sb
, XFS_SB_ALL_BITS
);
1335 libxfs_writebuf(sbbuf
, 0);
1337 libxfs_putbuf(sbbuf
);
1339 PROG_RPT_INC(prog_rpt_done
[agno
], 1);
1341 #ifdef XR_INODE_TRACE
1342 print_inode_list(i
);
1347 #define SCAN_THREADS 32
1351 struct xfs_mount
*mp
,
1354 struct aghdr_cnts
*agcnts
;
1355 __uint64_t fdblocks
= 0;
1356 __uint64_t icount
= 0;
1357 __uint64_t ifreecount
= 0;
1361 agcnts
= malloc(mp
->m_sb
.sb_agcount
* sizeof(*agcnts
));
1363 do_abort(_("no memory for ag header counts\n"));
1366 memset(agcnts
, 0, mp
->m_sb
.sb_agcount
* sizeof(*agcnts
));
1368 create_work_queue(&wq
, mp
, scan_threads
);
1370 for (i
= 0; i
< mp
->m_sb
.sb_agcount
; i
++)
1371 queue_work(&wq
, scan_ag
, i
, &agcnts
[i
]);
1373 destroy_work_queue(&wq
);
1375 /* tally up the counts */
1376 for (i
= 0; i
< mp
->m_sb
.sb_agcount
; i
++) {
1377 fdblocks
+= agcnts
[i
].fdblocks
;
1378 icount
+= agcnts
[i
].icount
;
1379 ifreecount
+= agcnts
[i
].ifreecount
;
1385 * Validate that our manual counts match the superblock.
1387 if (mp
->m_sb
.sb_icount
!= icount
) {
1388 do_warn(_("sb_icount %" PRIu64
", counted %" PRIu64
"\n"),
1389 mp
->m_sb
.sb_icount
, icount
);
1392 if (mp
->m_sb
.sb_ifree
!= ifreecount
) {
1393 do_warn(_("sb_ifree %" PRIu64
", counted %" PRIu64
"\n"),
1394 mp
->m_sb
.sb_ifree
, ifreecount
);
1397 if (mp
->m_sb
.sb_fdblocks
!= fdblocks
) {
1398 do_warn(_("sb_fdblocks %" PRIu64
", counted %" PRIu64
"\n"),
1399 mp
->m_sb
.sb_fdblocks
, fdblocks
);