repair/scan.c
/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "libxfs.h"
#include "avl.h"
#include "globals.h"
#include "agheader.h"
#include "incore.h"
#include "protos.h"
#include "err_protos.h"
#include "dinode.h"
#include "scan.h"
#include "versions.h"
#include "bmap.h"
#include "progress.h"
#include "threads.h"
#include "slab.h"
#include "rmap.h"

static xfs_mount_t	*mp = NULL;

/*
 * Variables to validate AG header values against the manual count
 * from the btree traversal.
 */
struct aghdr_cnts {
	xfs_agnumber_t	agno;
	xfs_extlen_t	agffreeblks;
	xfs_extlen_t	agflongest;
	uint64_t	agfbtreeblks;
	uint32_t	agicount;
	uint32_t	agifreecount;
	uint64_t	fdblocks;
	uint64_t	usedblocks;
	uint64_t	ifreecount;
	uint32_t	fibtfreecount;
};
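
/*
 * Illustrative sketch only (this helper does not exist in the source):
 * once the btree walks below have filled in an aghdr_cnts, the tallies
 * are compared against the on-disk AG headers, roughly like this for
 * two of the AGF fields.
 */
#if 0
static void
check_aghdr_cnts(
	struct xfs_agf		*agf,
	struct aghdr_cnts	*agcnts)
{
	/* free blocks counted from the freespace btrees vs. the AGF */
	if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks)
		do_warn(_("bad agf_freeblks %u, counted %u\n"),
			be32_to_cpu(agf->agf_freeblks),
			agcnts->agffreeblks);
	/* longest free extent seen vs. the AGF's agf_longest */
	if (be32_to_cpu(agf->agf_longest) != agcnts->agflongest)
		do_warn(_("bad agf_longest %u, counted %u\n"),
			be32_to_cpu(agf->agf_longest),
			agcnts->agflongest);
}
#endif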

void
set_mp(xfs_mount_t *mpp)
{
	libxfs_bcache_purge();
	mp = mpp;
}

static void
scan_sbtree(
	xfs_agblock_t	root,
	int		nlevels,
	xfs_agnumber_t	agno,
	int		suspect,
	void		(*func)(struct xfs_btree_block	*block,
				int			level,
				xfs_agblock_t		bno,
				xfs_agnumber_t		agno,
				int			suspect,
				int			isroot,
				uint32_t		magic,
				void			*priv),
	int		isroot,
	uint32_t	magic,
	void		*priv,
	const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;

	bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, root),
			XFS_FSB_TO_BB(mp, 1), 0, ops);
	if (!bp) {
		do_error(_("can't read btree block %d/%d\n"), agno, root);
		return;
	}
	if (bp->b_error == -EFSBADCRC || bp->b_error == -EFSCORRUPTED) {
		do_warn(_("btree block %d/%d is suspect, error %d\n"),
			agno, root, bp->b_error);
		suspect = 1;
	}

	(*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect,
		isroot, magic, priv);
	libxfs_putbuf(bp);
}
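
/*
 * Sketch of a typical top-level invocation (hypothetical; the real
 * call sites live in the AG header validation code): the walk starts
 * at the root block and level recorded in the AGF, with isroot set.
 */
#if 0
	scan_sbtree(be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
		    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
		    agno, 0, scan_allocbt, 1, XFS_ABTB_MAGIC, &agcnts,
		    &xfs_allocbt_buf_ops);
#endif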

/*
 * returns 1 on bad news (inode needs to be cleared), 0 on good
 */
int
scan_lbtree(
	xfs_fsblock_t	root,
	int		nlevels,
	int		(*func)(struct xfs_btree_block	*block,
				int			level,
				int			type,
				int			whichfork,
				xfs_fsblock_t		bno,
				xfs_ino_t		ino,
				xfs_rfsblock_t		*tot,
				uint64_t		*nex,
				blkmap_t		**blkmapp,
				bmap_cursor_t		*bm_cursor,
				int			isroot,
				int			check_dups,
				int			*dirty,
				uint64_t		magic),
	int		type,
	int		whichfork,
	xfs_ino_t	ino,
	xfs_rfsblock_t	*tot,
	uint64_t	*nex,
	blkmap_t	**blkmapp,
	bmap_cursor_t	*bm_cursor,
	int		isroot,
	int		check_dups,
	uint64_t	magic,
	const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;
	int		err;
	int		dirty = 0;
	bool		badcrc = false;

	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root),
			XFS_FSB_TO_BB(mp, 1), 0, ops);
	if (!bp)  {
		do_error(_("can't read btree block %d/%d\n"),
			XFS_FSB_TO_AGNO(mp, root),
			XFS_FSB_TO_AGBNO(mp, root));
		return(1);
	}

	/*
	 * only check for bad CRC here - caller will determine if there
	 * is a corruption or not and whether it got corrected and so needs
	 * writing back. CRC errors always imply we need to write the block.
	 */
	if (bp->b_error == -EFSBADCRC) {
		do_warn(_("btree block %d/%d is suspect, error %d\n"),
			XFS_FSB_TO_AGNO(mp, root),
			XFS_FSB_TO_AGBNO(mp, root), bp->b_error);
		badcrc = true;
	}

	err = (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1,
			type, whichfork, root, ino, tot, nex, blkmapp,
			bm_cursor, isroot, check_dups, &dirty,
			magic);

	ASSERT(dirty == 0 || (dirty && !no_modify));

	if ((dirty || badcrc) && !no_modify)
		libxfs_writebuf(bp, 0);
	else
		libxfs_putbuf(bp);

	return(err);
}

int
scan_bmapbt(
	struct xfs_btree_block	*block,
	int			level,
	int			type,
	int			whichfork,
	xfs_fsblock_t		bno,
	xfs_ino_t		ino,
	xfs_rfsblock_t		*tot,
	uint64_t		*nex,
	blkmap_t		**blkmapp,
	bmap_cursor_t		*bm_cursor,
	int			isroot,
	int			check_dups,
	int			*dirty,
	uint64_t		magic)
{
	int			i;
	int			err;
	xfs_bmbt_ptr_t		*pp;
	xfs_bmbt_key_t		*pkey;
	xfs_bmbt_rec_t		*rp;
	xfs_fileoff_t		first_key;
	xfs_fileoff_t		last_key;
	char			*forkname = get_forkname(whichfork);
	int			numrecs;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	int			state;
	int			error;

	/*
	 * unlike the ag freeblock btrees, if anything looks wrong
	 * in an inode bmap tree, just bail.  it's possible that
	 * we'll miss a case where the to-be-toasted inode and
	 * another inode are claiming the same block but that's
	 * highly unlikely.
	 */
	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(
_("bad magic # %#x in inode %" PRIu64 " (%s fork) bmbt block %" PRIu64 "\n"),
			be32_to_cpu(block->bb_magic), ino, forkname, bno);
		return(1);
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(
_("expected level %d got %d in inode %" PRIu64 ", (%s fork) bmbt block %" PRIu64 "\n"),
			level, be16_to_cpu(block->bb_level),
			ino, forkname, bno);
		return(1);
	}

	if (magic == XFS_BMAP_CRC_MAGIC) {
		/* verify owner */
		if (be64_to_cpu(block->bb_u.l.bb_owner) != ino) {
			do_warn(
_("expected owner inode %" PRIu64 ", got %llu, bmbt block %" PRIu64 "\n"),
				ino,
				(unsigned long long)be64_to_cpu(block->bb_u.l.bb_owner),
				bno);
			return 1;
		}
		/* verify block number */
		if (be64_to_cpu(block->bb_u.l.bb_blkno) !=
		    XFS_FSB_TO_DADDR(mp, bno)) {
			do_warn(
_("expected block %" PRIu64 ", got %llu, bmbt block %" PRIu64 "\n"),
				XFS_FSB_TO_DADDR(mp, bno),
				(unsigned long long)be64_to_cpu(block->bb_u.l.bb_blkno),
				bno);
			return 1;
		}
		/* verify uuid */
		if (platform_uuid_compare(&block->bb_u.l.bb_uuid,
					  &mp->m_sb.sb_meta_uuid) != 0) {
			do_warn(
_("wrong FS UUID, bmbt block %" PRIu64 "\n"),
				bno);
			return 1;
		}
	}

	if (check_dups == 0)  {
		/*
		 * check sibling pointers. if bad we have a conflict
		 * between the sibling pointers and the child pointers
		 * in the parent block.  blow out the inode if that happens
		 */
		if (bm_cursor->level[level].fsbno != NULLFSBLOCK)  {
			/*
			 * this is not the first block on this level
			 * so the cursor for this level has recorded the
			 * values for this block's left-sibling.
			 */
			if (bno != bm_cursor->level[level].right_fsbno)  {
				do_warn(
_("bad fwd (right) sibling pointer (saw %" PRIu64 " parent block says %" PRIu64 ")\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					bm_cursor->level[level].right_fsbno,
					bno, ino, forkname,
					bm_cursor->level[level].fsbno);
				return(1);
			}
			if (be64_to_cpu(block->bb_u.l.bb_leftsib) !=
					bm_cursor->level[level].fsbno)  {
				do_warn(
_("bad back (left) sibling pointer (saw %llu parent block says %" PRIu64 ")\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					(unsigned long long)
						be64_to_cpu(block->bb_u.l.bb_leftsib),
					bm_cursor->level[level].fsbno,
					ino, forkname, bno);
				return(1);
			}
		} else {
			/*
			 * This is the first or only block on this level.
			 * Check that the left sibling pointer is NULL
			 */
			if (be64_to_cpu(block->bb_u.l.bb_leftsib) != NULLFSBLOCK)  {
				do_warn(
_("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					(unsigned long long)
						be64_to_cpu(block->bb_u.l.bb_leftsib),
					ino, forkname, bno);
				return(1);
			}
		}

		/*
		 * update cursor block pointers to reflect this block
		 */
		bm_cursor->level[level].fsbno = bno;
		bm_cursor->level[level].left_fsbno =
					be64_to_cpu(block->bb_u.l.bb_leftsib);
		bm_cursor->level[level].right_fsbno =
					be64_to_cpu(block->bb_u.l.bb_rightsib);

		agno = XFS_FSB_TO_AGNO(mp, bno);
		agbno = XFS_FSB_TO_AGBNO(mp, bno);

		pthread_mutex_lock(&ag_locks[agno].lock);
		state = get_bmap(agno, agbno);
		switch (state) {
		case XR_E_INUSE1:
			/*
			 * block was claimed as in use data by the rmap
			 * btree, but has not been found in the data extent
			 * map for the inode. That means this bmbt block hasn't
			 * yet been claimed as in use, which means -it's ours-
			 */
		case XR_E_UNKNOWN:
		case XR_E_FREE1:
		case XR_E_FREE:
			set_bmap(agno, agbno, XR_E_INUSE);
			break;
		case XR_E_FS_MAP:
		case XR_E_INUSE:
			/*
			 * we'll try and continue searching here since
			 * the block looks like it's been claimed by a file
			 * to store user data, by a directory to store
			 * directory data, or by the space allocation btrees,
			 * but since we made it here, the block probably
			 * contains btree data.
			 */
			set_bmap(agno, agbno, XR_E_MULT);
			do_warn(
_("inode 0x%" PRIx64 " bmap block 0x%" PRIx64 " claimed, state is %d\n"),
				ino, bno, state);
			break;
		case XR_E_MULT:
		case XR_E_INUSE_FS:
			set_bmap(agno, agbno, XR_E_MULT);
			do_warn(
_("inode 0x%" PRIx64 " bmap block 0x%" PRIx64 " claimed, state is %d\n"),
				ino, bno, state);
			/*
			 * if we made it to here, this is probably a bmap block
			 * that is being used by *another* file as a bmap block
			 * so the block will be valid.  Both files should be
			 * trashed along with any other file that impinges on
			 * any blocks referenced by either file.  So we
			 * continue searching down this btree to mark all
			 * blocks duplicate
			 */
			break;
		case XR_E_BAD_STATE:
		default:
			do_warn(
_("bad state %d, inode %" PRIu64 " bmap block 0x%" PRIx64 "\n"),
				state, ino, bno);
			break;
		}
		pthread_mutex_unlock(&ag_locks[agno].lock);
	} else  {
		/*
		 * attribute fork for realtime files is in the regular
		 * filesystem
		 */
		if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK)  {
			if (search_dup_extent(XFS_FSB_TO_AGNO(mp, bno),
					XFS_FSB_TO_AGBNO(mp, bno),
					XFS_FSB_TO_AGBNO(mp, bno) + 1))
				return(1);
		} else  {
			if (search_rt_dup_extent(mp, bno))
				return(1);
		}
	}
	(*tot)++;
	numrecs = be16_to_cpu(block->bb_numrecs);

	/* Record BMBT blocks in the reverse-mapping data. */
	if (check_dups && collect_rmaps) {
		agno = XFS_FSB_TO_AGNO(mp, bno);
		pthread_mutex_lock(&ag_locks[agno].lock);
		error = rmap_add_bmbt_rec(mp, ino, whichfork, bno);
		pthread_mutex_unlock(&ag_locks[agno].lock);
		if (error)
			do_error(
_("couldn't add inode %"PRIu64" bmbt block %"PRIu64" reverse-mapping data."),
				ino, bno);
	}

	if (level == 0) {
		if (numrecs > mp->m_bmap_dmxr[0] || (isroot == 0 && numrecs <
							mp->m_bmap_dmnr[0])) {
			do_warn(
_("inode %" PRIu64 " bad # of bmap records (%u, min - %u, max - %u)\n"),
				ino, numrecs, mp->m_bmap_dmnr[0],
				mp->m_bmap_dmxr[0]);
			return(1);
		}
		rp = XFS_BMBT_REC_ADDR(mp, block, 1);
		*nex += numrecs;
		/*
		 * XXX - if we were going to fix up the btree record,
		 * we'd do it right here.  For now, if there's a problem,
		 * we'll bail out and presumably clear the inode.
		 */
		if (check_dups == 0)  {
			err = process_bmbt_reclist(mp, rp, &numrecs, type, ino,
						tot, blkmapp, &first_key,
						&last_key, whichfork);
			if (err)
				return 1;

			/*
			 * check that key ordering is monotonically increasing.
			 * if the last_key value in the cursor is set to
			 * NULLFILEOFF, then we know this is the first block
			 * on the leaf level and we shouldn't check the
			 * last_key value.
			 */
			if (first_key <= bm_cursor->level[level].last_key &&
					bm_cursor->level[level].last_key !=
					NULLFILEOFF)  {
				do_warn(
_("out-of-order bmap key (file offset) in inode %" PRIu64 ", %s fork, fsbno %" PRIu64 "\n"),
					ino, forkname, bno);
				return(1);
			}
			/*
			 * update cursor keys to reflect this block.
			 * don't have to check if last_key is > first_key
			 * since that gets checked by process_bmbt_reclist.
			 */
			bm_cursor->level[level].first_key = first_key;
			bm_cursor->level[level].last_key = last_key;

			return 0;
		} else {
			return scan_bmbt_reclist(mp, rp, &numrecs, type, ino,
						tot, whichfork);
		}
	}
	if (numrecs > mp->m_bmap_dmxr[1] || (isroot == 0 && numrecs <
							mp->m_bmap_dmnr[1])) {
		do_warn(
_("inode %" PRIu64 " bad # of bmap records (%u, min - %u, max - %u)\n"),
			ino, numrecs, mp->m_bmap_dmnr[1], mp->m_bmap_dmxr[1]);
		return(1);
	}
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
	pkey = XFS_BMBT_KEY_ADDR(mp, block, 1);

	last_key = NULLFILEOFF;

	for (i = 0, err = 0; i < numrecs; i++)  {
		/*
		 * XXX - if we were going to fix up the interior btree nodes,
		 * we'd do it right here.  For now, if there's a problem,
		 * we'll bail out and presumably clear the inode.
		 */
		if (!verify_dfsbno(mp, be64_to_cpu(pp[i])))  {
			do_warn(
_("bad bmap btree ptr 0x%llx in ino %" PRIu64 "\n"),
				(unsigned long long) be64_to_cpu(pp[i]), ino);
			return(1);
		}

		err = scan_lbtree(be64_to_cpu(pp[i]), level, scan_bmapbt,
				type, whichfork, ino, tot, nex, blkmapp,
				bm_cursor, 0, check_dups, magic,
				&xfs_bmbt_buf_ops);
		if (err)
			return(1);

		/*
		 * fix key (offset) mismatches between the first key
		 * in the child block (as recorded in the cursor) and the
		 * key in the interior node referencing the child block.
		 *
		 * fixes cases where entries have been shifted between
		 * child blocks but the parent hasn't been updated.  We
		 * don't have to worry about the key values in the cursor
		 * not being set since we only look at the key values of
		 * our child and those are guaranteed to be set by the
		 * call to scan_lbtree() above.
		 */
		if (check_dups == 0 && be64_to_cpu(pkey[i].br_startoff) !=
					bm_cursor->level[level-1].first_key)  {
			if (!no_modify)  {
				do_warn(
_("correcting bt key (was %llu, now %" PRIu64 ") in inode %" PRIu64 "\n"
  "\t\t%s fork, btree block %" PRIu64 "\n"),
					(unsigned long long)
						be64_to_cpu(pkey[i].br_startoff),
					bm_cursor->level[level-1].first_key,
					ino,
					forkname, bno);
				*dirty = 1;
				pkey[i].br_startoff = cpu_to_be64(
					bm_cursor->level[level-1].first_key);
			} else  {
				do_warn(
_("bad btree key (is %llu, should be %" PRIu64 ") in inode %" PRIu64 "\n"
  "\t\t%s fork, btree block %" PRIu64 "\n"),
					(unsigned long long)
						be64_to_cpu(pkey[i].br_startoff),
					bm_cursor->level[level-1].first_key,
					ino, forkname, bno);
			}
		}
	}

	/*
	 * If we're the last node at our level, check that the last child
	 * block's forward sibling pointer is NULL.
	 */
	if (check_dups == 0 &&
		bm_cursor->level[level].right_fsbno == NULLFSBLOCK &&
		bm_cursor->level[level - 1].right_fsbno != NULLFSBLOCK)  {
		do_warn(
_("bad fwd (right) sibling pointer (saw %" PRIu64 " should be NULLFSBLOCK)\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
			bm_cursor->level[level - 1].right_fsbno,
			ino, forkname, bm_cursor->level[level - 1].fsbno);
		return(1);
	}

	/*
	 * update cursor keys to reflect this block
	 */
	if (check_dups == 0)  {
		bm_cursor->level[level].first_key =
				be64_to_cpu(pkey[0].br_startoff);
		bm_cursor->level[level].last_key =
				be64_to_cpu(pkey[numrecs - 1].br_startoff);
	}

	return(0);
}

static void
scan_allocbt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	uint32_t		magic,
	void			*priv)
{
	struct aghdr_cnts	*agcnts = priv;
	const char		*name;
	int			i;
	xfs_alloc_ptr_t		*pp;
	xfs_alloc_rec_t		*rp;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;
	xfs_extlen_t		lastcount = 0;
	xfs_agblock_t		lastblock = 0;

	switch (magic) {
	case XFS_ABTB_CRC_MAGIC:
	case XFS_ABTB_MAGIC:
		name = "bno";
		break;
	case XFS_ABTC_CRC_MAGIC:
	case XFS_ABTC_MAGIC:
		name = "cnt";
		break;
	default:
		name = "(unknown)";
		assert(0);
		break;
	}

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * All freespace btree blocks except the roots are freed for a
	 * fully used filesystem, thus they are counted towards the
	 * free data block counter.
	 */
	if (!isroot) {
		agcnts->agfbtreeblks++;
		agcnts->fdblocks++;
	}

	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed
	 */
	state = get_bmap(agno, bno);
	if (state != XR_E_UNKNOWN)  {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			name, state, agno, bno, suspect);
		return;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);

	if (level == 0) {
		if (numrecs > mp->m_alloc_mxr[0])  {
			numrecs = mp->m_alloc_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_alloc_mnr[0])  {
			numrecs = mp->m_alloc_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
				be16_to_cpu(block->bb_numrecs),
				mp->m_alloc_mnr[0], mp->m_alloc_mxr[0],
				name, agno, bno);
			suspect++;
		}

		rp = XFS_ALLOC_REC_ADDR(mp, block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t		b, end;
			xfs_extlen_t		len, blen;

			b = be32_to_cpu(rp[i].ar_startblock);
			len = be32_to_cpu(rp[i].ar_blockcount);
			end = b + len;

			if (b == 0 || !verify_agbno(mp, agno, b)) {
				do_warn(
_("invalid start block %u in record %u of %s btree block %u/%u\n"),
					b, i, name, agno, bno);
				continue;
			}
			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
				do_warn(
_("invalid length %u in record %u of %s btree block %u/%u\n"),
					len, i, name, agno, bno);
				continue;
			}

			if (magic == XFS_ABTB_MAGIC ||
			    magic == XFS_ABTB_CRC_MAGIC) {
				if (b <= lastblock) {
					do_warn(_(
	"out-of-order bno btree record %d (%u %u) block %u/%u\n"),
						i, b, len, agno, bno);
				} else {
					lastblock = b;
				}
			} else {
				agcnts->fdblocks += len;
				agcnts->agffreeblks += len;
				if (len > agcnts->agflongest)
					agcnts->agflongest = len;
				if (len < lastcount) {
					do_warn(_(
	"out-of-order cnt btree record %d (%u %u) block %u/%u\n"),
						i, b, len, agno, bno);
				} else {
					lastcount = len;
				}
			}

			for ( ; b < end; b += blen)  {
				state = get_bmap_ext(agno, b, end, &blen);
				switch (state) {
				case XR_E_UNKNOWN:
					set_bmap(agno, b, XR_E_FREE1);
					break;
				case XR_E_FREE1:
					/*
					 * no warning messages -- we'll catch
					 * FREE1 blocks later
					 */
					if (magic == XFS_ABTC_MAGIC ||
					    magic == XFS_ABTC_CRC_MAGIC) {
						set_bmap_ext(agno, b, blen,
							     XR_E_FREE);
						break;
					}
					/* fall through */
				default:
					do_warn(
	_("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
						agno, b, b + blen - 1,
						name, state);
					break;
				}
			}
		}
		return;
	}

	/*
	 * interior record
	 */
	pp = XFS_ALLOC_PTR_ADDR(mp, block, 1, mp->m_alloc_mxr[1]);

	if (numrecs > mp->m_alloc_mxr[1])  {
		numrecs = mp->m_alloc_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_alloc_mnr[1])  {
		numrecs = mp->m_alloc_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */
	if (hdr_errors)  {
		do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
			be16_to_cpu(block->bb_numrecs),
			mp->m_alloc_mnr[1], mp->m_alloc_mxr[1],
			name, agno, bno);
		if (suspect)
			return;
		suspect++;
	} else if (suspect) {
		suspect = 0;
	}

	for (i = 0; i < numrecs; i++)  {
		xfs_agblock_t		bno = be32_to_cpu(pp[i]);

		/*
		 * XXX - put sibling detection right here.
		 * we know our sibling chain is good.  So as we go,
		 * we check the entry before and after each entry.
		 * If either of the entries references a different block,
		 * check the sibling pointer.  If there's a sibling
		 * pointer mismatch, try and extract as much data
		 * as possible.
		 */
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			switch (magic) {
			case XFS_ABTB_CRC_MAGIC:
			case XFS_ABTB_MAGIC:
				scan_sbtree(bno, level, agno, suspect,
					    scan_allocbt, 0, magic, priv,
					    &xfs_allocbt_buf_ops);
				break;
			case XFS_ABTC_CRC_MAGIC:
			case XFS_ABTC_MAGIC:
				scan_sbtree(bno, level, agno, suspect,
					    scan_allocbt, 0, magic, priv,
					    &xfs_allocbt_buf_ops);
				break;
			}
		}
	}
}

static bool
ino_issparse(
	struct xfs_inobt_rec	*rp,
	int			offset)
{
	if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
		return false;

	return xfs_inobt_is_sparse_disk(rp, offset);
}

/* See if the rmapbt owners agree with our observations. */
static void
process_rmap_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		b,
	xfs_agblock_t		end,
	xfs_extlen_t		blen,
	int64_t			owner,
	int			state,
	const char		*name)
{
	switch (state) {
	case XR_E_UNKNOWN:
		switch (owner) {
		case XFS_RMAP_OWN_FS:
		case XFS_RMAP_OWN_LOG:
			set_bmap_ext(agno, b, blen, XR_E_INUSE_FS1);
			break;
		case XFS_RMAP_OWN_AG:
		case XFS_RMAP_OWN_INOBT:
			set_bmap_ext(agno, b, blen, XR_E_FS_MAP1);
			break;
		case XFS_RMAP_OWN_INODES:
			set_bmap_ext(agno, b, blen, XR_E_INO1);
			break;
		case XFS_RMAP_OWN_REFC:
			set_bmap_ext(agno, b, blen, XR_E_REFC);
			break;
		case XFS_RMAP_OWN_COW:
			set_bmap_ext(agno, b, blen, XR_E_COW);
			break;
		case XFS_RMAP_OWN_NULL:
			/* still unknown */
			break;
		default:
			/* file data */
			set_bmap_ext(agno, b, blen, XR_E_INUSE1);
			break;
		}
		break;
	case XR_E_INUSE_FS:
		if (owner == XFS_RMAP_OWN_FS ||
		    owner == XFS_RMAP_OWN_LOG)
			break;
		do_warn(
_("Static meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_FS_MAP:
		if (owner == XFS_RMAP_OWN_AG ||
		    owner == XFS_RMAP_OWN_INOBT)
			break;
		do_warn(
_("AG meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_INO:
		if (owner == XFS_RMAP_OWN_INODES)
			break;
		do_warn(
_("inode block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_REFC:
		if (owner == XFS_RMAP_OWN_REFC)
			break;
		do_warn(
_("AG refcount block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_INUSE:
		if (owner >= 0 &&
		    owner < mp->m_sb.sb_dblocks)
			break;
		do_warn(
_("in use block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_FREE1:
	case XR_E_FREE:
		/*
		 * May be on the AGFL. If not, they'll
		 * be caught later.
		 */
		break;
	case XR_E_INUSE1:
		/*
		 * multiple inode owners are ok with
		 * reflink enabled
		 */
		if (xfs_sb_version_hasreflink(&mp->m_sb) &&
		    !XFS_RMAP_NON_INODE_OWNER(owner))
			break;
		/* fall through */
	default:
		do_warn(
_("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	}
}
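
/*
 * Summary of the XR_E_UNKNOWN transitions above: an extent first seen
 * through the rmapbt is parked in a provisional state until the owning
 * structure is scanned directly.
 *
 *	XFS_RMAP_OWN_FS, OWN_LOG	-> XR_E_INUSE_FS1
 *	XFS_RMAP_OWN_AG, OWN_INOBT	-> XR_E_FS_MAP1
 *	XFS_RMAP_OWN_INODES		-> XR_E_INO1
 *	XFS_RMAP_OWN_REFC		-> XR_E_REFC
 *	XFS_RMAP_OWN_COW		-> XR_E_COW
 *	XFS_RMAP_OWN_NULL		-> (left unknown)
 *	inode (non-special) owners	-> XR_E_INUSE1
 */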

struct rmap_priv {
	struct aghdr_cnts	*agcnts;
	struct xfs_rmap_irec	high_key;
	struct xfs_rmap_irec	last_rec;
	xfs_agblock_t		nr_blocks;
};

static bool
rmap_in_order(
	xfs_agblock_t	b,
	xfs_agblock_t	lastblock,
	uint64_t	owner,
	uint64_t	lastowner,
	uint64_t	offset,
	uint64_t	lastoffset)
{
	if (b > lastblock)
		return true;
	else if (b < lastblock)
		return false;

	if (owner > lastowner)
		return true;
	else if (owner < lastowner)
		return false;

	return offset > lastoffset;
}
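
/*
 * Worked example (illustrative values only): rmap records sort on
 * (startblock, owner, offset), compared lexicographically, so with
 * lastblock = 10, lastowner = 128 and lastoffset = 0:
 *
 *	rmap_in_order(11, 10, 128, 128, 0, 0) -> true  (block advanced)
 *	rmap_in_order(10, 10, 129, 128, 0, 0) -> true  (same block,
 *						        owner advanced)
 *	rmap_in_order(10, 10, 128, 128, 0, 4) -> false (offset moved
 *						        backwards)
 */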

static void
scan_rmapbt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	uint32_t		magic,
	void			*priv)
{
	const char		*name = "rmap";
	int			i;
	xfs_rmap_ptr_t		*pp;
	struct xfs_rmap_rec	*rp;
	struct rmap_priv	*rmap_priv = priv;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;
	xfs_agblock_t		lastblock = 0;
	uint64_t		lastowner = 0;
	uint64_t		lastoffset = 0;
	struct xfs_rmap_key	*kp;
	struct xfs_rmap_irec	key = {0};

	if (magic != XFS_RMAP_CRC_MAGIC) {
		name = "(unknown)";
		hdr_errors++;
		suspect++;
		goto out;
	}

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			goto out;
	}

	/*
	 * All RMAP btree blocks except the roots are freed for a
	 * fully empty filesystem, thus they are counted towards the
	 * free data block counter.
	 */
	if (!isroot) {
		rmap_priv->agcnts->agfbtreeblks++;
		rmap_priv->agcnts->fdblocks++;
	}
	rmap_priv->nr_blocks++;

	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			goto out;
	}

	/* check for btree blocks multiply claimed */
	state = get_bmap(agno, bno);
	if (!(state == XR_E_UNKNOWN || state == XR_E_FS_MAP1))  {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			name, state, agno, bno, suspect);
		goto out;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);
	if (level == 0) {
		if (numrecs > mp->m_rmap_mxr[0]) {
			numrecs = mp->m_rmap_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_rmap_mnr[0]) {
			numrecs = mp->m_rmap_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
				be16_to_cpu(block->bb_numrecs),
				mp->m_rmap_mnr[0], mp->m_rmap_mxr[0],
				name, agno, bno);
			suspect++;
		}

		rp = XFS_RMAP_REC_ADDR(block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t		b, end;
			xfs_extlen_t		len, blen;
			int64_t			owner, offset;

			b = be32_to_cpu(rp[i].rm_startblock);
			len = be32_to_cpu(rp[i].rm_blockcount);
			owner = be64_to_cpu(rp[i].rm_owner);
			offset = be64_to_cpu(rp[i].rm_offset);

			key.rm_flags = 0;
			key.rm_startblock = b;
			key.rm_blockcount = len;
			key.rm_owner = owner;
			if (libxfs_rmap_irec_offset_unpack(offset, &key)) {
				/* Look for impossible flags. */
				do_warn(
_("invalid flags in record %u of %s btree block %u/%u\n"),
					i, name, agno, bno);
				continue;
			}

			end = key.rm_startblock + key.rm_blockcount;

			/* Make sure agbno & len make sense. */
			if (!verify_agbno(mp, agno, b)) {
				do_warn(
_("invalid start block %u in record %u of %s btree block %u/%u\n"),
					b, i, name, agno, bno);
				continue;
			}
			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
				do_warn(
_("invalid length %u in record %u of %s btree block %u/%u\n"),
					len, i, name, agno, bno);
				continue;
			}

			/* Look for impossible owners. */
			if (!((owner > XFS_RMAP_OWN_MIN &&
			       owner <= XFS_RMAP_OWN_FS) ||
			      (XFS_INO_TO_AGNO(mp, owner) < mp->m_sb.sb_agcount &&
			       XFS_AGINO_TO_AGBNO(mp,
					XFS_INO_TO_AGINO(mp, owner)) <
					mp->m_sb.sb_agblocks)))
				do_warn(
_("invalid owner in rmap btree record %d (%"PRId64" %u) block %u/%u\n"),
					i, owner, len, agno, bno);

			/* Look for impossible record field combinations. */
			if (XFS_RMAP_NON_INODE_OWNER(key.rm_owner)) {
				if (key.rm_flags)
					do_warn(
_("record %d of block (%u/%u) in %s btree cannot have non-inode owner with flags\n"),
						i, agno, bno, name);
				if (key.rm_offset)
					do_warn(
_("record %d of block (%u/%u) in %s btree cannot have non-inode owner with offset\n"),
						i, agno, bno, name);
			}

			/* Check for out of order records. */
			if (i == 0) {
advance:
				lastblock = b;
				lastowner = owner;
				lastoffset = offset;
			} else {
				bool bad;

				if (xfs_sb_version_hasreflink(&mp->m_sb))
					bad = !rmap_in_order(b, lastblock,
							owner, lastowner,
							offset, lastoffset);
				else
					bad = b <= lastblock;
				if (bad)
					do_warn(
_("out-of-order rmap btree record %d (%u %"PRId64" %"PRIx64" %u) block %u/%u\n"),
						i, b, owner, offset, len,
						agno, bno);
				else
					goto advance;
			}

			/* Is this mergeable with the previous record? */
			if (rmaps_are_mergeable(&rmap_priv->last_rec, &key)) {
				do_warn(
_("record %d in block (%u/%u) of %s tree should be merged with previous record\n"),
					i, agno, bno, name);
				rmap_priv->last_rec.rm_blockcount +=
						key.rm_blockcount;
			} else
				rmap_priv->last_rec = key;

			/* Check that we don't go past the high key. */
			key.rm_startblock += key.rm_blockcount - 1;
			if (!XFS_RMAP_NON_INODE_OWNER(key.rm_owner) &&
			    !(key.rm_flags & XFS_RMAP_BMBT_BLOCK))
				key.rm_offset += key.rm_blockcount - 1;
			key.rm_blockcount = 0;
			if (rmap_diffkeys(&key, &rmap_priv->high_key) > 0) {
				do_warn(
_("record %d greater than high key of block (%u/%u) in %s tree\n"),
					i, agno, bno, name);
			}

			/* Check for block owner collisions. */
			for ( ; b < end; b += blen)  {
				state = get_bmap_ext(agno, b, end, &blen);
				process_rmap_rec(mp, agno, b, end, blen, owner,
						state, name);
			}
		}
		goto out;
	}

	/*
	 * interior record
	 */
	pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]);

	if (numrecs > mp->m_rmap_mxr[1]) {
		numrecs = mp->m_rmap_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_rmap_mnr[1]) {
		numrecs = mp->m_rmap_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */
	if (hdr_errors)  {
		do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
			be16_to_cpu(block->bb_numrecs),
			mp->m_rmap_mnr[1], mp->m_rmap_mxr[1],
			name, agno, bno);
		if (suspect)
			goto out;
		suspect++;
	} else if (suspect) {
		suspect = 0;
	}

	/* check the node's high keys */
	for (i = 0; !isroot && i < numrecs; i++) {
		kp = XFS_RMAP_HIGH_KEY_ADDR(block, i + 1);

		key.rm_flags = 0;
		key.rm_startblock = be32_to_cpu(kp->rm_startblock);
		key.rm_owner = be64_to_cpu(kp->rm_owner);
		if (libxfs_rmap_irec_offset_unpack(be64_to_cpu(kp->rm_offset),
				&key)) {
			/* Look for impossible flags. */
			do_warn(
_("invalid flags in key %u of %s btree block %u/%u\n"),
				i, name, agno, bno);
			continue;
		}
		if (rmap_diffkeys(&key, &rmap_priv->high_key) > 0)
			do_warn(
_("key %d greater than high key of block (%u/%u) in %s tree\n"),
				i, agno, bno, name);
	}

	for (i = 0; i < numrecs; i++)  {
		xfs_agblock_t		bno = be32_to_cpu(pp[i]);

		/*
		 * XXX - put sibling detection right here.
		 * we know our sibling chain is good.  So as we go,
		 * we check the entry before and after each entry.
		 * If either of the entries references a different block,
		 * check the sibling pointer.  If there's a sibling
		 * pointer mismatch, try and extract as much data
		 * as possible.
		 */
		kp = XFS_RMAP_HIGH_KEY_ADDR(block, i + 1);
		rmap_priv->high_key.rm_flags = 0;
		rmap_priv->high_key.rm_startblock =
				be32_to_cpu(kp->rm_startblock);
		rmap_priv->high_key.rm_owner =
				be64_to_cpu(kp->rm_owner);
		if (libxfs_rmap_irec_offset_unpack(be64_to_cpu(kp->rm_offset),
				&rmap_priv->high_key)) {
			/* Look for impossible flags. */
			do_warn(
_("invalid flags in high key %u of %s btree block %u/%u\n"),
				i, name, agno, bno);
			continue;
		}

		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			scan_sbtree(bno, level, agno, suspect, scan_rmapbt, 0,
				    magic, priv, &xfs_rmapbt_buf_ops);
		}
	}

out:
	if (suspect)
		rmap_avoid_check();
}

struct refc_priv {
	struct xfs_refcount_irec	last_rec;
	xfs_agblock_t			nr_blocks;
};
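
/*
 * Note on the record decoding below: CoW staging extents are stored
 * with XFS_REFC_COW_START (the high bit of the agbno space) added to
 * their start block and always carry a refcount of 1, so an on-disk
 * rc_startblock of XFS_REFC_COW_START + 100 describes agbno 100.
 * Shared extents use plain agbnos and refcounts of 2..MAXREFCOUNT.
 */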

static void
scan_refcbt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	uint32_t		magic,
	void			*priv)
{
	const char		*name = "refcount";
	int			i;
	xfs_refcount_ptr_t	*pp;
	struct xfs_refcount_rec	*rp;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;
	xfs_agblock_t		lastblock = 0;
	struct refc_priv	*refc_priv = priv;

	if (magic != XFS_REFC_CRC_MAGIC) {
		name = "(unknown)";
		hdr_errors++;
		suspect++;
		goto out;
	}

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in %s btree block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			goto out;
	}

	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in %s btree block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			goto out;
	}

	refc_priv->nr_blocks++;

	/* check for btree blocks multiply claimed */
	state = get_bmap(agno, bno);
	if (!(state == XR_E_UNKNOWN || state == XR_E_REFC))  {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			name, state, agno, bno, suspect);
		goto out;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);
	if (level == 0) {
		if (numrecs > mp->m_refc_mxr[0]) {
			numrecs = mp->m_refc_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_refc_mnr[0]) {
			numrecs = mp->m_refc_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in %s btree block %u/%u\n"),
				be16_to_cpu(block->bb_numrecs),
				mp->m_refc_mnr[0], mp->m_refc_mxr[0],
				name, agno, bno);
			suspect++;
		}

		rp = XFS_REFCOUNT_REC_ADDR(block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t		b, agb, end;
			xfs_extlen_t		len;
			xfs_nlink_t		nr;

			b = agb = be32_to_cpu(rp[i].rc_startblock);
			len = be32_to_cpu(rp[i].rc_blockcount);
			nr = be32_to_cpu(rp[i].rc_refcount);
			if (b >= XFS_REFC_COW_START && nr != 1)
				do_warn(
_("leftover CoW extent has incorrect refcount in record %u of %s btree block %u/%u\n"),
					i, name, agno, bno);
			if (nr == 1) {
				if (agb < XFS_REFC_COW_START)
					do_warn(
_("leftover CoW extent has invalid startblock in record %u of %s btree block %u/%u\n"),
						i, name, agno, bno);
				agb -= XFS_REFC_COW_START;
			}
			end = agb + len;

			if (!verify_agbno(mp, agno, agb)) {
				do_warn(
_("invalid start block %u in record %u of %s btree block %u/%u\n"),
					b, i, name, agno, bno);
				continue;
			}
			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
				do_warn(
_("invalid length %u in record %u of %s btree block %u/%u\n"),
					len, i, name, agno, bno);
				continue;
			}

			if (nr == 1) {
				xfs_agblock_t	c;
				xfs_extlen_t	cnr;

				for (c = agb; c < end; c += cnr) {
					state = get_bmap_ext(agno, c, end, &cnr);
					switch (state) {
					case XR_E_UNKNOWN:
					case XR_E_COW:
						do_warn(
_("leftover CoW extent (%u/%u) len %u\n"),
							agno, c, cnr);
						set_bmap_ext(agno, c, cnr, XR_E_FREE);
						break;
					default:
						do_warn(
_("extent (%u/%u) len %u claimed, state is %d\n"),
							agno, c, cnr, state);
						break;
					}
				}
			} else if (nr < 2 || nr > MAXREFCOUNT) {
				do_warn(
_("invalid reference count %u in record %u of %s btree block %u/%u\n"),
					nr, i, name, agno, bno);
				continue;
			}

			if (b && b <= lastblock) {
				do_warn(_(
"out-of-order %s btree record %d (%u %u) block %u/%u\n"),
					name, i, b, len, agno, bno);
			} else {
				lastblock = b;
			}

			/* Is this record mergeable with the last one? */
			if (refc_priv->last_rec.rc_startblock +
			    refc_priv->last_rec.rc_blockcount == b &&
			    refc_priv->last_rec.rc_refcount == nr) {
				do_warn(
_("record %d in block (%u/%u) of %s tree should be merged with previous record\n"),
					i, agno, bno, name);
				refc_priv->last_rec.rc_blockcount += len;
			} else {
				refc_priv->last_rec.rc_startblock = b;
				refc_priv->last_rec.rc_blockcount = len;
				refc_priv->last_rec.rc_refcount = nr;
			}

			/* XXX: probably want to mark the reflinked areas? */
		}
		goto out;
	}

	/*
	 * interior record
	 */
	pp = XFS_REFCOUNT_PTR_ADDR(block, 1, mp->m_refc_mxr[1]);

	if (numrecs > mp->m_refc_mxr[1]) {
		numrecs = mp->m_refc_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_refc_mnr[1]) {
		numrecs = mp->m_refc_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */
	if (hdr_errors)  {
		do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in %s btree block %u/%u\n"),
			be16_to_cpu(block->bb_numrecs),
			mp->m_refc_mnr[1], mp->m_refc_mxr[1],
			name, agno, bno);
		if (suspect)
			goto out;
		suspect++;
	} else if (suspect) {
		suspect = 0;
	}

	for (i = 0; i < numrecs; i++)  {
		xfs_agblock_t		bno = be32_to_cpu(pp[i]);

		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			scan_sbtree(bno, level, agno, suspect, scan_refcbt, 0,
				    magic, priv, &xfs_refcountbt_buf_ops);
		}
	}
out:
	if (suspect)
		refcount_avoid_check();
	return;
}
/*
 * The following helpers are to help process and validate individual on-disk
 * inode btree records. We have two possible inode btrees with slightly
 * different semantics. Many of the validations and actions are equivalent,
 * such as record alignment constraints, etc. Other validations differ, such
 * as the fact that the inode chunk block allocation state is set by the
 * content of the core inobt and verified by the content of the finobt.
 *
 * The following structures are used to facilitate common validation routines
 * where the only difference between validation of the inobt or finobt might
 * be the error messages that result in the event of failure.
 */

enum inobt_type {
	INOBT,
	FINOBT
};
const char *inobt_names[] = {
	"inobt",
	"finobt"
};

static int
verify_single_ino_chunk_align(
	xfs_agnumber_t		agno,
	enum inobt_type		type,
	struct xfs_inobt_rec	*rp,
	int			suspect,
	bool			*skip)
{
	const char		*inobt_name = inobt_names[type];
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			off;

	*skip = false;
	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	/*
	 * with multi-block chunks, all chunks start at the beginning of the
	 * block.  with multi-chunk blocks, all chunks must start on 64-inode
	 * boundaries since each block can hold N complete chunks.  if fs has
	 * aligned inodes, all chunks must start at a fs_ino_alignment*N'th
	 * agbno.  skip recs with badly aligned starting inodes.
	 */
	if (ino == 0 ||
	    (inodes_per_block <= XFS_INODES_PER_CHUNK && off != 0) ||
	    (inodes_per_block > XFS_INODES_PER_CHUNK &&
	     off % XFS_INODES_PER_CHUNK != 0) ||
	    (fs_aligned_inodes && fs_ino_alignment &&
	     agbno % fs_ino_alignment != 0)) {
		do_warn(
_("badly aligned %s rec (starting inode = %" PRIu64 ")\n"),
			inobt_name, lino);
		suspect++;
	}

	/*
	 * verify numeric validity of inode chunk first before inserting into a
	 * tree. don't have to worry about the overflow case because the
	 * starting ino number of a chunk can only get within 255 inodes of max
	 * (NULLAGINO). if it gets closer, the agino number will be illegal as
	 * the agbno will be too large.
	 */
	if (verify_aginum(mp, agno, ino)) {
		do_warn(
_("bad starting inode # (%" PRIu64 " (0x%x 0x%x)) in %s rec, skipping rec\n"),
			lino, agno, ino, inobt_name);
		*skip = true;
		return ++suspect;
	}

	if (verify_aginum(mp, agno,
			ino + XFS_INODES_PER_CHUNK - 1)) {
		do_warn(
_("bad ending inode # (%" PRIu64 " (0x%x 0x%zx)) in %s rec, skipping rec\n"),
			lino + XFS_INODES_PER_CHUNK - 1,
			agno,
			ino + XFS_INODES_PER_CHUNK - 1,
			inobt_name);
		*skip = true;
		return ++suspect;
	}

	return suspect;
}
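
/*
 * Worked example for the alignment checks above (illustrative numbers,
 * not from any particular filesystem): with 256-byte inodes in 4k
 * blocks, inodes_per_block = 16 <= XFS_INODES_PER_CHUNK (64), so a
 * chunk spans four blocks and ir_startino must sit at offset 0 of its
 * block.  With 64k blocks, inodes_per_block = 256 > 64, so four chunks
 * share each block and ir_startino must land on a 64-inode boundary
 * within the block.
 */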

/*
 * Process the state of individual inodes in an on-disk inobt record and import
 * into the appropriate in-core tree based on whether the on-disk tree is
 * suspect. Return the total and free inode counts based on the record free and
 * hole masks.
 */
static int
import_single_ino_chunk(
	xfs_agnumber_t		agno,
	enum inobt_type		type,
	struct xfs_inobt_rec	*rp,
	int			suspect,
	int			*p_nfree,
	int			*p_ninodes)
{
	struct ino_tree_node	*ino_rec = NULL;
	const char		*inobt_name = inobt_names[type];
	xfs_agino_t		ino;
	int			j;
	int			nfree;
	int			ninodes;

	ino = be32_to_cpu(rp->ir_startino);

	if (!suspect) {
		if (XFS_INOBT_IS_FREE_DISK(rp, 0))
			ino_rec = set_inode_free_alloc(mp, agno, ino);
		else
			ino_rec = set_inode_used_alloc(mp, agno, ino);
		for (j = 1; j < XFS_INODES_PER_CHUNK; j++) {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				set_inode_free(ino_rec, j);
			else
				set_inode_used(ino_rec, j);
		}
	} else {
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				add_aginode_uncertain(mp, agno, ino + j, 1);
			else
				add_aginode_uncertain(mp, agno, ino + j, 0);
		}
	}

	/*
	 * Mark sparse inodes as such in the in-core tree. Verify that sparse
	 * inodes are free and that freecount is consistent with the free mask.
	 */
	nfree = ninodes = 0;
	for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
		if (ino_issparse(rp, j)) {
			if (!suspect && !XFS_INOBT_IS_FREE_DISK(rp, j)) {
				do_warn(
_("ir_holemask/ir_free mismatch, %s chunk %d/%u, holemask 0x%x free 0x%llx\n"),
					inobt_name, agno, ino,
					be16_to_cpu(rp->ir_u.sp.ir_holemask),
					(unsigned long long)be64_to_cpu(rp->ir_free));
				suspect++;
			}
			if (!suspect && ino_rec)
				set_inode_sparse(ino_rec, j);
		} else {
			/* count fields track non-sparse inos */
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				nfree++;
			ninodes++;
		}
	}

	*p_nfree = nfree;
	*p_ninodes = ninodes;

	return suspect;
}
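
/*
 * Example of the sparse accounting above (illustrative values,
 * assuming the usual 4 inodes per ir_holemask bit): a record with
 * ir_holemask = 0x0003 marks inodes 0-7 of the chunk sparse, so those
 * inodes must also be set in ir_free, ir_count drops to 56, and only
 * the remaining 56 inodes contribute to nfree/ninodes.
 */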

static int
scan_single_ino_chunk(
	xfs_agnumber_t		agno,
	xfs_inobt_rec_t		*rp,
	int			suspect)
{
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			j;
	int			nfree;
	int			ninodes;
	int			off;
	int			state;
	ino_tree_node_t		*first_rec, *last_rec;
	int			freecount;
	bool			skip = false;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	freecount = inorec_get_freecount(mp, rp);

	/*
	 * Verify record alignment, start/end inode numbers, etc.
	 */
	suspect = verify_single_ino_chunk_align(agno, INOBT, rp, suspect,
						&skip);
	if (skip)
		return suspect;

	/*
	 * set state of each block containing inodes
	 */
	if (off == 0 && !suspect)  {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock)  {

			/* inodes in sparse chunks don't use blocks */
			if (ino_issparse(rp, j))
				continue;

			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
			state = get_bmap(agno, agbno);
			switch (state) {
			case XR_E_INO:
				break;
			case XR_E_UNKNOWN:
			case XR_E_INO1:	/* seen by rmap */
				set_bmap(agno, agbno, XR_E_INO);
				break;
			case XR_E_INUSE_FS:
			case XR_E_INUSE_FS1:
				if (agno == 0 &&
				    ino + j >= first_prealloc_ino &&
				    ino + j < last_prealloc_ino) {
					set_bmap(agno, agbno, XR_E_INO);
					break;
				}
				/* fall through */
			default:
				/* XXX - maybe should mark block a duplicate */
				do_warn(
_("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				return ++suspect;
			}
		}
	}

	/*
	 * ensure only one avl entry per chunk
	 */
	find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);
	if (first_rec != NULL)  {
		/*
		 * this chunk overlaps with one (or more)
		 * already in the tree
		 */
		do_warn(
_("inode rec for ino %" PRIu64 " (%d/%d) overlaps existing rec (start %d/%d)\n"),
			lino, agno, ino, agno, first_rec->ino_startnum);
		suspect++;

		/*
		 * if the 2 chunks start at the same place,
		 * then we don't have to put this one
		 * in the uncertain list.  go to the next one.
		 */
		if (first_rec->ino_startnum == ino)
			return suspect;
	}

	/*
	 * Import the state of individual inodes into the appropriate in-core
	 * trees, mark them free or used, and get the resulting total and free
	 * inode counts.
	 */
	nfree = ninodes = 0;
	suspect = import_single_ino_chunk(agno, INOBT, rp, suspect, &nfree,
					  &ninodes);

	if (nfree != freecount) {
		do_warn(
_("ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree %d\n"),
			agno, ino, freecount, nfree);
	}

	/* verify sparse record formats have a valid inode count */
	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    ninodes != rp->ir_u.sp.ir_count) {
		do_warn(
_("invalid inode count, inode chunk %d/%u, count %d ninodes %d\n"),
			agno, ino, rp->ir_u.sp.ir_count, ninodes);
	}

	return suspect;
}

static int
scan_single_finobt_chunk(
	xfs_agnumber_t		agno,
	xfs_inobt_rec_t		*rp,
	int			suspect)
{
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			j;
	int			nfree;
	int			ninodes;
	int			off;
	int			state;
	ino_tree_node_t		*first_rec, *last_rec;
	int			freecount;
	bool			skip = false;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	freecount = inorec_get_freecount(mp, rp);

	/*
	 * Verify record alignment, start/end inode numbers, etc.
	 */
	suspect = verify_single_ino_chunk_align(agno, FINOBT, rp, suspect,
						&skip);
	if (skip)
		return suspect;

	/*
	 * cross check state of each block containing inodes referenced by the
	 * finobt against what we have already scanned from the alloc inobt.
	 */
	if (off == 0 && !suspect) {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock) {
			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
			state = get_bmap(agno, agbno);

			/* sparse inodes should not refer to inode blocks */
			if (ino_issparse(rp, j)) {
				if (state == XR_E_INO) {
					do_warn(
_("sparse inode chunk claims inode block, finobt block - agno %d, bno %d, inopb %d\n"),
						agno, agbno, mp->m_sb.sb_inopblock);
					suspect++;
				}
				continue;
			}

			switch (state) {
			case XR_E_INO:
				break;
			case XR_E_INO1:	/* seen by rmap */
				set_bmap(agno, agbno, XR_E_INO);
				break;
			case XR_E_UNKNOWN:
				do_warn(
_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);

				set_bmap(agno, agbno, XR_E_INO);
				suspect++;
				break;
			case XR_E_INUSE_FS:
			case XR_E_INUSE_FS1:
				if (agno == 0 &&
				    ino + j >= first_prealloc_ino &&
				    ino + j < last_prealloc_ino) {
					do_warn(
_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
						agno, agbno, mp->m_sb.sb_inopblock);

					set_bmap(agno, agbno, XR_E_INO);
					suspect++;
					break;
				}
				/* fall through */
			default:
				do_warn(
_("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				return ++suspect;
			}
		}
	}

	/*
	 * ensure we have an incore entry for each chunk
	 */
	find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);

	if (first_rec) {
		if (suspect)
			return suspect;

		/*
		 * verify consistency between finobt record and incore state
		 */
		if (first_rec->ino_startnum != ino) {
			do_warn(
_("finobt rec for ino %" PRIu64 " (%d/%u) does not match existing rec (%d/%d)\n"),
				lino, agno, ino, agno, first_rec->ino_startnum);
			return ++suspect;
		}

		nfree = ninodes = 0;
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			int isfree = XFS_INOBT_IS_FREE_DISK(rp, j);
			int issparse = ino_issparse(rp, j);

			if (!issparse)
				ninodes++;
			if (isfree && !issparse)
				nfree++;

			/*
			 * inode allocation state should be consistent between
			 * the inobt and finobt
			 */
			if (!suspect &&
			    isfree != is_inode_free(first_rec, j))
				suspect++;

			if (!suspect &&
			    issparse != is_inode_sparse(first_rec, j))
				suspect++;
		}

		goto check_freecount;
	}

	/*
	 * The finobt contains a record that the previous inobt scan never
	 * found. Warn about it and import the inodes into the appropriate
	 * trees.
	 *
	 * Note that this should do the right thing if the previous inobt scan
	 * had added these inodes to the uncertain tree. If the finobt is not
	 * suspect, these inodes should supersede the uncertain ones.
	 * Otherwise, the uncertain tree helpers handle the case where
	 * uncertain inodes already exist.
	 */
	do_warn(_("undiscovered finobt record, ino %" PRIu64 " (%d/%u)\n"),
		lino, agno, ino);

	nfree = ninodes = 0;
	suspect = import_single_ino_chunk(agno, FINOBT, rp, suspect, &nfree,
					  &ninodes);

check_freecount:

	/*
	 * Verify that the record freecount matches the actual number of free
	 * inodes counted in the record. Don't increment 'suspect' here, since
	 * we have already verified the allocation state of the individual
	 * inodes against the in-core state. This will have already incremented
	 * 'suspect' if something is wrong. If suspect hasn't been set at this
	 * point, these warnings mean that we have a simple freecount
	 * inconsistency or a stray finobt record (as opposed to a broader tree
	 * corruption). Issue a warning and continue the scan. The final btree
	 * reconstruction will correct this naturally.
	 */
	if (nfree != freecount) {
		do_warn(
_("finobt ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree %d\n"),
			agno, ino, freecount, nfree);
	}

	if (!nfree) {
		do_warn(
_("finobt record with no free inodes, inode chunk %d/%u\n"), agno, ino);
	}

	/* verify sparse record formats have a valid inode count */
	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    ninodes != rp->ir_u.sp.ir_count) {
		do_warn(
_("invalid inode count, inode chunk %d/%u, count %d ninodes %d\n"),
			agno, ino, rp->ir_u.sp.ir_count, ninodes);
	}

	return suspect;
}

/*
 * this one walks the inode btrees sucking the info there into
 * the incore avl tree.  We try and rescue corrupted btree records
 * to minimize our chances of losing inodes.  Inode info from potentially
 * corrupt sources could be bogus so rather than put the info straight
 * into the tree, instead we put it on a list and try and verify the
 * info in the next phase by examining what's on disk.  At that point,
 * we'll be able to figure out what's what and stick the corrected info
 * into the tree.  We do bail out at some point and give up on a subtree
 * so as to avoid walking randomly all over the ag.
 *
 * Note that it's also ok if the free/inuse info is wrong, we can correct
1937 * that when we examine the on-disk inode. The important thing is to
1938 * get the start and alignment of the inode chunks right. Those chunks
1939 * that we aren't sure about go into the uncertain list.
1940 */
1941 static void
1942 scan_inobt(
1943 struct xfs_btree_block *block,
1944 int level,
1945 xfs_agblock_t bno,
1946 xfs_agnumber_t agno,
1947 int suspect,
1948 int isroot,
1949 uint32_t magic,
1950 void *priv)
1951 {
1952 struct aghdr_cnts *agcnts = priv;
1953 int i;
1954 int numrecs;
1955 int state;
1956 xfs_inobt_ptr_t *pp;
1957 xfs_inobt_rec_t *rp;
1958 int hdr_errors;
1959 int freecount;
1960
1961 hdr_errors = 0;
1962
1963 if (be32_to_cpu(block->bb_magic) != magic) {
1964 do_warn(_("bad magic # %#x in inobt block %d/%d\n"),
1965 be32_to_cpu(block->bb_magic), agno, bno);
1966 hdr_errors++;
1967 bad_ino_btree = 1;
1968 if (suspect)
1969 return;
1970 }
1971 if (be16_to_cpu(block->bb_level) != level) {
1972 do_warn(_("expected level %d got %d in inobt block %d/%d\n"),
1973 level, be16_to_cpu(block->bb_level), agno, bno);
1974 hdr_errors++;
1975 bad_ino_btree = 1;
1976 if (suspect)
1977 return;
1978 }
1979
1980 /*
1981 * check for btree blocks multiply claimed, any unknown/free state
1982 * is ok in the bitmap block.
1983 */
1984 state = get_bmap(agno, bno);
1985 switch (state) {
1986 case XR_E_FS_MAP1: /* already been seen by an rmap scan */
1987 case XR_E_UNKNOWN:
1988 case XR_E_FREE1:
1989 case XR_E_FREE:
1990 set_bmap(agno, bno, XR_E_FS_MAP);
1991 break;
1992 default:
1993 set_bmap(agno, bno, XR_E_MULT);
1994 do_warn(
1995 _("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
1996 state, agno, bno, suspect);
1997 }
1998
1999 numrecs = be16_to_cpu(block->bb_numrecs);
2000
2001 /*
2002 * leaf record in btree
2003 */
2004 if (level == 0) {
2005 /* check for trashed btree block */
2006
2007 if (numrecs > mp->m_inobt_mxr[0]) {
2008 numrecs = mp->m_inobt_mxr[0];
2009 hdr_errors++;
2010 }
2011 if (isroot == 0 && numrecs < mp->m_inobt_mnr[0]) {
2012 numrecs = mp->m_inobt_mnr[0];
2013 hdr_errors++;
2014 }
2015
2016 if (hdr_errors) {
2017 bad_ino_btree = 1;
2018 do_warn(_("dubious inode btree block header %d/%d\n"),
2019 agno, bno);
2020 suspect++;
2021 }
2022
2023 rp = XFS_INOBT_REC_ADDR(mp, block, 1);
2024
2025 /*
2026 * step through the records; each record points to
2027 * a chunk of inodes. The start of each inode chunk should
2028 * be block-aligned. Each inode btree rec should point
2029 * to the start of a block of inodes or the start of a group
2030 * of INODES_PER_CHUNK (64) inodes. Skip processing of
2031 * bogus records.
2032 */
2033 for (i = 0; i < numrecs; i++) {
2034 freecount = inorec_get_freecount(mp, &rp[i]);
2035
2036 if (magic == XFS_IBT_MAGIC ||
2037 magic == XFS_IBT_CRC_MAGIC) {
2038 int icount = XFS_INODES_PER_CHUNK;
2039
2040 /*
2041 * ir_count holds the inode count for all
2042 * records on fs' with sparse inode support
2043 */
2044 if (xfs_sb_version_hassparseinodes(&mp->m_sb))
2045 icount = rp[i].ir_u.sp.ir_count;
2046
2047 agcnts->agicount += icount;
2048 agcnts->agifreecount += freecount;
2049 agcnts->ifreecount += freecount;
2050
2051 suspect = scan_single_ino_chunk(agno, &rp[i],
2052 suspect);
2053 } else {
2054 /*
2055 * the finobt tracks records with free inodes,
2056 * so only the free inode count is expected to be
2057 * consistent with the agi
2058 */
2059 agcnts->fibtfreecount += freecount;
2060
2061 suspect = scan_single_finobt_chunk(agno, &rp[i],
2062 suspect);
2063 }
2064 }
2065
2066 if (suspect)
2067 bad_ino_btree = 1;
2068
2069 return;
2070 }
2071
2072 /*
2073 * interior record, continue on
2074 */
2075 if (numrecs > mp->m_inobt_mxr[1]) {
2076 numrecs = mp->m_inobt_mxr[1];
2077 hdr_errors++;
2078 }
2079 if (isroot == 0 && numrecs < mp->m_inobt_mnr[1]) {
2080 numrecs = mp->m_inobt_mnr[1];
2081 hdr_errors++;
2082 }
2083
2084 pp = XFS_INOBT_PTR_ADDR(mp, block, 1, mp->m_inobt_mxr[1]);
2085
2086 /*
2087 * don't pass bogus tree flag down further if this block
2088 * looked ok. bail out if two levels in a row look bad.
2089 */
2090
2091 if (suspect && !hdr_errors)
2092 suspect = 0;
2093
2094 if (hdr_errors) {
2095 bad_ino_btree = 1;
2096 if (suspect)
2097 return;
2098 suspect++;
2099 }
2100
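/* descend into each child block whose pointer verifies as a valid agbno */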
2101 for (i = 0; i < numrecs; i++) {
2102 if (be32_to_cpu(pp[i]) != 0 && verify_agbno(mp, agno,
2103 be32_to_cpu(pp[i])))
2104 scan_sbtree(be32_to_cpu(pp[i]), level, agno,
2105 suspect, scan_inobt, 0, magic, priv,
2106 &xfs_inobt_buf_ops);
2107 }
2108 }
2109
2110 static void
2111 scan_freelist(
2112 xfs_agf_t *agf,
2113 struct aghdr_cnts *agcnts)
2114 {
2115 xfs_buf_t *agflbuf;
2116 xfs_agnumber_t agno;
2117 xfs_agblock_t bno;
2118 int count;
2119 int i;
2120 __be32 *freelist;
2121
2122 agno = be32_to_cpu(agf->agf_seqno);
2123
2124 if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
2125 XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
2126 XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
2127 set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_INUSE_FS);
2128
2129 if (be32_to_cpu(agf->agf_flcount) == 0)
2130 return;
2131
2132 agflbuf = libxfs_readbuf(mp->m_dev,
2133 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
2134 XFS_FSS_TO_BB(mp, 1), 0, &xfs_agfl_buf_ops);
2135 if (!agflbuf) {
2136 do_abort(_("can't read agfl block for ag %d\n"), agno);
2137 return;
2138 }
2139 if (agflbuf->b_error == -EFSBADCRC)
2140 do_warn(_("agfl has bad CRC for ag %d\n"), agno);
2141
2142 freelist = XFS_BUF_TO_AGFL_BNO(mp, agflbuf);
2143 i = be32_to_cpu(agf->agf_flfirst);
2144
2145 if (no_modify) {
2146 /* agf values not fixed in verify_set_agf, so recheck */
2147 if (be32_to_cpu(agf->agf_flfirst) >= libxfs_agfl_size(mp) ||
2148 be32_to_cpu(agf->agf_fllast) >= libxfs_agfl_size(mp)) {
2149 do_warn(_("agf %d freelist blocks bad, skipping "
2150 "freelist scan\n"), agno);
2151 return;
2152 }
2153 }
2154
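/*
 * The AGFL is a fixed-size circular array of free block numbers.
 * Walk from agf_flfirst to agf_fllast, wrapping the index at
 * libxfs_agfl_size(mp); e.g. with a 120-entry AGFL, flfirst 118 and
 * fllast 2, the walk visits slots 118, 119, 0, 1, 2.
 */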
2155 count = 0;
2156 for (;;) {
2157 bno = be32_to_cpu(freelist[i]);
2158 if (verify_agbno(mp, agno, bno))
2159 set_bmap(agno, bno, XR_E_FREE);
2160 else
2161 do_warn(_("bad agbno %u in agfl, agno %d\n"),
2162 bno, agno);
2163 count++;
2164 if (i == be32_to_cpu(agf->agf_fllast))
2165 break;
2166 if (++i == libxfs_agfl_size(mp))
2167 i = 0;
2168 }
2169 if (count != be32_to_cpu(agf->agf_flcount)) {
2170 do_warn(_("freeblk count %d != flcount %d in ag %d\n"), count,
2171 be32_to_cpu(agf->agf_flcount), agno);
2172 }
2173
2174 agcnts->fdblocks += count;
2175
2176 libxfs_putbuf(agflbuf);
2177 }
2178
2179 static void
2180 validate_agf(
2181 struct xfs_agf *agf,
2182 xfs_agnumber_t agno,
2183 struct aghdr_cnts *agcnts)
2184 {
2185 xfs_agblock_t bno;
2186 uint32_t magic;
2187
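/*
 * Walk both free space btrees, bnobt (keyed by start block) and
 * cntbt (keyed by extent length), accumulating free block and
 * longest-extent counts to check against the AGF fields below.
 */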
2188 bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
2189 if (bno != 0 && verify_agbno(mp, agno, bno)) {
2190 magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_ABTB_CRC_MAGIC
2191 : XFS_ABTB_MAGIC;
2192 scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
2193 agno, 0, scan_allocbt, 1, magic, agcnts,
2194 &xfs_allocbt_buf_ops);
2195 } else {
2196 do_warn(_("bad agbno %u for btbno root, agno %d\n"),
2197 bno, agno);
2198 }
2199
2200 bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
2201 if (bno != 0 && verify_agbno(mp, agno, bno)) {
2202 magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_ABTC_CRC_MAGIC
2203 : XFS_ABTC_MAGIC;
2204 scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
2205 agno, 0, scan_allocbt, 1, magic, agcnts,
2206 &xfs_allocbt_buf_ops);
2207 } else {
2208 do_warn(_("bad agbno %u for btbcnt root, agno %d\n"),
2209 bno, agno);
2210 }
2211
2212 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
2213 struct rmap_priv priv;
2214
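/*
 * Initialise the rmapbt scan state; scan_rmapbt() uses these fields
 * to detect out-of-order, overlapping or otherwise inconsistent
 * reverse mappings as it walks the tree.
 */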
2215 memset(&priv.high_key, 0xFF, sizeof(priv.high_key));
2216 priv.high_key.rm_blockcount = 0;
2217 priv.agcnts = agcnts;
2218 priv.last_rec.rm_owner = XFS_RMAP_OWN_UNKNOWN;
2219 priv.nr_blocks = 0;
2220 bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
2221 if (bno != 0 && verify_agbno(mp, agno, bno)) {
2222 scan_sbtree(bno,
2223 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
2224 agno, 0, scan_rmapbt, 1, XFS_RMAP_CRC_MAGIC,
2225 &priv, &xfs_rmapbt_buf_ops);
2226 if (be32_to_cpu(agf->agf_rmap_blocks) != priv.nr_blocks)
2227 do_warn(_("bad rmapbt block count %u, saw %u\n"),
2228 be32_to_cpu(agf->agf_rmap_blocks),
2229 priv.nr_blocks);
2230 } else {
2231 do_warn(_("bad agbno %u for rmapbt root, agno %d\n"),
2232 bno, agno);
2233 rmap_avoid_check();
2234 }
2235 }
2236
2237 if (xfs_sb_version_hasreflink(&mp->m_sb)) {
2238 bno = be32_to_cpu(agf->agf_refcount_root);
2239 if (bno != 0 && verify_agbno(mp, agno, bno)) {
2240 struct refc_priv priv;
2241
2242 memset(&priv, 0, sizeof(priv));
2243 scan_sbtree(bno,
2244 be32_to_cpu(agf->agf_refcount_level),
2245 agno, 0, scan_refcbt, 1, XFS_REFC_CRC_MAGIC,
2246 &priv, &xfs_refcountbt_buf_ops);
2247 if (be32_to_cpu(agf->agf_refcount_blocks) != priv.nr_blocks)
2248 do_warn(_("bad refcountbt block count %u, saw %u\n"),
2249 be32_to_cpu(agf->agf_refcount_blocks),
2250 priv.nr_blocks);
2251 } else {
2252 do_warn(_("bad agbno %u for refcntbt root, agno %d\n"),
2253 bno, agno);
2254 refcount_avoid_check();
2255 }
2256 }
2257
2258 if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
2259 do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
2260 be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
2261 }
2262
2263 if (be32_to_cpu(agf->agf_longest) != agcnts->agflongest) {
2264 do_warn(_("agf_longest %u, counted %u in ag %u\n"),
2265 be32_to_cpu(agf->agf_longest), agcnts->agflongest, agno);
2266 }
2267
2268 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2269 be32_to_cpu(agf->agf_btreeblks) != agcnts->agfbtreeblks) {
2270 do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"),
2271 be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
2272 }
2273
2274 }
2275
2276 static void
2277 validate_agi(
2278 struct xfs_agi *agi,
2279 xfs_agnumber_t agno,
2280 struct aghdr_cnts *agcnts)
2281 {
2282 xfs_agblock_t bno;
2283 int i;
2284 uint32_t magic;
2285
2286 bno = be32_to_cpu(agi->agi_root);
2287 if (bno != 0 && verify_agbno(mp, agno, bno)) {
2288 magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_IBT_CRC_MAGIC
2289 : XFS_IBT_MAGIC;
2290 scan_sbtree(bno, be32_to_cpu(agi->agi_level),
2291 agno, 0, scan_inobt, 1, magic, agcnts,
2292 &xfs_inobt_buf_ops);
2293 } else {
2294 do_warn(_("bad agbno %u for inobt root, agno %d\n"),
2295 be32_to_cpu(agi->agi_root), agno);
2296 }
2297
2298 if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
2299 bno = be32_to_cpu(agi->agi_free_root);
2300 if (bno != 0 && verify_agbno(mp, agno, bno)) {
2301 magic = xfs_sb_version_hascrc(&mp->m_sb) ?
2302 XFS_FIBT_CRC_MAGIC : XFS_FIBT_MAGIC;
2303 scan_sbtree(bno, be32_to_cpu(agi->agi_free_level),
2304 agno, 0, scan_inobt, 1, magic, agcnts,
2305 &xfs_inobt_buf_ops);
2306 } else {
2307 do_warn(_("bad agbno %u for finobt root, agno %d\n"),
2308 be32_to_cpu(agi->agi_free_root), agno);
2309 }
2310 }
2311
2312 if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
2313 do_warn(_("agi_count %u, counted %u in ag %u\n"),
2314 be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
2315 }
2316
2317 if (be32_to_cpu(agi->agi_freecount) != agcnts->agifreecount) {
2318 do_warn(_("agi_freecount %u, counted %u in ag %u\n"),
2319 be32_to_cpu(agi->agi_freecount), agcnts->agifreecount, agno);
2320 }
2321
2322 if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
2323 be32_to_cpu(agi->agi_freecount) != agcnts->fibtfreecount) {
2324 do_warn(_("agi_freecount %u, counted %u in ag %u finobt\n"),
2325 be32_to_cpu(agi->agi_freecount), agcnts->fibtfreecount,
2326 agno);
2327 }
2328
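/*
 * On a cleanly unmounted filesystem every AGI unlinked bucket is
 * empty (NULLAGINO); any live entry points at an inode that was
 * still on an unlinked list when the filesystem went down.
 */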
2329 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
2330 xfs_agino_t agino = be32_to_cpu(agi->agi_unlinked[i]);
2331
2332 if (agino != NULLAGINO) {
2333 do_warn(
2334 _("agi unlinked bucket %d is %u in ag %u (inode=%" PRIu64 ")\n"),
2335 i, agino, agno,
2336 XFS_AGINO_TO_INO(mp, agno, agino));
2337 }
2338 }
2339 }
2340
2341 /*
2342 * Scan an AG for obvious corruption.
2343 */
2344 static void
2345 scan_ag(
2346 struct workqueue *wq,
2347 xfs_agnumber_t agno,
2348 void *arg)
2349 {
2350 struct aghdr_cnts *agcnts = arg;
2351 struct xfs_agf *agf;
2352 struct xfs_buf *agfbuf = NULL;
2353 int agf_dirty = 0;
2354 struct xfs_agi *agi;
2355 struct xfs_buf *agibuf = NULL;
2356 int agi_dirty = 0;
2357 struct xfs_sb *sb = NULL;
2358 struct xfs_buf *sbbuf = NULL;
2359 int sb_dirty = 0;
2360 int status;
2361 char *objname = NULL;
2362
2363 sb = (struct xfs_sb *)calloc(BBTOB(XFS_FSS_TO_BB(mp, 1)), 1);
2364 if (!sb) {
2365 do_error(_("can't allocate memory for superblock\n"));
2366 return;
2367 }
2368
2369 sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
2370 XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
2371 if (!sbbuf) {
2372 objname = _("root superblock");
2373 goto out_free_sb;
2374 }
2375 libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
2376
2377 agfbuf = libxfs_readbuf(mp->m_dev,
2378 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2379 XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
2380 if (!agfbuf) {
2381 objname = _("agf block");
2382 goto out_free_sbbuf;
2383 }
2384 agf = XFS_BUF_TO_AGF(agfbuf);
2385
2386 agibuf = libxfs_readbuf(mp->m_dev,
2387 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
2388 XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
2389 if (!agibuf) {
2390 objname = _("agi block");
2391 goto out_free_agfbuf;
2392 }
2393 agi = XFS_BUF_TO_AGI(agibuf);
2394
2395 /* fix up bad ag headers */
2396
2397 status = verify_set_agheader(mp, sbbuf, sb, agf, agi, agno);
2398
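/*
 * status is a mask of XR_AG_* bits identifying which of the
 * superblock, AGF and AGI failed verification; each bit is handled
 * (or merely reported in no_modify mode) below.
 */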
2399 if (status & XR_AG_SB_SEC) {
2400 if (!no_modify)
2401 sb_dirty = 1;
2402 /*
2403 * clear bad sector bit because we don't want
2404 * to skip further processing. we just want to
2405 * ensure that we write out the modified sb buffer.
2406 */
2407 status &= ~XR_AG_SB_SEC;
2408 }
2409 if (status & XR_AG_SB) {
2410 if (!no_modify) {
2411 do_warn(_("reset bad sb for ag %d\n"), agno);
2412 sb_dirty = 1;
2413 } else {
2414 do_warn(_("would reset bad sb for ag %d\n"), agno);
2415 }
2416 }
2417 if (status & XR_AG_AGF) {
2418 if (!no_modify) {
2419 do_warn(_("reset bad agf for ag %d\n"), agno);
2420 agf_dirty = 1;
2421 } else {
2422 do_warn(_("would reset bad agf for ag %d\n"), agno);
2423 }
2424 }
2425 if (status & XR_AG_AGI) {
2426 if (!no_modify) {
2427 do_warn(_("reset bad agi for ag %d\n"), agno);
2428 agi_dirty = 1;
2429 } else {
2430 do_warn(_("would reset bad agi for ag %d\n"), agno);
2431 }
2432 }
2433
2434 if (status && no_modify) {
2435 do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
2436 agno);
2437 goto out_free_agibuf;
2438 }
2439
2440 scan_freelist(agf, agcnts);
2441
2442 validate_agf(agf, agno, agcnts);
2443 validate_agi(agi, agno, agcnts);
2444
2445 ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
2446 ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
2447 ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
2448
2449 /*
2450 * Only pay attention to CRC/verifier errors if we can correct them.
2451 * Note that we can get uncorrected EFSCORRUPTED errors here because
2452 * the verifier will flag on out of range values that we can't correct
2453 * until phase 5 when we have all the information necessary to rebuild
2454 * the freespace/inode btrees. We can correct bad CRC errors
2455 * immediately, though.
2456 */
2457 if (!no_modify) {
2458 agi_dirty += (agibuf->b_error == -EFSBADCRC);
2459 agf_dirty += (agfbuf->b_error == -EFSBADCRC);
2460 sb_dirty += (sbbuf->b_error == -EFSBADCRC);
2461 }
2462
2463 if (agi_dirty && !no_modify)
2464 libxfs_writebuf(agibuf, 0);
2465 else
2466 libxfs_putbuf(agibuf);
2467
2468 if (agf_dirty && !no_modify)
2469 libxfs_writebuf(agfbuf, 0);
2470 else
2471 libxfs_putbuf(agfbuf);
2472
2473 if (sb_dirty && !no_modify) {
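/* keep the incore superblock in sync with the repaired primary */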
2474 if (agno == 0)
2475 memcpy(&mp->m_sb, sb, sizeof(xfs_sb_t));
2476 libxfs_sb_to_disk(XFS_BUF_TO_SBP(sbbuf), sb);
2477 libxfs_writebuf(sbbuf, 0);
2478 } else
2479 libxfs_putbuf(sbbuf);
2480 free(sb);
2481 PROG_RPT_INC(prog_rpt_done[agno], 1);
2482
2483 #ifdef XR_INODE_TRACE
2484 print_inode_list(agno);
2485 #endif
2486 return;
2487
2488 out_free_agibuf:
2489 libxfs_putbuf(agibuf);
2490 out_free_agfbuf:
2491 libxfs_putbuf(agfbuf);
2492 out_free_sbbuf:
2493 libxfs_putbuf(sbbuf);
2494 out_free_sb:
2495 free(sb);
2496
2497 if (objname)
2498 do_error(_("can't get %s for ag %d\n"), objname, agno);
2499 }
2500
2501 #define SCAN_THREADS 32
2502
2503 void
2504 scan_ags(
2505 struct xfs_mount *mp,
2506 int scan_threads)
2507 {
2508 struct aghdr_cnts *agcnts;
2509 uint64_t fdblocks = 0;
2510 uint64_t icount = 0;
2511 uint64_t ifreecount = 0;
2512 uint64_t usedblocks = 0;
2513 xfs_agnumber_t i;
2514 struct workqueue wq;
2515
2516 agcnts = malloc(mp->m_sb.sb_agcount * sizeof(*agcnts));
2517 if (!agcnts) {
2518 do_abort(_("no memory for ag header counts\n"));
2519 return;
2520 }
2521 memset(agcnts, 0, mp->m_sb.sb_agcount * sizeof(*agcnts));
2522
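/*
 * Scan all AGs in parallel. destroy_work_queue() waits for the
 * queued work to finish, so the per-AG counts are complete before we
 * tally them below.
 */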
2523 create_work_queue(&wq, mp, scan_threads);
2524
2525 for (i = 0; i < mp->m_sb.sb_agcount; i++)
2526 queue_work(&wq, scan_ag, i, &agcnts[i]);
2527
2528 destroy_work_queue(&wq);
2529
2530 /* tally up the counts */
2531 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
2532 fdblocks += agcnts[i].fdblocks;
2533 icount += agcnts[i].agicount;
2534 ifreecount += agcnts[i].ifreecount;
2535 usedblocks += agcnts[i].usedblocks;
2536 }
2537
2538 free(agcnts);
2539
2540 /*
2541 * Validate that our manual counts match the superblock.
2542 */
2543 if (mp->m_sb.sb_icount != icount) {
2544 do_warn(_("sb_icount %" PRIu64 ", counted %" PRIu64 "\n"),
2545 mp->m_sb.sb_icount, icount);
2546 }
2547
2548 if (mp->m_sb.sb_ifree != ifreecount) {
2549 do_warn(_("sb_ifree %" PRIu64 ", counted %" PRIu64 "\n"),
2550 mp->m_sb.sb_ifree, ifreecount);
2551 }
2552
2553 if (mp->m_sb.sb_fdblocks != fdblocks) {
2554 do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"),
2555 mp->m_sb.sb_fdblocks, fdblocks);
2556 }
2557
2558 if (usedblocks &&
2559 usedblocks != mp->m_sb.sb_dblocks - fdblocks) {
2560 do_warn(_("used blocks %" PRIu64 ", counted %" PRIu64 "\n"),
2561 mp->m_sb.sb_dblocks - fdblocks, usedblocks);
2562 }
2563 }