/*
 * repair/scan.c — from xfsprogs (xfsprogs-dev); gitweb capture header removed.
 */
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6
7 #include "libxfs.h"
8 #include "avl.h"
9 #include "globals.h"
10 #include "agheader.h"
11 #include "incore.h"
12 #include "protos.h"
13 #include "err_protos.h"
14 #include "dinode.h"
15 #include "scan.h"
16 #include "versions.h"
17 #include "bmap.h"
18 #include "progress.h"
19 #include "threads.h"
20 #include "slab.h"
21 #include "rmap.h"
22
/* Filesystem being repaired; installed by set_mp() before any scan runs. */
static xfs_mount_t	*mp = NULL;

/*
 * Variables to validate AG header values against the manual count
 * from the btree traversal.
 */
struct aghdr_cnts {
	xfs_agnumber_t	agno;		/* AG these counts were gathered for */
	xfs_extlen_t	agffreeblks;	/* free blocks summed from the cntbt records */
	xfs_extlen_t	agflongest;	/* longest free extent seen in the cntbt */
	uint64_t	agfbtreeblks;	/* non-root blocks used by the AGF btrees */
	uint32_t	agicount;	/* allocated inode count — presumably from inobt; confirm in caller */
	uint32_t	agifreecount;	/* free inode count — presumably from inobt; confirm in caller */
	uint64_t	fdblocks;	/* free data blocks (free extents + freeable btree blocks) */
	uint64_t	usedblocks;	/* in-use data blocks — accumulated elsewhere; confirm in caller */
	uint64_t	ifreecount;	/* 64-bit free inode tally — accumulated elsewhere; confirm in caller */
	uint32_t	fibtfreecount;	/* free inodes per the finobt — accumulated elsewhere; confirm */
};
41
/*
 * Install the global mount pointer used by all scan routines.
 * The buffer cache is purged first so stale buffers belonging to a
 * previously attached filesystem cannot be handed back during the scan.
 */
void
set_mp(xfs_mount_t *mpp)
{
	libxfs_bcache_purge();
	mp = mpp;
}
48
49 static void
50 scan_sbtree(
51 xfs_agblock_t root,
52 int nlevels,
53 xfs_agnumber_t agno,
54 int suspect,
55 void (*func)(struct xfs_btree_block *block,
56 int level,
57 xfs_agblock_t bno,
58 xfs_agnumber_t agno,
59 int suspect,
60 int isroot,
61 uint32_t magic,
62 void *priv,
63 const struct xfs_buf_ops *ops),
64 int isroot,
65 uint32_t magic,
66 void *priv,
67 const struct xfs_buf_ops *ops)
68 {
69 xfs_buf_t *bp;
70
71 bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, root),
72 XFS_FSB_TO_BB(mp, 1), 0, ops);
73 if (!bp) {
74 do_error(_("can't read btree block %d/%d\n"), agno, root);
75 return;
76 }
77 if (bp->b_error == -EFSBADCRC || bp->b_error == -EFSCORRUPTED) {
78 do_warn(_("btree block %d/%d is suspect, error %d\n"),
79 agno, root, bp->b_error);
80 suspect = 1;
81 }
82
83 (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect,
84 isroot, magic, priv, ops);
85 libxfs_putbuf(bp);
86 }
87
88 /*
89 * returns 1 on bad news (inode needs to be cleared), 0 on good
90 */
91 int
92 scan_lbtree(
93 xfs_fsblock_t root,
94 int nlevels,
95 int (*func)(struct xfs_btree_block *block,
96 int level,
97 int type,
98 int whichfork,
99 xfs_fsblock_t bno,
100 xfs_ino_t ino,
101 xfs_rfsblock_t *tot,
102 uint64_t *nex,
103 blkmap_t **blkmapp,
104 bmap_cursor_t *bm_cursor,
105 int isroot,
106 int check_dups,
107 int *dirty,
108 uint64_t magic),
109 int type,
110 int whichfork,
111 xfs_ino_t ino,
112 xfs_rfsblock_t *tot,
113 uint64_t *nex,
114 blkmap_t **blkmapp,
115 bmap_cursor_t *bm_cursor,
116 int isroot,
117 int check_dups,
118 uint64_t magic,
119 const struct xfs_buf_ops *ops)
120 {
121 xfs_buf_t *bp;
122 int err;
123 int dirty = 0;
124 bool badcrc = false;
125
126 bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root),
127 XFS_FSB_TO_BB(mp, 1), 0, ops);
128 if (!bp) {
129 do_error(_("can't read btree block %d/%d\n"),
130 XFS_FSB_TO_AGNO(mp, root),
131 XFS_FSB_TO_AGBNO(mp, root));
132 return(1);
133 }
134
135 /*
136 * only check for bad CRC here - caller will determine if there
137 * is a corruption or not and whether it got corrected and so needs
138 * writing back. CRC errors always imply we need to write the block.
139 */
140 if (bp->b_error == -EFSBADCRC) {
141 do_warn(_("btree block %d/%d is suspect, error %d\n"),
142 XFS_FSB_TO_AGNO(mp, root),
143 XFS_FSB_TO_AGBNO(mp, root), bp->b_error);
144 badcrc = true;
145 }
146
147 err = (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1,
148 type, whichfork, root, ino, tot, nex, blkmapp,
149 bm_cursor, isroot, check_dups, &dirty,
150 magic);
151
152 ASSERT(dirty == 0 || (dirty && !no_modify));
153
154 if ((dirty || badcrc) && !no_modify)
155 libxfs_writebuf(bp, 0);
156 else
157 libxfs_putbuf(bp);
158
159 return(err);
160 }
161
162 int
163 scan_bmapbt(
164 struct xfs_btree_block *block,
165 int level,
166 int type,
167 int whichfork,
168 xfs_fsblock_t bno,
169 xfs_ino_t ino,
170 xfs_rfsblock_t *tot,
171 uint64_t *nex,
172 blkmap_t **blkmapp,
173 bmap_cursor_t *bm_cursor,
174 int isroot,
175 int check_dups,
176 int *dirty,
177 uint64_t magic)
178 {
179 int i;
180 int err;
181 xfs_bmbt_ptr_t *pp;
182 xfs_bmbt_key_t *pkey;
183 xfs_bmbt_rec_t *rp;
184 xfs_fileoff_t first_key;
185 xfs_fileoff_t last_key;
186 char *forkname = get_forkname(whichfork);
187 int numrecs;
188 xfs_agnumber_t agno;
189 xfs_agblock_t agbno;
190 int state;
191 int error;
192
193 /*
194 * unlike the ag freeblock btrees, if anything looks wrong
195 * in an inode bmap tree, just bail. it's possible that
196 * we'll miss a case where the to-be-toasted inode and
197 * another inode are claiming the same block but that's
198 * highly unlikely.
199 */
200 if (be32_to_cpu(block->bb_magic) != magic) {
201 do_warn(
202 _("bad magic # %#x in inode %" PRIu64 " (%s fork) bmbt block %" PRIu64 "\n"),
203 be32_to_cpu(block->bb_magic), ino, forkname, bno);
204 return(1);
205 }
206 if (be16_to_cpu(block->bb_level) != level) {
207 do_warn(
208 _("expected level %d got %d in inode %" PRIu64 ", (%s fork) bmbt block %" PRIu64 "\n"),
209 level, be16_to_cpu(block->bb_level),
210 ino, forkname, bno);
211 return(1);
212 }
213
214 if (magic == XFS_BMAP_CRC_MAGIC) {
215 /* verify owner */
216 if (be64_to_cpu(block->bb_u.l.bb_owner) != ino) {
217 do_warn(
218 _("expected owner inode %" PRIu64 ", got %llu, bmbt block %" PRIu64 "\n"),
219 ino,
220 (unsigned long long)be64_to_cpu(block->bb_u.l.bb_owner),
221 bno);
222 return 1;
223 }
224 /* verify block number */
225 if (be64_to_cpu(block->bb_u.l.bb_blkno) !=
226 XFS_FSB_TO_DADDR(mp, bno)) {
227 do_warn(
228 _("expected block %" PRIu64 ", got %llu, bmbt block %" PRIu64 "\n"),
229 XFS_FSB_TO_DADDR(mp, bno),
230 (unsigned long long)be64_to_cpu(block->bb_u.l.bb_blkno),
231 bno);
232 return 1;
233 }
234 /* verify uuid */
235 if (platform_uuid_compare(&block->bb_u.l.bb_uuid,
236 &mp->m_sb.sb_meta_uuid) != 0) {
237 do_warn(
238 _("wrong FS UUID, bmbt block %" PRIu64 "\n"),
239 bno);
240 return 1;
241 }
242 }
243
244 if (check_dups == 0) {
245 /*
246 * check sibling pointers. if bad we have a conflict
247 * between the sibling pointers and the child pointers
248 * in the parent block. blow out the inode if that happens
249 */
250 if (bm_cursor->level[level].fsbno != NULLFSBLOCK) {
251 /*
252 * this is not the first block on this level
253 * so the cursor for this level has recorded the
254 * values for this's block left-sibling.
255 */
256 if (bno != bm_cursor->level[level].right_fsbno) {
257 do_warn(
258 _("bad fwd (right) sibling pointer (saw %" PRIu64 " parent block says %" PRIu64 ")\n"
259 "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
260 bm_cursor->level[level].right_fsbno,
261 bno, ino, forkname,
262 bm_cursor->level[level].fsbno);
263 return(1);
264 }
265 if (be64_to_cpu(block->bb_u.l.bb_leftsib) !=
266 bm_cursor->level[level].fsbno) {
267 do_warn(
268 _("bad back (left) sibling pointer (saw %llu parent block says %" PRIu64 ")\n"
269 "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
270 (unsigned long long)
271 be64_to_cpu(block->bb_u.l.bb_leftsib),
272 bm_cursor->level[level].fsbno,
273 ino, forkname, bno);
274 return(1);
275 }
276 } else {
277 /*
278 * This is the first or only block on this level.
279 * Check that the left sibling pointer is NULL
280 */
281 if (be64_to_cpu(block->bb_u.l.bb_leftsib) != NULLFSBLOCK) {
282 do_warn(
283 _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
284 "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
285 (unsigned long long)
286 be64_to_cpu(block->bb_u.l.bb_leftsib),
287 ino, forkname, bno);
288 return(1);
289 }
290 }
291
292 /*
293 * update cursor block pointers to reflect this block
294 */
295 bm_cursor->level[level].fsbno = bno;
296 bm_cursor->level[level].left_fsbno =
297 be64_to_cpu(block->bb_u.l.bb_leftsib);
298 bm_cursor->level[level].right_fsbno =
299 be64_to_cpu(block->bb_u.l.bb_rightsib);
300
301 agno = XFS_FSB_TO_AGNO(mp, bno);
302 agbno = XFS_FSB_TO_AGBNO(mp, bno);
303
304 pthread_mutex_lock(&ag_locks[agno].lock);
305 state = get_bmap(agno, agbno);
306 switch (state) {
307 case XR_E_INUSE1:
308 /*
309 * block was claimed as in use data by the rmap
310 * btree, but has not been found in the data extent
311 * map for the inode. That means this bmbt block hasn't
312 * yet been claimed as in use, which means -it's ours-
313 */
314 case XR_E_UNKNOWN:
315 case XR_E_FREE1:
316 case XR_E_FREE:
317 set_bmap(agno, agbno, XR_E_INUSE);
318 break;
319 case XR_E_FS_MAP:
320 case XR_E_INUSE:
321 /*
322 * we'll try and continue searching here since
323 * the block looks like it's been claimed by file
324 * to store user data, a directory to store directory
325 * data, or the space allocation btrees but since
326 * we made it here, the block probably
327 * contains btree data.
328 */
329 set_bmap(agno, agbno, XR_E_MULT);
330 do_warn(
331 _("inode 0x%" PRIx64 "bmap block 0x%" PRIx64 " claimed, state is %d\n"),
332 ino, bno, state);
333 break;
334 case XR_E_MULT:
335 case XR_E_INUSE_FS:
336 set_bmap(agno, agbno, XR_E_MULT);
337 do_warn(
338 _("inode 0x%" PRIx64 " bmap block 0x%" PRIx64 " claimed, state is %d\n"),
339 ino, bno, state);
340 /*
341 * if we made it to here, this is probably a bmap block
342 * that is being used by *another* file as a bmap block
343 * so the block will be valid. Both files should be
344 * trashed along with any other file that impinges on
345 * any blocks referenced by either file. So we
346 * continue searching down this btree to mark all
347 * blocks duplicate
348 */
349 break;
350 case XR_E_BAD_STATE:
351 default:
352 do_warn(
353 _("bad state %d, inode %" PRIu64 " bmap block 0x%" PRIx64 "\n"),
354 state, ino, bno);
355 break;
356 }
357 pthread_mutex_unlock(&ag_locks[agno].lock);
358 } else {
359 /*
360 * attribute fork for realtime files is in the regular
361 * filesystem
362 */
363 if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK) {
364 if (search_dup_extent(XFS_FSB_TO_AGNO(mp, bno),
365 XFS_FSB_TO_AGBNO(mp, bno),
366 XFS_FSB_TO_AGBNO(mp, bno) + 1))
367 return(1);
368 } else {
369 if (search_rt_dup_extent(mp, bno))
370 return(1);
371 }
372 }
373 (*tot)++;
374 numrecs = be16_to_cpu(block->bb_numrecs);
375
376 /* Record BMBT blocks in the reverse-mapping data. */
377 if (check_dups && collect_rmaps) {
378 agno = XFS_FSB_TO_AGNO(mp, bno);
379 pthread_mutex_lock(&ag_locks[agno].lock);
380 error = rmap_add_bmbt_rec(mp, ino, whichfork, bno);
381 pthread_mutex_unlock(&ag_locks[agno].lock);
382 if (error)
383 do_error(
384 _("couldn't add inode %"PRIu64" bmbt block %"PRIu64" reverse-mapping data."),
385 ino, bno);
386 }
387
388 if (level == 0) {
389 if (numrecs > mp->m_bmap_dmxr[0] || (isroot == 0 && numrecs <
390 mp->m_bmap_dmnr[0])) {
391 do_warn(
392 _("inode %" PRIu64 " bad # of bmap records (%u, min - %u, max - %u)\n"),
393 ino, numrecs, mp->m_bmap_dmnr[0],
394 mp->m_bmap_dmxr[0]);
395 return(1);
396 }
397 rp = XFS_BMBT_REC_ADDR(mp, block, 1);
398 *nex += numrecs;
399 /*
400 * XXX - if we were going to fix up the btree record,
401 * we'd do it right here. For now, if there's a problem,
402 * we'll bail out and presumably clear the inode.
403 */
404 if (check_dups == 0) {
405 err = process_bmbt_reclist(mp, rp, &numrecs, type, ino,
406 tot, blkmapp, &first_key,
407 &last_key, whichfork);
408 if (err)
409 return 1;
410
411 /*
412 * check that key ordering is monotonically increasing.
413 * if the last_key value in the cursor is set to
414 * NULLFILEOFF, then we know this is the first block
415 * on the leaf level and we shouldn't check the
416 * last_key value.
417 */
418 if (first_key <= bm_cursor->level[level].last_key &&
419 bm_cursor->level[level].last_key !=
420 NULLFILEOFF) {
421 do_warn(
422 _("out-of-order bmap key (file offset) in inode %" PRIu64 ", %s fork, fsbno %" PRIu64 "\n"),
423 ino, forkname, bno);
424 return(1);
425 }
426 /*
427 * update cursor keys to reflect this block.
428 * don't have to check if last_key is > first_key
429 * since that gets checked by process_bmbt_reclist.
430 */
431 bm_cursor->level[level].first_key = first_key;
432 bm_cursor->level[level].last_key = last_key;
433
434 return 0;
435 } else {
436 return scan_bmbt_reclist(mp, rp, &numrecs, type, ino,
437 tot, whichfork);
438 }
439 }
440 if (numrecs > mp->m_bmap_dmxr[1] || (isroot == 0 && numrecs <
441 mp->m_bmap_dmnr[1])) {
442 do_warn(
443 _("inode %" PRIu64 " bad # of bmap records (%u, min - %u, max - %u)\n"),
444 ino, numrecs, mp->m_bmap_dmnr[1], mp->m_bmap_dmxr[1]);
445 return(1);
446 }
447 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
448 pkey = XFS_BMBT_KEY_ADDR(mp, block, 1);
449
450 last_key = NULLFILEOFF;
451
452 for (i = 0, err = 0; i < numrecs; i++) {
453 /*
454 * XXX - if we were going to fix up the interior btree nodes,
455 * we'd do it right here. For now, if there's a problem,
456 * we'll bail out and presumably clear the inode.
457 */
458 if (!verify_dfsbno(mp, be64_to_cpu(pp[i]))) {
459 do_warn(
460 _("bad bmap btree ptr 0x%llx in ino %" PRIu64 "\n"),
461 (unsigned long long) be64_to_cpu(pp[i]), ino);
462 return(1);
463 }
464
465 err = scan_lbtree(be64_to_cpu(pp[i]), level, scan_bmapbt,
466 type, whichfork, ino, tot, nex, blkmapp,
467 bm_cursor, 0, check_dups, magic,
468 &xfs_bmbt_buf_ops);
469 if (err)
470 return(1);
471
472 /*
473 * fix key (offset) mismatches between the first key
474 * in the child block (as recorded in the cursor) and the
475 * key in the interior node referencing the child block.
476 *
477 * fixes cases where entries have been shifted between
478 * child blocks but the parent hasn't been updated. We
479 * don't have to worry about the key values in the cursor
480 * not being set since we only look at the key values of
481 * our child and those are guaranteed to be set by the
482 * call to scan_lbtree() above.
483 */
484 if (check_dups == 0 && be64_to_cpu(pkey[i].br_startoff) !=
485 bm_cursor->level[level-1].first_key) {
486 if (!no_modify) {
487 do_warn(
488 _("correcting bt key (was %llu, now %" PRIu64 ") in inode %" PRIu64 "\n"
489 "\t\t%s fork, btree block %" PRIu64 "\n"),
490 (unsigned long long)
491 be64_to_cpu(pkey[i].br_startoff),
492 bm_cursor->level[level-1].first_key,
493 ino,
494 forkname, bno);
495 *dirty = 1;
496 pkey[i].br_startoff = cpu_to_be64(
497 bm_cursor->level[level-1].first_key);
498 } else {
499 do_warn(
500 _("bad btree key (is %llu, should be %" PRIu64 ") in inode %" PRIu64 "\n"
501 "\t\t%s fork, btree block %" PRIu64 "\n"),
502 (unsigned long long)
503 be64_to_cpu(pkey[i].br_startoff),
504 bm_cursor->level[level-1].first_key,
505 ino, forkname, bno);
506 }
507 }
508 }
509
510 /*
511 * If we're the last node at our level, check that the last child
512 * block's forward sibling pointer is NULL.
513 */
514 if (check_dups == 0 &&
515 bm_cursor->level[level].right_fsbno == NULLFSBLOCK &&
516 bm_cursor->level[level - 1].right_fsbno != NULLFSBLOCK) {
517 do_warn(
518 _("bad fwd (right) sibling pointer (saw %" PRIu64 " should be NULLFSBLOCK)\n"
519 "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
520 bm_cursor->level[level - 1].right_fsbno,
521 ino, forkname, bm_cursor->level[level - 1].fsbno);
522 return(1);
523 }
524
525 /*
526 * update cursor keys to reflect this block
527 */
528 if (check_dups == 0) {
529 bm_cursor->level[level].first_key =
530 be64_to_cpu(pkey[0].br_startoff);
531 bm_cursor->level[level].last_key =
532 be64_to_cpu(pkey[numrecs - 1].br_startoff);
533 }
534
535 return(0);
536 }
537
/*
 * Scan one block of a free space btree (by-bno "bno" or by-size "cnt",
 * selected by @magic).  Validates the block header, claims the block in
 * the in-core usage map, checks record sanity and ordering, accumulates
 * free-space counters into the aghdr_cnts passed via @priv, and recurses
 * into child blocks via scan_sbtree().
 */
static void
scan_allocbt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	uint32_t		magic,
	void			*priv,
	const struct xfs_buf_ops *ops)
{
	struct aghdr_cnts	*agcnts = priv;
	const char		*name;
	int			i;
	xfs_alloc_ptr_t		*pp;
	xfs_alloc_rec_t		*rp;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;
	xfs_extlen_t		lastcount = 0;	/* cntbt records must be non-decreasing by length */
	xfs_agblock_t		lastblock = 0;	/* bnobt records must be strictly increasing by bno */

	/* pick a human-readable tree name for messages; magic must be one of these */
	switch (magic) {
	case XFS_ABTB_CRC_MAGIC:
	case XFS_ABTB_MAGIC:
		name = "bno";
		break;
	case XFS_ABTC_CRC_MAGIC:
	case XFS_ABTC_MAGIC:
		name = "cnt";
		break;
	default:
		name = "(unknown)";
		assert(0);
		break;
	}

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * All freespace btree blocks except the roots are freed for a
	 * fully used filesystem, thus they are counted towards the
	 * free data block counter.
	 */
	if (!isroot) {
		agcnts->agfbtreeblks++;
		agcnts->fdblocks++;
	}

	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed
	 */
	state = get_bmap(agno, bno);
	if (state != XR_E_UNKNOWN) {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			name, state, agno, bno, suspect);
		return;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);

	if (level == 0) {
		/* leaf block: clamp an out-of-range record count and note the error */
		if (numrecs > mp->m_alloc_mxr[0]) {
			numrecs = mp->m_alloc_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_alloc_mnr[0]) {
			numrecs = mp->m_alloc_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			do_warn(
	_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
				be16_to_cpu(block->bb_numrecs),
				mp->m_alloc_mnr[0], mp->m_alloc_mxr[0],
				name, agno, bno);
			suspect++;
		}

		rp = XFS_ALLOC_REC_ADDR(mp, block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t		b, end;
			xfs_extlen_t		len, blen;

			b = be32_to_cpu(rp[i].ar_startblock);
			len = be32_to_cpu(rp[i].ar_blockcount);
			end = b + len;

			if (b == 0 || !verify_agbno(mp, agno, b)) {
				do_warn(
	_("invalid start block %u in record %u of %s btree block %u/%u\n"),
					b, i, name, agno, bno);
				continue;
			}
			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
				do_warn(
	_("invalid length %u in record %u of %s btree block %u/%u\n"),
					len, i, name, agno, bno);
				continue;
			}

			if (magic == XFS_ABTB_MAGIC ||
			    magic == XFS_ABTB_CRC_MAGIC) {
				/* bnobt: records ordered by start block */
				if (b <= lastblock) {
					do_warn(_(
	"out-of-order bno btree record %d (%u %u) block %u/%u\n"),
						i, b, len, agno, bno);
				} else {
					lastblock = b;
				}
			} else {
				/*
				 * cntbt: free space is only summed here so
				 * each extent is counted exactly once.
				 */
				agcnts->fdblocks += len;
				agcnts->agffreeblks += len;
				if (len > agcnts->agflongest)
					agcnts->agflongest = len;
				if (len < lastcount) {
					do_warn(_(
	"out-of-order cnt btree record %d (%u %u) block %u/%u\n"),
						i, b, len, agno, bno);
				} else {
					lastcount = len;
				}
			}

			/* claim the extent in the usage map, range by range */
			for ( ; b < end; b += blen) {
				state = get_bmap_ext(agno, b, end, &blen);
				switch (state) {
				case XR_E_UNKNOWN:
					set_bmap(agno, b, XR_E_FREE1);
					break;
				case XR_E_FREE1:
					/*
					 * no warning messages -- we'll catch
					 * FREE1 blocks later
					 */
					if (magic == XFS_ABTC_MAGIC ||
					    magic == XFS_ABTC_CRC_MAGIC) {
						set_bmap_ext(agno, b, blen,
							     XR_E_FREE);
						break;
					}
					/* fall through */
				default:
					do_warn(
	_("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
						agno, b, b + blen - 1,
						name, state);
					break;
				}
			}
		}
		return;
	}

	/*
	 * interior record
	 */
	pp = XFS_ALLOC_PTR_ADDR(mp, block, 1, mp->m_alloc_mxr[1]);

	if (numrecs > mp->m_alloc_mxr[1]) {
		numrecs = mp->m_alloc_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_alloc_mnr[1]) {
		numrecs = mp->m_alloc_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */
	if (hdr_errors) {
		do_warn(
	_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
			be16_to_cpu(block->bb_numrecs),
			mp->m_alloc_mnr[1], mp->m_alloc_mxr[1],
			name, agno, bno);
		if (suspect)
			return;
		suspect++;
	} else if (suspect) {
		suspect = 0;
	}

	for (i = 0; i < numrecs; i++) {
		xfs_agblock_t		agbno = be32_to_cpu(pp[i]);

		/*
		 * XXX - put sibling detection right here.
		 * we know our sibling chain is good.  So as we go,
		 * we check the entry before and after each entry.
		 * If either of the entries references a different block,
		 * check the sibling pointer.  If there's a sibling
		 * pointer mismatch, try and extract as much data
		 * as possible.
		 */
		if (agbno != 0 && verify_agbno(mp, agno, agbno)) {
			scan_sbtree(agbno, level, agno, suspect, scan_allocbt,
				    0, magic, priv, ops);
		}
	}
}
760
761 static bool
762 ino_issparse(
763 struct xfs_inobt_rec *rp,
764 int offset)
765 {
766 if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
767 return false;
768
769 return xfs_inobt_is_sparse_disk(rp, offset);
770 }
771
772 /* See if the rmapbt owners agree with our observations. */
/*
 * Cross-check one rmap record fragment against the in-core block usage
 * map.  @state is the usage-map state for the @blen-block run starting
 * at @b; @owner is the rmap record's owner (a special XFS_RMAP_OWN_*
 * code or an inode number).  Unknown blocks get a provisional state set
 * from the owner; known blocks get a warning if the owner disagrees.
 */
static void
process_rmap_rec(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		b,
	xfs_agblock_t		end,
	xfs_extlen_t		blen,
	int64_t			owner,
	int			state,
	const char		*name)
{
	switch (state) {
	case XR_E_UNKNOWN:
		/* first claim wins: translate owner into a provisional state */
		switch (owner) {
		case XFS_RMAP_OWN_FS:
		case XFS_RMAP_OWN_LOG:
			set_bmap_ext(agno, b, blen, XR_E_INUSE_FS1);
			break;
		case XFS_RMAP_OWN_AG:
		case XFS_RMAP_OWN_INOBT:
			set_bmap_ext(agno, b, blen, XR_E_FS_MAP1);
			break;
		case XFS_RMAP_OWN_INODES:
			set_bmap_ext(agno, b, blen, XR_E_INO1);
			break;
		case XFS_RMAP_OWN_REFC:
			set_bmap_ext(agno, b, blen, XR_E_REFC);
			break;
		case XFS_RMAP_OWN_COW:
			set_bmap_ext(agno, b, blen, XR_E_COW);
			break;
		case XFS_RMAP_OWN_NULL:
			/* still unknown */
			break;
		default:
			/* file data */
			set_bmap_ext(agno, b, blen, XR_E_INUSE1);
			break;
		}
		break;
	case XR_E_INUSE_FS:
		if (owner == XFS_RMAP_OWN_FS ||
		    owner == XFS_RMAP_OWN_LOG)
			break;
		do_warn(
_("Static meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_FS_MAP:
		if (owner == XFS_RMAP_OWN_AG ||
		    owner == XFS_RMAP_OWN_INOBT)
			break;
		do_warn(
_("AG meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_INO:
		if (owner == XFS_RMAP_OWN_INODES)
			break;
		do_warn(
_("inode block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_REFC:
		if (owner == XFS_RMAP_OWN_REFC)
			break;
		do_warn(
_("AG refcount block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_INUSE:
		/* in-use data must be owned by an inode (non-negative, in range) */
		if (owner >= 0 &&
		    owner < mp->m_sb.sb_dblocks)
			break;
		do_warn(
_("in use block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	case XR_E_FREE1:
	case XR_E_FREE:
		/*
		 * May be on the AGFL. If not, they'll
		 * be caught later.
		 */
		break;
	case XR_E_INUSE1:
		/*
		 * multiple inode owners are ok with
		 * reflink enabled
		 */
		if (xfs_sb_version_hasreflink(&mp->m_sb) &&
		    !XFS_RMAP_NON_INODE_OWNER(owner))
			break;
		/* fall through */
	default:
		do_warn(
_("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
			agno, b, b + blen - 1,
			name, state, owner);
		break;
	}
}
880
/* Per-traversal state threaded through scan_rmapbt() via the priv pointer. */
struct rmap_priv {
	struct aghdr_cnts	*agcnts;	/* AG counters to accumulate into */
	struct xfs_rmap_irec	high_key;	/* parent's high key; records must not exceed it */
	struct xfs_rmap_irec	last_rec;	/* previous record, for mergeability checking */
	xfs_agblock_t		nr_blocks;	/* rmapbt blocks visited so far */
};
887
888 static bool
889 rmap_in_order(
890 xfs_agblock_t b,
891 xfs_agblock_t lastblock,
892 uint64_t owner,
893 uint64_t lastowner,
894 uint64_t offset,
895 uint64_t lastoffset)
896 {
897 if (b > lastblock)
898 return true;
899 else if (b < lastblock)
900 return false;
901
902 if (owner > lastowner)
903 return true;
904 else if (owner < lastowner)
905 return false;
906
907 return offset > lastoffset;
908 }
909
/*
 * Scan one block of the reverse-mapping (rmap) btree.  Validates the
 * block header, claims the block in the usage map, checks record sanity,
 * ordering and mergeability, verifies records/keys against the parent's
 * high key carried in the rmap_priv, cross-checks each record against the
 * usage map via process_rmap_rec(), and recurses into children.  Any
 * suspicion disables the later rmap consistency check (rmap_avoid_check).
 */
static void
scan_rmapbt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	uint32_t		magic,
	void			*priv,
	const struct xfs_buf_ops *ops)
{
	const char		*name = "rmap";
	int			i;
	xfs_rmap_ptr_t		*pp;
	struct xfs_rmap_rec	*rp;
	struct rmap_priv	*rmap_priv = priv;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;
	xfs_agblock_t		lastblock = 0;	/* previous record's start, for order checks */
	uint64_t		lastowner = 0;
	uint64_t		lastoffset = 0;
	struct xfs_rmap_key	*kp;
	struct xfs_rmap_irec	key = {0};

	if (magic != XFS_RMAP_CRC_MAGIC) {
		name = "(unknown)";
		hdr_errors++;
		suspect++;
		goto out;
	}

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			goto out;
	}

	/*
	 * All RMAP btree blocks except the roots are freed for a
	 * fully empty filesystem, thus they are counted towards the
	 * free data block counter.
	 */
	if (!isroot) {
		rmap_priv->agcnts->agfbtreeblks++;
		rmap_priv->agcnts->fdblocks++;
	}
	rmap_priv->nr_blocks++;

	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			goto out;
	}

	/* check for btree blocks multiply claimed */
	state = get_bmap(agno, bno);
	if (!(state == XR_E_UNKNOWN || state == XR_E_FS_MAP1))  {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			name, state, agno, bno, suspect);
		goto out;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);
	if (level == 0) {
		/* leaf block: clamp bad record counts and note the error */
		if (numrecs > mp->m_rmap_mxr[0]) {
			numrecs = mp->m_rmap_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_rmap_mnr[0]) {
			numrecs = mp->m_rmap_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			do_warn(
	_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
				be16_to_cpu(block->bb_numrecs),
				mp->m_rmap_mnr[0], mp->m_rmap_mxr[0],
				name, agno, bno);
			suspect++;
		}

		rp = XFS_RMAP_REC_ADDR(block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t		b, end;
			xfs_extlen_t		len, blen;
			int64_t			owner, offset;

			b = be32_to_cpu(rp[i].rm_startblock);
			len = be32_to_cpu(rp[i].rm_blockcount);
			owner = be64_to_cpu(rp[i].rm_owner);
			offset = be64_to_cpu(rp[i].rm_offset);

			key.rm_flags = 0;
			key.rm_startblock = b;
			key.rm_blockcount = len;
			key.rm_owner = owner;
			if (libxfs_rmap_irec_offset_unpack(offset, &key)) {
				/* Look for impossible flags. */
				do_warn(
	_("invalid flags in record %u of %s btree block %u/%u\n"),
					i, name, agno, bno);
				continue;
			}

			end = key.rm_startblock + key.rm_blockcount;

			/* Make sure agbno & len make sense. */
			if (!verify_agbno(mp, agno, b)) {
				do_warn(
	_("invalid start block %u in record %u of %s btree block %u/%u\n"),
					b, i, name, agno, bno);
				continue;
			}
			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
				do_warn(
	_("invalid length %u in record %u of %s btree block %u/%u\n"),
					len, i, name, agno, bno);
				continue;
			}

			/* Look for impossible owners. */
			if (!((owner > XFS_RMAP_OWN_MIN &&
			       owner <= XFS_RMAP_OWN_FS) ||
			      (XFS_INO_TO_AGNO(mp, owner) < mp->m_sb.sb_agcount &&
			       XFS_AGINO_TO_AGBNO(mp,
					XFS_INO_TO_AGINO(mp, owner)) <
					mp->m_sb.sb_agblocks)))
				do_warn(
	_("invalid owner in rmap btree record %d (%"PRId64" %u) block %u/%u\n"),
					i, owner, len, agno, bno);

			/* Look for impossible record field combinations. */
			if (XFS_RMAP_NON_INODE_OWNER(key.rm_owner)) {
				if (key.rm_flags)
					do_warn(
	_("record %d of block (%u/%u) in %s btree cannot have non-inode owner with flags\n"),
						i, agno, bno, name);
				if (key.rm_offset)
					do_warn(
	_("record %d of block (%u/%u) in %s btree cannot have non-inode owner with offset\n"),
						i, agno, bno, name);
			}

			/*
			 * Check for out of order records.  In-order records
			 * jump back to the label to update the "last" values;
			 * the first record takes the same path directly.
			 */
			if (i == 0) {
advance:
				lastblock = b;
				lastowner = owner;
				lastoffset = offset;
			} else {
				bool bad;

				if (xfs_sb_version_hasreflink(&mp->m_sb))
					bad = !rmap_in_order(b, lastblock,
							owner, lastowner,
							offset, lastoffset);
				else
					bad = b <= lastblock;
				if (bad)
					do_warn(
	_("out-of-order rmap btree record %d (%u %"PRId64" %"PRIx64" %u) block %u/%u\n"),
						i, b, owner, offset, len,
						agno, bno);
				else
					goto advance;
			}

			/* Is this mergeable with the previous record? */
			if (rmaps_are_mergeable(&rmap_priv->last_rec, &key)) {
				do_warn(
	_("record %d in block (%u/%u) of %s tree should be merged with previous record\n"),
					i, agno, bno, name);
				rmap_priv->last_rec.rm_blockcount +=
						key.rm_blockcount;
			} else
				rmap_priv->last_rec = key;

			/* Check that we don't go past the high key. */
			key.rm_startblock += key.rm_blockcount - 1;
			if (!XFS_RMAP_NON_INODE_OWNER(key.rm_owner) &&
			    !(key.rm_flags & XFS_RMAP_BMBT_BLOCK))
				key.rm_offset += key.rm_blockcount - 1;
			key.rm_blockcount = 0;
			if (rmap_diffkeys(&key, &rmap_priv->high_key) > 0) {
				do_warn(
	_("record %d greater than high key of block (%u/%u) in %s tree\n"),
					i, agno, bno, name);
			}

			/* Check for block owner collisions. */
			for ( ; b < end; b += blen)  {
				state = get_bmap_ext(agno, b, end, &blen);
				process_rmap_rec(mp, agno, b, end, blen, owner,
						state, name);
			}
		}
		goto out;
	}

	/*
	 * interior record
	 */
	pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]);

	if (numrecs > mp->m_rmap_mxr[1]) {
		numrecs = mp->m_rmap_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_rmap_mnr[1]) {
		numrecs = mp->m_rmap_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */
	if (hdr_errors) {
		do_warn(
	_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
			be16_to_cpu(block->bb_numrecs),
			mp->m_rmap_mnr[1], mp->m_rmap_mxr[1],
			name, agno, bno);
		if (suspect)
			goto out;
		suspect++;
	} else if (suspect) {
		suspect = 0;
	}

	/* check the node's high keys */
	for (i = 0; !isroot && i < numrecs; i++) {
		kp = XFS_RMAP_HIGH_KEY_ADDR(block, i + 1);

		key.rm_flags = 0;
		key.rm_startblock = be32_to_cpu(kp->rm_startblock);
		key.rm_owner = be64_to_cpu(kp->rm_owner);
		if (libxfs_rmap_irec_offset_unpack(be64_to_cpu(kp->rm_offset),
				&key)) {
			/* Look for impossible flags. */
			do_warn(
	_("invalid flags in key %u of %s btree block %u/%u\n"),
				i, name, agno, bno);
			continue;
		}
		if (rmap_diffkeys(&key, &rmap_priv->high_key) > 0)
			do_warn(
	_("key %d greater than high key of block (%u/%u) in %s tree\n"),
				i, agno, bno, name);
	}

	for (i = 0; i < numrecs; i++)  {
		xfs_agblock_t		agbno = be32_to_cpu(pp[i]);

		/*
		 * XXX - put sibling detection right here.
		 * we know our sibling chain is good.  So as we go,
		 * we check the entry before and after each entry.
		 * If either of the entries references a different block,
		 * check the sibling pointer.  If there's a sibling
		 * pointer mismatch, try and extract as much data
		 * as possible.
		 */
		/* install the child's high key before recursing into it */
		kp = XFS_RMAP_HIGH_KEY_ADDR(block, i + 1);
		rmap_priv->high_key.rm_flags = 0;
		rmap_priv->high_key.rm_startblock =
				be32_to_cpu(kp->rm_startblock);
		rmap_priv->high_key.rm_owner =
				be64_to_cpu(kp->rm_owner);
		if (libxfs_rmap_irec_offset_unpack(be64_to_cpu(kp->rm_offset),
				&rmap_priv->high_key)) {
			/* Look for impossible flags. */
			do_warn(
	_("invalid flags in high key %u of %s btree block %u/%u\n"),
				i, name, agno, agbno);
			continue;
		}

		if (agbno != 0 && verify_agbno(mp, agno, agbno)) {
			scan_sbtree(agbno, level, agno, suspect, scan_rmapbt, 0,
					magic, priv, ops);
		}
	}

out:
	if (suspect)
		rmap_avoid_check();
}
1207
/*
 * State carried through a scan_refcbt() traversal of one AG's
 * refcount btree.
 */
struct refc_priv {
	struct xfs_refcount_irec	last_rec;	/* previous record, for adjacency/merge checks */
	xfs_agblock_t		nr_blocks;	/* btree blocks visited, checked vs agf_refcount_blocks */
};
1212
1213
/*
 * Scan one block of the refcount (reflink) btree.  Verifies the block
 * header, validates leaf records (start/length/refcount sanity, sort
 * order, mergeability with the previous record, leftover CoW staging
 * extents), and recurses through child pointers of interior blocks.
 * Visited blocks are tallied in refc_priv->nr_blocks so the caller can
 * cross-check agf_refcount_blocks.
 */
static void
scan_refcbt(
	struct xfs_btree_block	*block,
	int			level,	/* expected btree level of this block */
	xfs_agblock_t		bno,	/* AG block number of this block */
	xfs_agnumber_t		agno,
	int			suspect,	/* nonzero if an ancestor looked bad */
	int			isroot,
	uint32_t		magic,	/* expected on-disk magic */
	void			*priv,	/* struct refc_priv */
	const struct xfs_buf_ops *ops)
{
	const char		*name = "refcount";
	int			i;
	xfs_refcount_ptr_t	*pp;
	struct xfs_refcount_rec	*rp;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;
	xfs_agblock_t		lastblock = 0;	/* for sort-order checking */
	struct refc_priv	*refc_priv = priv;

	/* the refcount btree only exists on CRC-enabled filesystems */
	if (magic != XFS_REFC_CRC_MAGIC) {
		name = "(unknown)";
		hdr_errors++;
		suspect++;
		goto out;
	}

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in %s btree block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			goto out;
	}

	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in %s btree block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			goto out;
	}

	refc_priv->nr_blocks++;

	/* check for btree blocks multiply claimed */
	state = get_bmap(agno, bno);
	if (!(state == XR_E_UNKNOWN || state == XR_E_REFC)) {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			name, state, agno, bno, suspect);
		goto out;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);
	if (level == 0) {
		/* leaf block: clamp numrecs into the legal range */
		if (numrecs > mp->m_refc_mxr[0]) {
			numrecs = mp->m_refc_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_refc_mnr[0]) {
			numrecs = mp->m_refc_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in %s btree block %u/%u\n"),
				be16_to_cpu(block->bb_numrecs),
				mp->m_refc_mnr[0], mp->m_refc_mxr[0],
				name, agno, bno);
			suspect++;
		}

		rp = XFS_REFCOUNT_REC_ADDR(block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t		b, agb, end;
			xfs_extlen_t		len;
			xfs_nlink_t		nr;

			b = agb = be32_to_cpu(rp[i].rc_startblock);
			len = be32_to_cpu(rp[i].rc_blockcount);
			nr = be32_to_cpu(rp[i].rc_refcount);
			/*
			 * CoW staging extents are stored with the start
			 * block biased by XFS_REFC_COW_START and must have
			 * a refcount of exactly 1.
			 */
			if (b >= XFS_REFC_COW_START && nr != 1)
				do_warn(
_("leftover CoW extent has incorrect refcount in record %u of %s btree block %u/%u\n"),
					i, name, agno, bno);
			if (nr == 1) {
				if (agb < XFS_REFC_COW_START)
					do_warn(
_("leftover CoW extent has invalid startblock in record %u of %s btree block %u/%u\n"),
					i, name, agno, bno);
				/* strip the CoW bias to get the real agbno */
				agb -= XFS_REFC_COW_START;
			}
			end = agb + len;

			if (!verify_agbno(mp, agno, agb)) {
				do_warn(
_("invalid start block %u in record %u of %s btree block %u/%u\n"),
					b, i, name, agno, bno);
				continue;
			}
			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
				do_warn(
_("invalid length %u in record %u of %s btree block %u/%u\n"),
					len, i, name, agno, bno);
				continue;
			}

			if (nr == 1) {
				xfs_agblock_t	c;
				xfs_extlen_t	cnr;

				/*
				 * Walk the block map under this CoW staging
				 * extent; any extent still staged here was
				 * leaked (e.g. by a crash mid-CoW), so
				 * reclassify it as free space.
				 */
				for (c = agb; c < end; c += cnr) {
					state = get_bmap_ext(agno, c, end, &cnr);
					switch (state) {
					case XR_E_UNKNOWN:
					case XR_E_COW:
						do_warn(
_("leftover CoW extent (%u/%u) len %u\n"),
						agno, c, cnr);
						set_bmap_ext(agno, c, cnr, XR_E_FREE);
						break;
					default:
						do_warn(
_("extent (%u/%u) len %u claimed, state is %d\n"),
						agno, c, cnr, state);
						break;
					}
				}
			} else if (nr < 2 || nr > MAXREFCOUNT) {
				/* shared extents need 2..MAXREFCOUNT owners */
				do_warn(
_("invalid reference count %u in record %u of %s btree block %u/%u\n"),
					nr, i, name, agno, bno);
				continue;
			}

			/* records must be sorted by (biased) start block */
			if (b && b <= lastblock) {
				do_warn(_(
"out-of-order %s btree record %d (%u %u) block %u/%u\n"),
					name, i, b, len, agno, bno);
			} else {
				lastblock = b;
			}

			/* Is this record mergeable with the last one? */
			if (refc_priv->last_rec.rc_startblock +
			    refc_priv->last_rec.rc_blockcount == b &&
			    refc_priv->last_rec.rc_refcount == nr) {
				do_warn(
_("record %d in block (%u/%u) of %s tree should be merged with previous record\n"),
					i, agno, bno, name);
				refc_priv->last_rec.rc_blockcount += len;
			} else {
				refc_priv->last_rec.rc_startblock = b;
				refc_priv->last_rec.rc_blockcount = len;
				refc_priv->last_rec.rc_refcount = nr;
			}

			/* XXX: probably want to mark the reflinked areas? */
		}
		goto out;
	}

	/*
	 * interior record
	 */
	pp = XFS_REFCOUNT_PTR_ADDR(block, 1, mp->m_refc_mxr[1]);

	if (numrecs > mp->m_refc_mxr[1]) {
		numrecs = mp->m_refc_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_refc_mnr[1]) {
		numrecs = mp->m_refc_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok. bail out if two levels in a row look bad.
	 */
	if (hdr_errors) {
		do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in %s btree block %u/%u\n"),
			be16_to_cpu(block->bb_numrecs),
			mp->m_refc_mnr[1], mp->m_refc_mxr[1],
			name, agno, bno);
		if (suspect)
			goto out;
		suspect++;
	} else if (suspect) {
		suspect = 0;
	}

	/* descend into valid, nonzero child pointers */
	for (i = 0; i < numrecs; i++) {
		xfs_agblock_t		agbno = be32_to_cpu(pp[i]);

		if (agbno != 0 && verify_agbno(mp, agno, agbno)) {
			scan_sbtree(agbno, level, agno, suspect, scan_refcbt, 0,
					magic, priv, ops);
		}
	}
out:
	/* a suspect tree cannot be trusted for later cross-checks */
	if (suspect)
		refcount_avoid_check();
	return;
}
1426
1427 /*
1428 * The following helpers are to help process and validate individual on-disk
1429 * inode btree records. We have two possible inode btrees with slightly
1430 * different semantics. Many of the validations and actions are equivalent, such
1431 * as record alignment constraints, etc. Other validations differ, such as the
1432 * fact that the inode chunk block allocation state is set by the content of the
1433 * core inobt and verified by the content of the finobt.
1434 *
1435 * The following structures are used to facilitate common validation routines
1436 * where the only difference between validation of the inobt or finobt might be
1437 * the error messages that results in the event of failure.
1438 */
1439
/* Discriminates the two inode btree flavors handled by the helpers below. */
enum inobt_type {
	INOBT,
	FINOBT
};
/* Human-readable tree names, indexed by enum inobt_type, for messages. */
static const char *inobt_names[] = {
	"inobt",
	"finobt"
};
1448
1449 static int
1450 verify_single_ino_chunk_align(
1451 xfs_agnumber_t agno,
1452 enum inobt_type type,
1453 struct xfs_inobt_rec *rp,
1454 int suspect,
1455 bool *skip)
1456 {
1457 const char *inobt_name = inobt_names[type];
1458 xfs_ino_t lino;
1459 xfs_agino_t ino;
1460 xfs_agblock_t agbno;
1461 int off;
1462
1463 *skip = false;
1464 ino = be32_to_cpu(rp->ir_startino);
1465 off = XFS_AGINO_TO_OFFSET(mp, ino);
1466 agbno = XFS_AGINO_TO_AGBNO(mp, ino);
1467 lino = XFS_AGINO_TO_INO(mp, agno, ino);
1468
1469 /*
1470 * on multi-block block chunks, all chunks start at the beginning of the
1471 * block. with multi-chunk blocks, all chunks must start on 64-inode
1472 * boundaries since each block can hold N complete chunks. if fs has
1473 * aligned inodes, all chunks must start at a fs_ino_alignment*N'th
1474 * agbno. skip recs with badly aligned starting inodes.
1475 */
1476 if (ino == 0 ||
1477 (inodes_per_block <= XFS_INODES_PER_CHUNK && off != 0) ||
1478 (inodes_per_block > XFS_INODES_PER_CHUNK &&
1479 off % XFS_INODES_PER_CHUNK != 0) ||
1480 (fs_aligned_inodes && fs_ino_alignment &&
1481 agbno % fs_ino_alignment != 0)) {
1482 do_warn(
1483 _("badly aligned %s rec (starting inode = %" PRIu64 ")\n"),
1484 inobt_name, lino);
1485 suspect++;
1486 }
1487
1488 /*
1489 * verify numeric validity of inode chunk first before inserting into a
1490 * tree. don't have to worry about the overflow case because the
1491 * starting ino number of a chunk can only get within 255 inodes of max
1492 * (NULLAGINO). if it gets closer, the agino number will be illegal as
1493 * the agbno will be too large.
1494 */
1495 if (verify_aginum(mp, agno, ino)) {
1496 do_warn(
1497 _("bad starting inode # (%" PRIu64 " (0x%x 0x%x)) in %s rec, skipping rec\n"),
1498 lino, agno, ino, inobt_name);
1499 *skip = true;
1500 return ++suspect;
1501 }
1502
1503 if (verify_aginum(mp, agno,
1504 ino + XFS_INODES_PER_CHUNK - 1)) {
1505 do_warn(
1506 _("bad ending inode # (%" PRIu64 " (0x%x 0x%zx)) in %s rec, skipping rec\n"),
1507 lino + XFS_INODES_PER_CHUNK - 1,
1508 agno,
1509 ino + XFS_INODES_PER_CHUNK - 1,
1510 inobt_name);
1511 *skip = true;
1512 return ++suspect;
1513 }
1514
1515 return suspect;
1516 }
1517
1518 /*
1519 * Process the state of individual inodes in an on-disk inobt record and import
1520 * into the appropriate in-core tree based on whether the on-disk tree is
1521 * suspect. Return the total and free inode counts based on the record free and
1522 * hole masks.
1523 */
/*
 * Import the per-inode allocation state of one on-disk inobt/finobt
 * record into the in-core trees.  A trusted record goes straight into
 * the known-inode tree with free/used state; a suspect record's inodes
 * are filed in the uncertain tree for later verification against disk.
 * Sparse inodes are marked as such and must be free; the total and free
 * (non-sparse) inode counts are returned via the out parameters.
 */
static int
import_single_ino_chunk(
	xfs_agnumber_t		agno,
	enum inobt_type		type,
	struct xfs_inobt_rec	*rp,
	int			suspect,
	int			*p_nfree,	/* out: free non-sparse inodes */
	int			*p_ninodes)	/* out: total non-sparse inodes */
{
	struct ino_tree_node	*ino_rec = NULL;
	const char		*inobt_name = inobt_names[type];
	xfs_agino_t		ino;
	int			j;
	int			nfree;
	int			ninodes;

	ino = be32_to_cpu(rp->ir_startino);

	if (!suspect) {
		/* inode 0 of the chunk allocates the in-core record */
		if (XFS_INOBT_IS_FREE_DISK(rp, 0))
			ino_rec = set_inode_free_alloc(mp, agno, ino);
		else
			ino_rec = set_inode_used_alloc(mp, agno, ino);
		for (j = 1; j < XFS_INODES_PER_CHUNK; j++) {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				set_inode_free(ino_rec, j);
			else
				set_inode_used(ino_rec, j);
		}
	} else {
		/* untrusted record: park every inode in the uncertain tree */
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				add_aginode_uncertain(mp, agno, ino + j, 1);
			else
				add_aginode_uncertain(mp, agno, ino + j, 0);
		}
	}

	/*
	 * Mark sparse inodes as such in the in-core tree. Verify that sparse
	 * inodes are free and that freecount is consistent with the free mask.
	 */
	nfree = ninodes = 0;
	for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
		if (ino_issparse(rp, j)) {
			/* a sparse (nonexistent) inode must also be free */
			if (!suspect && !XFS_INOBT_IS_FREE_DISK(rp, j)) {
				do_warn(
_("ir_holemask/ir_free mismatch, %s chunk %d/%u, holemask 0x%x free 0x%llx\n"),
					inobt_name, agno, ino,
					be16_to_cpu(rp->ir_u.sp.ir_holemask),
					(unsigned long long)be64_to_cpu(rp->ir_free));
				suspect++;
			}
			if (!suspect && ino_rec)
				set_inode_sparse(ino_rec, j);
		} else {
			/* count fields track non-sparse inos */
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				nfree++;
			ninodes++;
		}
	}

	*p_nfree = nfree;
	*p_ninodes = ninodes;

	return suspect;
}
1592
/*
 * Process a single inobt record: check alignment, mark the blocks the
 * chunk occupies as inode blocks in the block map, make sure the chunk
 * does not overlap an already-seen record, import the per-inode state
 * into the in-core trees, and sanity-check the record's freecount and
 * sparse inode count.  Returns the updated suspect counter.
 */
static int
scan_single_ino_chunk(
	xfs_agnumber_t		agno,
	xfs_inobt_rec_t		*rp,
	int			suspect)
{
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			j;
	int			nfree;
	int			ninodes;
	int			off;
	int			state;
	ino_tree_node_t		*first_rec, *last_rec;
	int			freecount;
	bool			skip = false;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	freecount = inorec_get_freecount(mp, rp);

	/*
	 * Verify record alignment, start/end inode numbers, etc.
	 */
	suspect = verify_single_ino_chunk_align(agno, INOBT, rp, suspect,
						&skip);
	if (skip)
		return suspect;

	/*
	 * set state of each block containing inodes
	 */
	if (off == 0 && !suspect) {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock) {

			/* inodes in sparse chunks don't use blocks */
			if (ino_issparse(rp, j))
				continue;

			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
			state = get_bmap(agno, agbno);
			switch (state) {
			case XR_E_INO:
				break;
			case XR_E_UNKNOWN:
			case XR_E_INO1:	/* seen by rmap */
				set_bmap(agno, agbno, XR_E_INO);
				break;
			case XR_E_INUSE_FS:
			case XR_E_INUSE_FS1:
				/*
				 * AG 0's preallocated metadata inodes may
				 * legitimately sit in blocks the fs claims.
				 */
				if (agno == 0 &&
				    ino + j >= first_prealloc_ino &&
				    ino + j < last_prealloc_ino) {
					set_bmap(agno, agbno, XR_E_INO);
					break;
				}
				/* fall through */
			default:
				/* XXX - maybe should mark block a duplicate */
				do_warn(
_("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				return ++suspect;
			}
		}
	}

	/*
	 * ensure only one avl entry per chunk
	 */
	find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);
	if (first_rec != NULL) {
		/*
		 * this chunk overlaps with one (or more)
		 * already in the tree
		 */
		do_warn(
_("inode rec for ino %" PRIu64 " (%d/%d) overlaps existing rec (start %d/%d)\n"),
			lino, agno, ino, agno, first_rec->ino_startnum);
		suspect++;

		/*
		 * if the 2 chunks start at the same place,
		 * then we don't have to put this one
		 * in the uncertain list. go to the next one.
		 */
		if (first_rec->ino_startnum == ino)
			return suspect;
	}

	/*
	 * Import the state of individual inodes into the appropriate in-core
	 * trees, mark them free or used, and get the resulting total and free
	 * inode counts.
	 */
	nfree = ninodes = 0;
	suspect = import_single_ino_chunk(agno, INOBT, rp, suspect, &nfree,
					  &ninodes);

	/* record freecount should match the free mask we just counted */
	if (nfree != freecount) {
		do_warn(
_("ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree %d\n"),
			agno, ino, freecount, nfree);
	}

	/* verify sparse record formats have a valid inode count */
	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    ninodes != rp->ir_u.sp.ir_count) {
		do_warn(
_("invalid inode count, inode chunk %d/%u, count %d ninodes %d\n"),
			agno, ino, rp->ir_u.sp.ir_count, ninodes);
	}

	return suspect;
}
1714
/*
 * Process a single finobt record: check alignment, cross-check the
 * chunk's blocks against the state left by the earlier inobt scan,
 * verify per-inode free/sparse state against the in-core trees, and
 * validate the record's freecount.  A record the inobt scan never saw
 * is warned about and imported so its inodes are not lost.  Returns the
 * updated suspect counter.
 */
static int
scan_single_finobt_chunk(
	xfs_agnumber_t		agno,
	xfs_inobt_rec_t		*rp,
	int			suspect)
{
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			j;
	int			nfree;
	int			ninodes;
	int			off;
	int			state;
	ino_tree_node_t		*first_rec, *last_rec;
	int			freecount;
	bool			skip = false;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	freecount = inorec_get_freecount(mp, rp);

	/*
	 * Verify record alignment, start/end inode numbers, etc.
	 */
	suspect = verify_single_ino_chunk_align(agno, FINOBT, rp, suspect,
						&skip);
	if (skip)
		return suspect;

	/*
	 * cross check state of each block containing inodes referenced by the
	 * finobt against what we have already scanned from the alloc inobt.
	 */
	if (off == 0 && !suspect) {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock) {
			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
			state = get_bmap(agno, agbno);

			/* sparse inodes should not refer to inode blocks */
			if (ino_issparse(rp, j)) {
				if (state == XR_E_INO) {
					do_warn(
_("sparse inode chunk claims inode block, finobt block - agno %d, bno %d, inopb %d\n"),
						agno, agbno, mp->m_sb.sb_inopblock);
					suspect++;
				}
				continue;
			}

			switch (state) {
			case XR_E_INO:
				break;
			case XR_E_INO1:	/* seen by rmap */
				set_bmap(agno, agbno, XR_E_INO);
				break;
			case XR_E_UNKNOWN:
				/* the inobt scan should have claimed this */
				do_warn(
_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);

				set_bmap(agno, agbno, XR_E_INO);
				suspect++;
				break;
			case XR_E_INUSE_FS:
			case XR_E_INUSE_FS1:
				/*
				 * AG 0's preallocated metadata inodes: claim
				 * the block, but flag the finobt as suspect
				 * since the inobt scan did not account for it.
				 */
				if (agno == 0 &&
				    ino + j >= first_prealloc_ino &&
				    ino + j < last_prealloc_ino) {
					do_warn(
_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
						agno, agbno, mp->m_sb.sb_inopblock);

					set_bmap(agno, agbno, XR_E_INO);
					suspect++;
					break;
				}
				/* fall through */
			default:
				do_warn(
_("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				return ++suspect;
			}
		}
	}

	/*
	 * ensure we have an incore entry for each chunk
	 */
	find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);

	if (first_rec) {
		if (suspect)
			return suspect;

		/*
		 * verify consistency between finobt record and incore state
		 */
		if (first_rec->ino_startnum != ino) {
			do_warn(
_("finobt rec for ino %" PRIu64 " (%d/%u) does not match existing rec (%d/%d)\n"),
				lino, agno, ino, agno, first_rec->ino_startnum);
			return ++suspect;
		}

		nfree = ninodes = 0;
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			int	isfree = XFS_INOBT_IS_FREE_DISK(rp, j);
			int	issparse = ino_issparse(rp, j);

			if (!issparse)
				ninodes++;
			if (isfree && !issparse)
				nfree++;

			/*
			 * inode allocation state should be consistent between
			 * the inobt and finobt
			 */
			if (!suspect &&
			    isfree != is_inode_free(first_rec, j))
				suspect++;

			if (!suspect &&
			    issparse != is_inode_sparse(first_rec, j))
				suspect++;
		}

		goto check_freecount;
	}

	/*
	 * The finobt contains a record that the previous inobt scan never
	 * found. Warn about it and import the inodes into the appropriate
	 * trees.
	 *
	 * Note that this should do the right thing if the previous inobt scan
	 * had added these inodes to the uncertain tree. If the finobt is not
	 * suspect, these inodes should supercede the uncertain ones. Otherwise,
	 * the uncertain tree helpers handle the case where uncertain inodes
	 * already exist.
	 */
	do_warn(_("undiscovered finobt record, ino %" PRIu64 " (%d/%u)\n"),
		lino, agno, ino);

	nfree = ninodes = 0;
	suspect = import_single_ino_chunk(agno, FINOBT, rp, suspect, &nfree,
					  &ninodes);

check_freecount:

	/*
	 * Verify that the record freecount matches the actual number of free
	 * inodes counted in the record. Don't increment 'suspect' here, since
	 * we have already verified the allocation state of the individual
	 * inodes against the in-core state. This will have already incremented
	 * 'suspect' if something is wrong. If suspect hasn't been set at this
	 * point, these warnings mean that we have a simple freecount
	 * inconsistency or a stray finobt record (as opposed to a broader tree
	 * corruption). Issue a warning and continue the scan. The final btree
	 * reconstruction will correct this naturally.
	 */
	if (nfree != freecount) {
		do_warn(
_("finobt ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree %d\n"),
			agno, ino, freecount, nfree);
	}

	/* by definition a finobt record has at least one free inode */
	if (!nfree) {
		do_warn(
_("finobt record with no free inodes, inode chunk %d/%u\n"), agno, ino);
	}

	/* verify sparse record formats have a valid inode count */
	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    ninodes != rp->ir_u.sp.ir_count) {
		do_warn(
_("invalid inode count, inode chunk %d/%u, count %d ninodes %d\n"),
			agno, ino, rp->ir_u.sp.ir_count, ninodes);
	}

	return suspect;
}
1904
1905 /*
1906 * this one walks the inode btrees sucking the info there into
1907 * the incore avl tree. We try and rescue corrupted btree records
1908 * to minimize our chances of losing inodes. Inode info from potentially
1909 * corrupt sources could be bogus so rather than put the info straight
1910 * into the tree, instead we put it on a list and try and verify the
1911 * info in the next phase by examining what's on disk. At that point,
1912 * we'll be able to figure out what's what and stick the corrected info
1913 * into the tree. We do bail out at some point and give up on a subtree
1914 * so as to avoid walking randomly all over the ag.
1915 *
1916 * Note that it's also ok if the free/inuse info wrong, we can correct
1917 * that when we examine the on-disk inode. The important thing is to
1918 * get the start and alignment of the inode chunks right. Those chunks
1919 * that we aren't sure about go into the uncertain list.
1920 */
/*
 * Scan one block of an inode btree (inobt or finobt, selected by the
 * magic number) -- see the overview comment above for the big picture.
 */
static void
scan_inobt(
	struct xfs_btree_block	*block,
	int			level,	/* expected btree level */
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,	/* nonzero if an ancestor looked bad */
	int			isroot,
	uint32_t		magic,	/* selects inobt vs finobt handling */
	void			*priv,	/* struct aghdr_cnts */
	const struct xfs_buf_ops *ops)
{
	struct aghdr_cnts	*agcnts = priv;
	int			i;
	int			numrecs;
	int			state;
	xfs_inobt_ptr_t		*pp;
	xfs_inobt_rec_t		*rp;
	int			hdr_errors;
	int			freecount;

	hdr_errors = 0;

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in inobt block %d/%d\n"),
			be32_to_cpu(block->bb_magic), agno, bno);
		hdr_errors++;
		bad_ino_btree = 1;
		if (suspect)
			return;
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in inobt block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), agno, bno);
		hdr_errors++;
		bad_ino_btree = 1;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed, any unknown/free state
	 * is ok in the bitmap block.
	 */
	state = get_bmap(agno, bno);
	switch (state) {
	case XR_E_FS_MAP1: /* already been seen by an rmap scan */
	case XR_E_UNKNOWN:
	case XR_E_FREE1:
	case XR_E_FREE:
		set_bmap(agno, bno, XR_E_FS_MAP);
		break;
	default:
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			state, agno, bno, suspect);
	}

	numrecs = be16_to_cpu(block->bb_numrecs);

	/*
	 * leaf record in btree
	 */
	if (level == 0) {
		/* check for trashed btree block */

		if (numrecs > mp->m_inobt_mxr[0]) {
			numrecs = mp->m_inobt_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_inobt_mnr[0]) {
			numrecs = mp->m_inobt_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			bad_ino_btree = 1;
			do_warn(_("dubious inode btree block header %d/%d\n"),
				agno, bno);
			suspect++;
		}

		rp = XFS_INOBT_REC_ADDR(mp, block, 1);

		/*
		 * step through the records, each record points to
		 * a chunk of inodes. The start of inode chunks should
		 * be block-aligned. Each inode btree rec should point
		 * to the start of a block of inodes or the start of a group
		 * of INODES_PER_CHUNK (64) inodes. off is the offset into
		 * the block. skip processing of bogus records.
		 */
		for (i = 0; i < numrecs; i++) {
			freecount = inorec_get_freecount(mp, &rp[i]);

			if (magic == XFS_IBT_MAGIC ||
			    magic == XFS_IBT_CRC_MAGIC) {
				int	icount = XFS_INODES_PER_CHUNK;

				/*
				 * ir_count holds the inode count for all
				 * records on fs' with sparse inode support
				 */
				if (xfs_sb_version_hassparseinodes(&mp->m_sb))
					icount = rp[i].ir_u.sp.ir_count;

				agcnts->agicount += icount;
				agcnts->agifreecount += freecount;
				agcnts->ifreecount += freecount;

				suspect = scan_single_ino_chunk(agno, &rp[i],
						suspect);
			} else {
				/*
				 * the finobt tracks records with free inodes,
				 * so only the free inode count is expected to be
				 * consistent with the agi
				 */
				agcnts->fibtfreecount += freecount;

				suspect = scan_single_finobt_chunk(agno, &rp[i],
						suspect);
			}
		}

		if (suspect)
			bad_ino_btree = 1;

		return;
	}

	/*
	 * interior record, continue on
	 */
	if (numrecs > mp->m_inobt_mxr[1]) {
		numrecs = mp->m_inobt_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_inobt_mnr[1]) {
		numrecs = mp->m_inobt_mnr[1];
		hdr_errors++;
	}

	pp = XFS_INOBT_PTR_ADDR(mp, block, 1, mp->m_inobt_mxr[1]);

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok. bail out if two levels in a row look bad.
	 */

	if (suspect && !hdr_errors)
		suspect = 0;

	if (hdr_errors) {
		bad_ino_btree = 1;
		if (suspect)
			return;
		else suspect++;
	}

	/* descend into valid, nonzero child pointers */
	for (i = 0; i < numrecs; i++) {
		if (be32_to_cpu(pp[i]) != 0 && verify_agbno(mp, agno,
				be32_to_cpu(pp[i])))
			scan_sbtree(be32_to_cpu(pp[i]), level, agno,
					suspect, scan_inobt, 0, magic, priv,
					ops);
	}
}
2090
/* Private state threaded through the scan_agfl() walk callback. */
struct agfl_state {
	unsigned int	count;	/* AGFL entries visited so far */
	xfs_agnumber_t	agno;	/* AG whose freelist is being walked */
};
2095
2096 static int
2097 scan_agfl(
2098 struct xfs_mount *mp,
2099 xfs_agblock_t bno,
2100 void *priv)
2101 {
2102 struct agfl_state *as = priv;
2103
2104 if (verify_agbno(mp, as->agno, bno))
2105 set_bmap(as->agno, bno, XR_E_FREE);
2106 else
2107 do_warn(_("bad agbno %u in agfl, agno %d\n"),
2108 bno, as->agno);
2109 as->count++;
2110 return 0;
2111 }
2112
2113 static void
2114 scan_freelist(
2115 xfs_agf_t *agf,
2116 struct aghdr_cnts *agcnts)
2117 {
2118 xfs_buf_t *agflbuf;
2119 xfs_agnumber_t agno;
2120 struct agfl_state state;
2121
2122 agno = be32_to_cpu(agf->agf_seqno);
2123
2124 if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
2125 XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
2126 XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
2127 set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_INUSE_FS);
2128
2129 if (be32_to_cpu(agf->agf_flcount) == 0)
2130 return;
2131
2132 agflbuf = libxfs_readbuf(mp->m_dev,
2133 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
2134 XFS_FSS_TO_BB(mp, 1), 0, &xfs_agfl_buf_ops);
2135 if (!agflbuf) {
2136 do_abort(_("can't read agfl block for ag %d\n"), agno);
2137 return;
2138 }
2139 if (agflbuf->b_error == -EFSBADCRC)
2140 do_warn(_("agfl has bad CRC for ag %d\n"), agno);
2141
2142 if (no_modify) {
2143 /* agf values not fixed in verify_set_agf, so recheck */
2144 if (be32_to_cpu(agf->agf_flfirst) >= libxfs_agfl_size(mp) ||
2145 be32_to_cpu(agf->agf_fllast) >= libxfs_agfl_size(mp)) {
2146 do_warn(_("agf %d freelist blocks bad, skipping "
2147 "freelist scan\n"), agno);
2148 return;
2149 }
2150 }
2151
2152 state.count = 0;
2153 state.agno = agno;
2154 libxfs_agfl_walk(mp, agf, agflbuf, scan_agfl, &state);
2155 if (state.count != be32_to_cpu(agf->agf_flcount)) {
2156 do_warn(_("freeblk count %d != flcount %d in ag %d\n"),
2157 state.count, be32_to_cpu(agf->agf_flcount),
2158 agno);
2159 }
2160
2161 agcnts->fdblocks += state.count;
2162
2163 libxfs_putbuf(agflbuf);
2164 }
2165
/*
 * Walk every btree rooted in the AGF (by-block and by-size free space
 * btrees, plus rmap and refcount btrees when the features are enabled)
 * and compare the totals gathered during traversal against the on-disk
 * AGF header fields.
 */
static void
validate_agf(
	struct xfs_agf		*agf,
	xfs_agnumber_t		agno,
	struct aghdr_cnts	*agcnts)
{
	xfs_agblock_t		bno;
	uint32_t		magic;

	/* by-block-number free space btree */
	bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_ABTB_CRC_MAGIC
							 : XFS_ABTB_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
			    agno, 0, scan_allocbt, 1, magic, agcnts,
			    &xfs_allocbt_buf_ops);
	} else {
		do_warn(_("bad agbno %u for btbno root, agno %d\n"),
			bno, agno);
	}

	/* by-size free space btree */
	bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_ABTC_CRC_MAGIC
							 : XFS_ABTC_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
			    agno, 0, scan_allocbt, 1, magic, agcnts,
			    &xfs_allocbt_buf_ops);
	} else {
		do_warn(_("bad agbno %u for btbcnt root, agno %d\n"),
			bno, agno);
	}

	/* reverse mapping btree, if present */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		struct rmap_priv	priv;

		/* high key starts at "infinity" so the first compare passes */
		memset(&priv.high_key, 0xFF, sizeof(priv.high_key));
		priv.high_key.rm_blockcount = 0;
		priv.agcnts = agcnts;
		priv.last_rec.rm_owner = XFS_RMAP_OWN_UNKNOWN;
		priv.nr_blocks = 0;
		bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			scan_sbtree(bno,
				    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
				    agno, 0, scan_rmapbt, 1, XFS_RMAP_CRC_MAGIC,
				    &priv, &xfs_rmapbt_buf_ops);
			if (be32_to_cpu(agf->agf_rmap_blocks) != priv.nr_blocks)
				do_warn(_("bad rmapbt block count %u, saw %u\n"),
					priv.nr_blocks,
					be32_to_cpu(agf->agf_rmap_blocks));
		} else {
			do_warn(_("bad agbno %u for rmapbt root, agno %d\n"),
				bno, agno);
			rmap_avoid_check();
		}
	}

	/* refcount btree, if reflink is enabled */
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		bno = be32_to_cpu(agf->agf_refcount_root);
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			struct refc_priv	priv;

			memset(&priv, 0, sizeof(priv));
			scan_sbtree(bno,
				    be32_to_cpu(agf->agf_refcount_level),
				    agno, 0, scan_refcbt, 1, XFS_REFC_CRC_MAGIC,
				    &priv, &xfs_refcountbt_buf_ops);
			if (be32_to_cpu(agf->agf_refcount_blocks) != priv.nr_blocks)
				do_warn(_("bad refcountbt block count %u, saw %u\n"),
					priv.nr_blocks,
					be32_to_cpu(agf->agf_refcount_blocks));
		} else {
			do_warn(_("bad agbno %u for refcntbt root, agno %d\n"),
				bno, agno);
			refcount_avoid_check();
		}
	}

	/* cross-check AGF summary counters against the traversal totals */
	if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
		do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
			be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
	}

	if (be32_to_cpu(agf->agf_longest) != agcnts->agflongest) {
		do_warn(_("agf_longest %u, counted %u in ag %u\n"),
			be32_to_cpu(agf->agf_longest), agcnts->agflongest, agno);
	}

	/* agf_btreeblks is only maintained on lazy-sb-count filesystems */
	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_btreeblks) != agcnts->agfbtreeblks) {
		do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"),
			be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
	}

}
2262
/*
 * Walk the inode btrees rooted in the AGI (inobt plus the finobt when
 * present), compare the traversal's inode counts against the on-disk
 * AGI header, and report any populated unlinked-inode hash buckets.
 */
static void
validate_agi(
	struct xfs_agi		*agi,
	xfs_agnumber_t		agno,
	struct aghdr_cnts	*agcnts)
{
	xfs_agblock_t		bno;
	int			i;
	uint32_t		magic;

	bno = be32_to_cpu(agi->agi_root);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_IBT_CRC_MAGIC
							 : XFS_IBT_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agi->agi_level),
			    agno, 0, scan_inobt, 1, magic, agcnts,
			    &xfs_inobt_buf_ops);
	} else {
		do_warn(_("bad agbno %u for inobt root, agno %d\n"),
			be32_to_cpu(agi->agi_root), agno);
	}

	/* free inode btree shares the scan_inobt scanner, chosen by magic */
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		bno = be32_to_cpu(agi->agi_free_root);
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			magic = xfs_sb_version_hascrc(&mp->m_sb) ?
					XFS_FIBT_CRC_MAGIC : XFS_FIBT_MAGIC;
			scan_sbtree(bno, be32_to_cpu(agi->agi_free_level),
				    agno, 0, scan_inobt, 1, magic, agcnts,
				    &xfs_inobt_buf_ops);
		} else {
			do_warn(_("bad agbno %u for finobt root, agno %d\n"),
				be32_to_cpu(agi->agi_free_root), agno);
		}
	}

	/* cross-check AGI counters against the traversal totals */
	if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
		do_warn(_("agi_count %u, counted %u in ag %u\n"),
			be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
	}

	if (be32_to_cpu(agi->agi_freecount) != agcnts->agifreecount) {
		do_warn(_("agi_freecount %u, counted %u in ag %u\n"),
			be32_to_cpu(agi->agi_freecount), agcnts->agifreecount, agno);
	}

	/* the finobt's free count must agree with the AGI as well */
	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
	    be32_to_cpu(agi->agi_freecount) != agcnts->fibtfreecount) {
		do_warn(_("agi_freecount %u, counted %u in ag %u finobt\n"),
			be32_to_cpu(agi->agi_freecount), agcnts->fibtfreecount,
			agno);
	}

	/* unlinked buckets should be empty on a healthy filesystem */
	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
		xfs_agino_t	agino = be32_to_cpu(agi->agi_unlinked[i]);

		if (agino != NULLAGINO) {
			do_warn(
_("agi unlinked bucket %d is %u in ag %u (inode=%" PRIu64 ")\n"),
				i, agino, agno,
				XFS_AGINO_TO_INO(mp, agno, agino));
		}
	}
}
2327
2328 /*
2329 * Scan an AG for obvious corruption.
2330 */
2331 static void
2332 scan_ag(
2333 struct workqueue*wq,
2334 xfs_agnumber_t agno,
2335 void *arg)
2336 {
2337 struct aghdr_cnts *agcnts = arg;
2338 struct xfs_agf *agf;
2339 struct xfs_buf *agfbuf = NULL;
2340 int agf_dirty = 0;
2341 struct xfs_agi *agi;
2342 struct xfs_buf *agibuf = NULL;
2343 int agi_dirty = 0;
2344 struct xfs_sb *sb = NULL;
2345 struct xfs_buf *sbbuf = NULL;
2346 int sb_dirty = 0;
2347 int status;
2348 char *objname = NULL;
2349
2350 sb = (struct xfs_sb *)calloc(BBTOB(XFS_FSS_TO_BB(mp, 1)), 1);
2351 if (!sb) {
2352 do_error(_("can't allocate memory for superblock\n"));
2353 return;
2354 }
2355
2356 sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
2357 XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
2358 if (!sbbuf) {
2359 objname = _("root superblock");
2360 goto out_free_sb;
2361 }
2362 libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
2363
2364 agfbuf = libxfs_readbuf(mp->m_dev,
2365 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2366 XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
2367 if (!agfbuf) {
2368 objname = _("agf block");
2369 goto out_free_sbbuf;
2370 }
2371 agf = XFS_BUF_TO_AGF(agfbuf);
2372
2373 agibuf = libxfs_readbuf(mp->m_dev,
2374 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
2375 XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
2376 if (!agibuf) {
2377 objname = _("agi block");
2378 goto out_free_agfbuf;
2379 }
2380 agi = XFS_BUF_TO_AGI(agibuf);
2381
2382 /* fix up bad ag headers */
2383
2384 status = verify_set_agheader(mp, sbbuf, sb, agf, agi, agno);
2385
2386 if (status & XR_AG_SB_SEC) {
2387 if (!no_modify)
2388 sb_dirty = 1;
2389 /*
2390 * clear bad sector bit because we don't want
2391 * to skip further processing. we just want to
2392 * ensure that we write out the modified sb buffer.
2393 */
2394 status &= ~XR_AG_SB_SEC;
2395 }
2396 if (status & XR_AG_SB) {
2397 if (!no_modify) {
2398 do_warn(_("reset bad sb for ag %d\n"), agno);
2399 sb_dirty = 1;
2400 } else {
2401 do_warn(_("would reset bad sb for ag %d\n"), agno);
2402 }
2403 }
2404 if (status & XR_AG_AGF) {
2405 if (!no_modify) {
2406 do_warn(_("reset bad agf for ag %d\n"), agno);
2407 agf_dirty = 1;
2408 } else {
2409 do_warn(_("would reset bad agf for ag %d\n"), agno);
2410 }
2411 }
2412 if (status & XR_AG_AGI) {
2413 if (!no_modify) {
2414 do_warn(_("reset bad agi for ag %d\n"), agno);
2415 agi_dirty = 1;
2416 } else {
2417 do_warn(_("would reset bad agi for ag %d\n"), agno);
2418 }
2419 }
2420
2421 if (status && no_modify) {
2422 do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
2423 agno);
2424 goto out_free_agibuf;
2425 }
2426
2427 scan_freelist(agf, agcnts);
2428
2429 validate_agf(agf, agno, agcnts);
2430 validate_agi(agi, agno, agcnts);
2431
2432 ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
2433 ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
2434 ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));
2435
2436 /*
2437 * Only pay attention to CRC/verifier errors if we can correct them.
2438 * Note that we can get uncorrected EFSCORRUPTED errors here because
2439 * the verifier will flag on out of range values that we can't correct
2440 * until phase 5 when we have all the information necessary to rebuild
2441 * the freespace/inode btrees. We can correct bad CRC errors
2442 * immediately, though.
2443 */
2444 if (!no_modify) {
2445 agi_dirty += (agibuf->b_error == -EFSBADCRC);
2446 agf_dirty += (agfbuf->b_error == -EFSBADCRC);
2447 sb_dirty += (sbbuf->b_error == -EFSBADCRC);
2448 }
2449
2450 if (agi_dirty && !no_modify)
2451 libxfs_writebuf(agibuf, 0);
2452 else
2453 libxfs_putbuf(agibuf);
2454
2455 if (agf_dirty && !no_modify)
2456 libxfs_writebuf(agfbuf, 0);
2457 else
2458 libxfs_putbuf(agfbuf);
2459
2460 if (sb_dirty && !no_modify) {
2461 if (agno == 0)
2462 memcpy(&mp->m_sb, sb, sizeof(xfs_sb_t));
2463 libxfs_sb_to_disk(XFS_BUF_TO_SBP(sbbuf), sb);
2464 libxfs_writebuf(sbbuf, 0);
2465 } else
2466 libxfs_putbuf(sbbuf);
2467 free(sb);
2468 PROG_RPT_INC(prog_rpt_done[agno], 1);
2469
2470 #ifdef XR_INODE_TRACE
2471 print_inode_list(i);
2472 #endif
2473 return;
2474
2475 out_free_agibuf:
2476 libxfs_putbuf(agibuf);
2477 out_free_agfbuf:
2478 libxfs_putbuf(agfbuf);
2479 out_free_sbbuf:
2480 libxfs_putbuf(sbbuf);
2481 out_free_sb:
2482 free(sb);
2483
2484 if (objname)
2485 do_error(_("can't get %s for ag %d\n"), objname, agno);
2486 }
2487
2488 void
2489 scan_ags(
2490 struct xfs_mount *mp,
2491 int scan_threads)
2492 {
2493 struct aghdr_cnts *agcnts;
2494 uint64_t fdblocks = 0;
2495 uint64_t icount = 0;
2496 uint64_t ifreecount = 0;
2497 uint64_t usedblocks = 0;
2498 xfs_agnumber_t i;
2499 struct workqueue wq;
2500
2501 agcnts = malloc(mp->m_sb.sb_agcount * sizeof(*agcnts));
2502 if (!agcnts) {
2503 do_abort(_("no memory for ag header counts\n"));
2504 return;
2505 }
2506 memset(agcnts, 0, mp->m_sb.sb_agcount * sizeof(*agcnts));
2507
2508 create_work_queue(&wq, mp, scan_threads);
2509
2510 for (i = 0; i < mp->m_sb.sb_agcount; i++)
2511 queue_work(&wq, scan_ag, i, &agcnts[i]);
2512
2513 destroy_work_queue(&wq);
2514
2515 /* tally up the counts */
2516 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
2517 fdblocks += agcnts[i].fdblocks;
2518 icount += agcnts[i].agicount;
2519 ifreecount += agcnts[i].ifreecount;
2520 usedblocks += agcnts[i].usedblocks;
2521 }
2522
2523 free(agcnts);
2524
2525 /*
2526 * Validate that our manual counts match the superblock.
2527 */
2528 if (mp->m_sb.sb_icount != icount) {
2529 do_warn(_("sb_icount %" PRIu64 ", counted %" PRIu64 "\n"),
2530 mp->m_sb.sb_icount, icount);
2531 }
2532
2533 if (mp->m_sb.sb_ifree != ifreecount) {
2534 do_warn(_("sb_ifree %" PRIu64 ", counted %" PRIu64 "\n"),
2535 mp->m_sb.sb_ifree, ifreecount);
2536 }
2537
2538 if (mp->m_sb.sb_fdblocks != fdblocks) {
2539 do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"),
2540 mp->m_sb.sb_fdblocks, fdblocks);
2541 }
2542
2543 if (usedblocks &&
2544 usedblocks != mp->m_sb.sb_dblocks - fdblocks) {
2545 do_warn(_("used blocks %" PRIu64 ", counted %" PRIu64 "\n"),
2546 mp->m_sb.sb_dblocks - fdblocks, usedblocks);
2547 }
2548 }