/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libxfs.h"
#include "avl.h"
#include "globals.h"
#include "agheader.h"
#include "incore.h"
#include "protos.h"
#include "err_protos.h"
#include "dinode.h"
#include "scan.h"
#include "versions.h"
#include "bmap.h"
#include "progress.h"
#include "threads.h"

static xfs_mount_t	*mp = NULL;

/*
 * Variables to validate AG header values against the manual count
 * from the btree traversal.
 */
struct aghdr_cnts {
	xfs_agnumber_t	agno;
	xfs_extlen_t	agffreeblks;
	xfs_extlen_t	agflongest;
	__uint64_t	agfbtreeblks;
	__uint32_t	agicount;
	__uint32_t	agifreecount;
	__uint64_t	fdblocks;
	__uint64_t	ifreecount;
	__uint32_t	fibtfreecount;
};

void
set_mp(xfs_mount_t *mpp)
{
	libxfs_bcache_purge();
	mp = mpp;
}

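/*
 * read a single short-form (per-AG) btree block and hand it off to the
 * per-btree-type callback.  nlevels is the level of the parent, so the
 * callback sees nlevels - 1 as the level of this block.  blocks that
 * fail CRC or corruption checks are still scanned, just flagged suspect.
 */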
static void
scan_sbtree(
	xfs_agblock_t	root,
	int		nlevels,
	xfs_agnumber_t	agno,
	int		suspect,
	void		(*func)(struct xfs_btree_block	*block,
				int			level,
				xfs_agblock_t		bno,
				xfs_agnumber_t		agno,
				int			suspect,
				int			isroot,
				__uint32_t		magic,
				void			*priv),
	int		isroot,
	__uint32_t	magic,
	void		*priv,
	const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;

	bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, root),
			XFS_FSB_TO_BB(mp, 1), 0, ops);
	if (!bp) {
		do_error(_("can't read btree block %d/%d\n"), agno, root);
		return;
	}
	if (bp->b_error == -EFSBADCRC || bp->b_error == -EFSCORRUPTED) {
		do_warn(_("btree block %d/%d is suspect, error %d\n"),
			agno, root, bp->b_error);
		suspect = 1;
	}

	(*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect,
			isroot, magic, priv);
	libxfs_putbuf(bp);
}

/*
 * returns 1 on bad news (inode needs to be cleared), 0 on good
 */
int
scan_lbtree(
	xfs_fsblock_t	root,
	int		nlevels,
	int		(*func)(struct xfs_btree_block	*block,
				int			level,
				int			type,
				int			whichfork,
				xfs_fsblock_t		bno,
				xfs_ino_t		ino,
				xfs_rfsblock_t		*tot,
				__uint64_t		*nex,
				blkmap_t		**blkmapp,
				bmap_cursor_t		*bm_cursor,
				int			isroot,
				int			check_dups,
				int			*dirty,
				__uint64_t		magic),
	int		type,
	int		whichfork,
	xfs_ino_t	ino,
	xfs_rfsblock_t	*tot,
	__uint64_t	*nex,
	blkmap_t	**blkmapp,
	bmap_cursor_t	*bm_cursor,
	int		isroot,
	int		check_dups,
	__uint64_t	magic,
	const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;
	int		err;
	int		dirty = 0;
	bool		badcrc = false;

	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root),
			XFS_FSB_TO_BB(mp, 1), 0, ops);
	if (!bp) {
		do_error(_("can't read btree block %d/%d\n"),
			XFS_FSB_TO_AGNO(mp, root),
			XFS_FSB_TO_AGBNO(mp, root));
		return(1);
	}

	/*
	 * only check for bad CRC here - caller will determine if there
	 * is a corruption or not and whether it got corrected and so needs
	 * writing back. CRC errors always imply we need to write the block.
	 */
	if (bp->b_error == -EFSBADCRC) {
		do_warn(_("btree block %d/%d is suspect, error %d\n"),
			XFS_FSB_TO_AGNO(mp, root),
			XFS_FSB_TO_AGBNO(mp, root), bp->b_error);
		badcrc = true;
	}

	err = (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1,
			type, whichfork, root, ino, tot, nex, blkmapp,
			bm_cursor, isroot, check_dups, &dirty,
			magic);

	ASSERT(dirty == 0 || (dirty && !no_modify));

	if ((dirty || badcrc) && !no_modify)
		libxfs_writebuf(bp, 0);
	else
		libxfs_putbuf(bp);

	return(err);
}

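/*
 * scan a single bmap btree block: check the magic, level, v5 metadata
 * (owner, daddr, uuid) and sibling pointers, record the block's state in
 * the in-core block map (or, on the duplicate-checking pass, match it
 * against the known duplicate extents), then process the leaf records or
 * recurse into child blocks via scan_lbtree().  returns 1 if the inode
 * is bad enough that it needs to be cleared.
 */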
int
scan_bmapbt(
	struct xfs_btree_block	*block,
	int			level,
	int			type,
	int			whichfork,
	xfs_fsblock_t		bno,
	xfs_ino_t		ino,
	xfs_rfsblock_t		*tot,
	__uint64_t		*nex,
	blkmap_t		**blkmapp,
	bmap_cursor_t		*bm_cursor,
	int			isroot,
	int			check_dups,
	int			*dirty,
	__uint64_t		magic)
{
	int			i;
	int			err;
	xfs_bmbt_ptr_t		*pp;
	xfs_bmbt_key_t		*pkey;
	xfs_bmbt_rec_t		*rp;
	xfs_fileoff_t		first_key;
	xfs_fileoff_t		last_key;
	char			*forkname = get_forkname(whichfork);
	int			numrecs;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	int			state;

	/*
	 * unlike the ag freeblock btrees, if anything looks wrong
	 * in an inode bmap tree, just bail.  it's possible that
	 * we'll miss a case where the to-be-toasted inode and
	 * another inode are claiming the same block but that's
	 * highly unlikely.
	 */
	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(
_("bad magic # %#x in inode %" PRIu64 " (%s fork) bmbt block %" PRIu64 "\n"),
			be32_to_cpu(block->bb_magic), ino, forkname, bno);
		return(1);
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(
_("expected level %d got %d in inode %" PRIu64 ", (%s fork) bmbt block %" PRIu64 "\n"),
			level, be16_to_cpu(block->bb_level),
			ino, forkname, bno);
		return(1);
	}

	if (magic == XFS_BMAP_CRC_MAGIC) {
		/* verify owner */
		if (be64_to_cpu(block->bb_u.l.bb_owner) != ino) {
			do_warn(
_("expected owner inode %" PRIu64 ", got %llu, bmbt block %" PRIu64 "\n"),
				ino, be64_to_cpu(block->bb_u.l.bb_owner), bno);
			return 1;
		}
		/* verify block number */
		if (be64_to_cpu(block->bb_u.l.bb_blkno) !=
		    XFS_FSB_TO_DADDR(mp, bno)) {
			do_warn(
_("expected block %" PRIu64 ", got %llu, bmbt block %" PRIu64 "\n"),
				XFS_FSB_TO_DADDR(mp, bno),
				be64_to_cpu(block->bb_u.l.bb_blkno), bno);
			return 1;
		}
		/* verify uuid */
		if (platform_uuid_compare(&block->bb_u.l.bb_uuid,
					  &mp->m_sb.sb_meta_uuid) != 0) {
			do_warn(
_("wrong FS UUID, bmbt block %" PRIu64 "\n"),
				bno);
			return 1;
		}
	}

	if (check_dups == 0) {
		/*
		 * check sibling pointers.  if bad we have a conflict
		 * between the sibling pointers and the child pointers
		 * in the parent block.  blow out the inode if that happens
		 */
		if (bm_cursor->level[level].fsbno != NULLFSBLOCK) {
			/*
			 * this is not the first block on this level
			 * so the cursor for this level has recorded the
			 * values for this block's left-sibling.
			 */
			if (bno != bm_cursor->level[level].right_fsbno) {
				do_warn(
_("bad fwd (right) sibling pointer (saw %" PRIu64 " parent block says %" PRIu64 ")\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					bm_cursor->level[level].right_fsbno,
					bno, ino, forkname,
					bm_cursor->level[level].fsbno);
				return(1);
			}
			if (be64_to_cpu(block->bb_u.l.bb_leftsib) !=
					bm_cursor->level[level].fsbno) {
				do_warn(
_("bad back (left) sibling pointer (saw %llu parent block says %" PRIu64 ")\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					(unsigned long long)
					be64_to_cpu(block->bb_u.l.bb_leftsib),
					bm_cursor->level[level].fsbno,
					ino, forkname, bno);
				return(1);
			}
		} else {
			/*
			 * This is the first or only block on this level.
			 * Check that the left sibling pointer is NULL
			 */
			if (be64_to_cpu(block->bb_u.l.bb_leftsib) != NULLFSBLOCK) {
				do_warn(
_("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					(unsigned long long)
					be64_to_cpu(block->bb_u.l.bb_leftsib),
					ino, forkname, bno);
				return(1);
			}
		}

		/*
		 * update cursor block pointers to reflect this block
		 */
		bm_cursor->level[level].fsbno = bno;
		bm_cursor->level[level].left_fsbno =
					be64_to_cpu(block->bb_u.l.bb_leftsib);
		bm_cursor->level[level].right_fsbno =
					be64_to_cpu(block->bb_u.l.bb_rightsib);

		agno = XFS_FSB_TO_AGNO(mp, bno);
		agbno = XFS_FSB_TO_AGBNO(mp, bno);

		pthread_mutex_lock(&ag_locks[agno].lock);
		state = get_bmap(agno, agbno);
		switch (state) {
		case XR_E_UNKNOWN:
		case XR_E_FREE1:
		case XR_E_FREE:
			set_bmap(agno, agbno, XR_E_INUSE);
			break;
		case XR_E_FS_MAP:
		case XR_E_INUSE:
			/*
			 * we'll try and continue searching here since
			 * the block looks like it's been claimed by a file
			 * to store user data, a directory to store directory
			 * data, or the space allocation btrees but since
			 * we made it here, the block probably
			 * contains btree data.
			 */
			set_bmap(agno, agbno, XR_E_MULT);
			do_warn(
_("inode 0x%" PRIx64 " bmap block 0x%" PRIx64 " claimed, state is %d\n"),
				ino, bno, state);
			break;
		case XR_E_MULT:
		case XR_E_INUSE_FS:
			set_bmap(agno, agbno, XR_E_MULT);
			do_warn(
_("inode 0x%" PRIx64 " bmap block 0x%" PRIx64 " claimed, state is %d\n"),
				ino, bno, state);
			/*
			 * if we made it to here, this is probably a bmap block
			 * that is being used by *another* file as a bmap block
			 * so the block will be valid.  Both files should be
			 * trashed along with any other file that impinges on
			 * any blocks referenced by either file.  So we
			 * continue searching down this btree to mark all
			 * blocks duplicate
			 */
			break;
		case XR_E_BAD_STATE:
		default:
			do_warn(
_("bad state %d, inode %" PRIu64 " bmap block 0x%" PRIx64 "\n"),
				state, ino, bno);
			break;
		}
		pthread_mutex_unlock(&ag_locks[agno].lock);
	} else {
		/*
		 * attribute fork for realtime files is in the regular
		 * filesystem
		 */
		if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK) {
			if (search_dup_extent(XFS_FSB_TO_AGNO(mp, bno),
					XFS_FSB_TO_AGBNO(mp, bno),
					XFS_FSB_TO_AGBNO(mp, bno) + 1))
				return(1);
		} else {
			if (search_rt_dup_extent(mp, bno))
				return(1);
		}
	}
	(*tot)++;
	numrecs = be16_to_cpu(block->bb_numrecs);

	if (level == 0) {
		if (numrecs > mp->m_bmap_dmxr[0] || (isroot == 0 && numrecs <
							mp->m_bmap_dmnr[0])) {
			do_warn(
_("inode %" PRIu64 " bad # of bmap records (%u, min - %u, max - %u)\n"),
				ino, numrecs, mp->m_bmap_dmnr[0],
				mp->m_bmap_dmxr[0]);
			return(1);
		}
		rp = XFS_BMBT_REC_ADDR(mp, block, 1);
		*nex += numrecs;
		/*
		 * XXX - if we were going to fix up the btree record,
		 * we'd do it right here.  For now, if there's a problem,
		 * we'll bail out and presumably clear the inode.
		 */
		if (check_dups == 0) {
			err = process_bmbt_reclist(mp, rp, &numrecs, type, ino,
						   tot, blkmapp, &first_key,
						   &last_key, whichfork);
			if (err)
				return 1;

			/*
			 * check that key ordering is monotonically increasing.
			 * if the last_key value in the cursor is set to
			 * NULLFILEOFF, then we know this is the first block
			 * on the leaf level and we shouldn't check the
			 * last_key value.
			 */
			if (first_key <= bm_cursor->level[level].last_key &&
					bm_cursor->level[level].last_key !=
					NULLFILEOFF) {
				do_warn(
_("out-of-order bmap key (file offset) in inode %" PRIu64 ", %s fork, fsbno %" PRIu64 "\n"),
					ino, forkname, bno);
				return(1);
			}
			/*
			 * update cursor keys to reflect this block.
			 * don't have to check if last_key is > first_key
			 * since that gets checked by process_bmbt_reclist.
			 */
			bm_cursor->level[level].first_key = first_key;
			bm_cursor->level[level].last_key = last_key;

			return 0;
		} else {
			return scan_bmbt_reclist(mp, rp, &numrecs, type, ino,
						 tot, whichfork);
		}
	}
	if (numrecs > mp->m_bmap_dmxr[1] || (isroot == 0 && numrecs <
						mp->m_bmap_dmnr[1])) {
		do_warn(
_("inode %" PRIu64 " bad # of bmap records (%u, min - %u, max - %u)\n"),
			ino, numrecs, mp->m_bmap_dmnr[1], mp->m_bmap_dmxr[1]);
		return(1);
	}
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
	pkey = XFS_BMBT_KEY_ADDR(mp, block, 1);

	last_key = NULLFILEOFF;

	for (i = 0, err = 0; i < numrecs; i++) {
		/*
		 * XXX - if we were going to fix up the interior btree nodes,
		 * we'd do it right here.  For now, if there's a problem,
		 * we'll bail out and presumably clear the inode.
		 */
		if (!verify_dfsbno(mp, be64_to_cpu(pp[i]))) {
			do_warn(
_("bad bmap btree ptr 0x%llx in ino %" PRIu64 "\n"),
				(unsigned long long) be64_to_cpu(pp[i]), ino);
			return(1);
		}

		err = scan_lbtree(be64_to_cpu(pp[i]), level, scan_bmapbt,
				type, whichfork, ino, tot, nex, blkmapp,
				bm_cursor, 0, check_dups, magic,
				&xfs_bmbt_buf_ops);
		if (err)
			return(1);

		/*
		 * fix key (offset) mismatches between the first key
		 * in the child block (as recorded in the cursor) and the
		 * key in the interior node referencing the child block.
		 *
		 * fixes cases where entries have been shifted between
		 * child blocks but the parent hasn't been updated.  We
		 * don't have to worry about the key values in the cursor
		 * not being set since we only look at the key values of
		 * our child and those are guaranteed to be set by the
		 * call to scan_lbtree() above.
		 */
		if (check_dups == 0 && be64_to_cpu(pkey[i].br_startoff) !=
					bm_cursor->level[level-1].first_key) {
			if (!no_modify) {
				do_warn(
_("correcting bt key (was %llu, now %" PRIu64 ") in inode %" PRIu64 "\n"
  "\t\t%s fork, btree block %" PRIu64 "\n"),
					(unsigned long long)
					be64_to_cpu(pkey[i].br_startoff),
					bm_cursor->level[level-1].first_key,
					ino,
					forkname, bno);
				*dirty = 1;
				pkey[i].br_startoff = cpu_to_be64(
					bm_cursor->level[level-1].first_key);
			} else {
				do_warn(
_("bad btree key (is %llu, should be %" PRIu64 ") in inode %" PRIu64 "\n"
  "\t\t%s fork, btree block %" PRIu64 "\n"),
					(unsigned long long)
					be64_to_cpu(pkey[i].br_startoff),
					bm_cursor->level[level-1].first_key,
					ino, forkname, bno);
			}
		}
	}

	/*
	 * If we're the last node at our level, check that the last child
	 * block's forward sibling pointer is NULL.
	 */
	if (check_dups == 0 &&
		bm_cursor->level[level].right_fsbno == NULLFSBLOCK &&
		bm_cursor->level[level - 1].right_fsbno != NULLFSBLOCK) {
		do_warn(
_("bad fwd (right) sibling pointer (saw %" PRIu64 " should be NULLFSBLOCK)\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
			bm_cursor->level[level - 1].right_fsbno,
			ino, forkname, bm_cursor->level[level - 1].fsbno);
		return(1);
	}

	/*
	 * update cursor keys to reflect this block
	 */
	if (check_dups == 0) {
		bm_cursor->level[level].first_key =
				be64_to_cpu(pkey[0].br_startoff);
		bm_cursor->level[level].last_key =
				be64_to_cpu(pkey[numrecs - 1].br_startoff);
	}

	return(0);
}

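/*
 * scan a single block of a freespace btree (by-bno or by-cnt, selected
 * by the magic number).  leaf records are sanity checked, marked free in
 * the in-core block map and tallied into the per-AG counters; interior
 * blocks recurse into their children via scan_sbtree().
 */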
static void
scan_allocbt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	__uint32_t		magic,
	void			*priv)
{
	struct aghdr_cnts	*agcnts = priv;
	const char		*name;
	int			i;
	xfs_alloc_ptr_t		*pp;
	xfs_alloc_rec_t		*rp;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;
	xfs_extlen_t		lastcount = 0;
	xfs_agblock_t		lastblock = 0;

	switch (magic) {
	case XFS_ABTB_CRC_MAGIC:
	case XFS_ABTB_MAGIC:
		name = "bno";
		break;
	case XFS_ABTC_CRC_MAGIC:
	case XFS_ABTC_MAGIC:
		name = "cnt";
		break;
	default:
		name = "(unknown)";
		assert(0);
		break;
	}

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * All freespace btree blocks except the roots are freed for a
	 * fully used filesystem, thus they are counted towards the
	 * free data block counter.
	 */
	if (!isroot) {
		agcnts->agfbtreeblks++;
		agcnts->fdblocks++;
	}

	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed
	 */
	state = get_bmap(agno, bno);
	if (state != XR_E_UNKNOWN) {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			name, state, agno, bno, suspect);
		return;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);

	if (level == 0) {
		if (numrecs > mp->m_alloc_mxr[0]) {
			numrecs = mp->m_alloc_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_alloc_mnr[0]) {
			numrecs = mp->m_alloc_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
				be16_to_cpu(block->bb_numrecs),
				mp->m_alloc_mnr[0], mp->m_alloc_mxr[0],
				name, agno, bno);
			suspect++;
		}

		rp = XFS_ALLOC_REC_ADDR(mp, block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t	b, end;
			xfs_extlen_t	len, blen;

			b = be32_to_cpu(rp[i].ar_startblock);
			len = be32_to_cpu(rp[i].ar_blockcount);
			end = b + len;

			if (b == 0 || !verify_agbno(mp, agno, b)) {
				do_warn(
_("invalid start block %u in record %u of %s btree block %u/%u\n"),
					b, i, name, agno, bno);
				continue;
			}
			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
				do_warn(
_("invalid length %u in record %u of %s btree block %u/%u\n"),
					len, i, name, agno, bno);
				continue;
			}

			if (magic == XFS_ABTB_MAGIC ||
			    magic == XFS_ABTB_CRC_MAGIC) {
				if (b <= lastblock) {
					do_warn(_(
	"out-of-order bno btree record %d (%u %u) block %u/%u\n"),
						i, b, len, agno, bno);
				} else {
					lastblock = b;
				}
			} else {
				agcnts->fdblocks += len;
				agcnts->agffreeblks += len;
				if (len > agcnts->agflongest)
					agcnts->agflongest = len;
				if (len < lastcount) {
					do_warn(_(
	"out-of-order cnt btree record %d (%u %u) block %u/%u\n"),
						i, b, len, agno, bno);
				} else {
					lastcount = len;
				}
			}

			for ( ; b < end; b += blen) {
				state = get_bmap_ext(agno, b, end, &blen);
				switch (state) {
				case XR_E_UNKNOWN:
					set_bmap(agno, b, XR_E_FREE1);
					break;
				case XR_E_FREE1:
					/*
					 * no warning messages -- we'll catch
					 * FREE1 blocks later
					 */
					if (magic == XFS_ABTC_MAGIC ||
					    magic == XFS_ABTC_CRC_MAGIC) {
						set_bmap_ext(agno, b, blen,
							     XR_E_FREE);
						break;
					}
					/* fall through */
				default:
					do_warn(
_("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
						agno, b, b + blen - 1,
						name, state);
					break;
				}
			}
		}
		return;
	}

	/*
	 * interior record
	 */
	pp = XFS_ALLOC_PTR_ADDR(mp, block, 1, mp->m_alloc_mxr[1]);

	if (numrecs > mp->m_alloc_mxr[1]) {
		numrecs = mp->m_alloc_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_alloc_mnr[1]) {
		numrecs = mp->m_alloc_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */
	if (hdr_errors) {
		do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
			be16_to_cpu(block->bb_numrecs),
			mp->m_alloc_mnr[1], mp->m_alloc_mxr[1],
			name, agno, bno);
		if (suspect)
			return;
		suspect++;
	} else if (suspect) {
		suspect = 0;
	}

	for (i = 0; i < numrecs; i++) {
		xfs_agblock_t	bno = be32_to_cpu(pp[i]);

		/*
		 * XXX - put sibling detection right here.
		 * we know our sibling chain is good.  So as we go,
		 * we check the entry before and after each entry.
		 * If either of the entries references a different block,
		 * check the sibling pointer.  If there's a sibling
		 * pointer mismatch, try and extract as much data
		 * as possible.
		 */
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			switch (magic) {
			case XFS_ABTB_CRC_MAGIC:
			case XFS_ABTB_MAGIC:
				scan_sbtree(bno, level, agno, suspect,
					    scan_allocbt, 0, magic, priv,
					    &xfs_allocbt_buf_ops);
				break;
			case XFS_ABTC_CRC_MAGIC:
			case XFS_ABTC_MAGIC:
				scan_sbtree(bno, level, agno, suspect,
					    scan_allocbt, 0, magic, priv,
					    &xfs_allocbt_buf_ops);
				break;
			}
		}
	}
}

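/*
 * return true if the inode at @offset within the chunk described by @rp
 * is sparse, i.e. covered by the record's hole mask.  always false on
 * filesystems without sparse inode support.
 */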
static bool
ino_issparse(
	struct xfs_inobt_rec	*rp,
	int			offset)
{
	if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
		return false;

	return xfs_inobt_is_sparse_disk(rp, offset);
}

/*
 * The following helpers are to help process and validate individual on-disk
 * inode btree records.  We have two possible inode btrees with slightly
 * different semantics.  Many of the validations and actions are equivalent,
 * such as record alignment constraints, etc.  Other validations differ, such
 * as the fact that the inode chunk block allocation state is set by the
 * content of the core inobt and verified by the content of the finobt.
 *
 * The following structures are used to facilitate common validation routines
 * where the only difference between validation of the inobt or finobt might
 * be the error messages that result in the event of failure.
 */

enum inobt_type {
	INOBT,
	FINOBT
};
const char *inobt_names[] = {
	"inobt",
	"finobt"
};

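/*
 * Verify the alignment and numeric validity of a single inode chunk
 * record.  Sets *skip if the record is bad enough to be ignored entirely
 * and returns the (possibly incremented) suspect count.
 */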
static int
verify_single_ino_chunk_align(
	xfs_agnumber_t		agno,
	enum inobt_type		type,
	struct xfs_inobt_rec	*rp,
	int			suspect,
	bool			*skip)
{
	const char		*inobt_name = inobt_names[type];
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			off;

	*skip = false;
	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);

	/*
	 * on multi-block chunks, all chunks start at the beginning of the
	 * block.  with multi-chunk blocks, all chunks must start on 64-inode
	 * boundaries since each block can hold N complete chunks.  if fs has
	 * aligned inodes, all chunks must start at a fs_ino_alignment*N'th
	 * agbno.  skip recs with badly aligned starting inodes.
	 */
	if (ino == 0 ||
	    (inodes_per_block <= XFS_INODES_PER_CHUNK && off != 0) ||
	    (inodes_per_block > XFS_INODES_PER_CHUNK &&
	     off % XFS_INODES_PER_CHUNK != 0) ||
	    (fs_aligned_inodes && fs_ino_alignment &&
	     agbno % fs_ino_alignment != 0)) {
		do_warn(
	_("badly aligned %s rec (starting inode = %" PRIu64 ")\n"),
			inobt_name, lino);
		suspect++;
	}

	/*
	 * verify numeric validity of inode chunk first before inserting into a
	 * tree.  don't have to worry about the overflow case because the
	 * starting ino number of a chunk can only get within 255 inodes of max
	 * (NULLAGINO).  if it gets closer, the agino number will be illegal as
	 * the agbno will be too large.
	 */
	if (verify_aginum(mp, agno, ino)) {
		do_warn(
_("bad starting inode # (%" PRIu64 " (0x%x 0x%x)) in %s rec, skipping rec\n"),
			lino, agno, ino, inobt_name);
		*skip = true;
		return ++suspect;
	}

	if (verify_aginum(mp, agno,
			ino + XFS_INODES_PER_CHUNK - 1)) {
		do_warn(
_("bad ending inode # (%" PRIu64 " (0x%x 0x%zx)) in %s rec, skipping rec\n"),
			lino + XFS_INODES_PER_CHUNK - 1,
			agno,
			ino + XFS_INODES_PER_CHUNK - 1,
			inobt_name);
		*skip = true;
		return ++suspect;
	}

	return suspect;
}

/*
 * Process the state of individual inodes in an on-disk inobt record and
 * import into the appropriate in-core tree based on whether the on-disk
 * tree is suspect.  Return the total and free inode counts based on the
 * record free and hole masks.
 */
static int
import_single_ino_chunk(
	xfs_agnumber_t		agno,
	enum inobt_type		type,
	struct xfs_inobt_rec	*rp,
	int			suspect,
	int			*p_nfree,
	int			*p_ninodes)
{
	struct ino_tree_node	*ino_rec = NULL;
	const char		*inobt_name = inobt_names[type];
	xfs_agino_t		ino;
	int			j;
	int			nfree;
	int			ninodes;

	ino = be32_to_cpu(rp->ir_startino);

	if (!suspect) {
		if (XFS_INOBT_IS_FREE_DISK(rp, 0))
			ino_rec = set_inode_free_alloc(mp, agno, ino);
		else
			ino_rec = set_inode_used_alloc(mp, agno, ino);
		for (j = 1; j < XFS_INODES_PER_CHUNK; j++) {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				set_inode_free(ino_rec, j);
			else
				set_inode_used(ino_rec, j);
		}
	} else {
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				add_aginode_uncertain(mp, agno, ino + j, 1);
			else
				add_aginode_uncertain(mp, agno, ino + j, 0);
		}
	}

	/*
	 * Mark sparse inodes as such in the in-core tree.  Verify that sparse
	 * inodes are free and that freecount is consistent with the free mask.
	 */
	nfree = ninodes = 0;
	for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
		if (ino_issparse(rp, j)) {
			if (!suspect && !XFS_INOBT_IS_FREE_DISK(rp, j)) {
				do_warn(
_("ir_holemask/ir_free mismatch, %s chunk %d/%u, holemask 0x%x free 0x%llx\n"),
					inobt_name, agno, ino,
					be16_to_cpu(rp->ir_u.sp.ir_holemask),
					be64_to_cpu(rp->ir_free));
				suspect++;
			}
			if (!suspect && ino_rec)
				set_inode_sparse(ino_rec, j);
		} else {
			/* count fields track non-sparse inos */
			if (XFS_INOBT_IS_FREE_DISK(rp, j))
				nfree++;
			ninodes++;
		}
	}

	*p_nfree = nfree;
	*p_ninodes = ninodes;

	return suspect;
}

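/*
 * Process one inobt record: verify its alignment, mark the blocks holding
 * the chunk's inodes in the block map, catch chunks that overlap records
 * already in the avl tree, and import the per-inode free/used state into
 * the in-core trees.
 */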
static int
scan_single_ino_chunk(
	xfs_agnumber_t		agno,
	xfs_inobt_rec_t		*rp,
	int			suspect)
{
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			j;
	int			nfree;
	int			ninodes;
	int			off;
	int			state;
	ino_tree_node_t		*first_rec, *last_rec;
	int			freecount;
	bool			skip = false;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	freecount = inorec_get_freecount(mp, rp);

	/*
	 * Verify record alignment, start/end inode numbers, etc.
	 */
	suspect = verify_single_ino_chunk_align(agno, INOBT, rp, suspect,
						&skip);
	if (skip)
		return suspect;

	/*
	 * set state of each block containing inodes
	 */
	if (off == 0 && !suspect) {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock) {

			/* inodes in sparse chunks don't use blocks */
			if (ino_issparse(rp, j))
				continue;

			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
			state = get_bmap(agno, agbno);
			if (state == XR_E_UNKNOWN) {
				set_bmap(agno, agbno, XR_E_INO);
			} else if (state == XR_E_INUSE_FS && agno == 0 &&
				   ino + j >= first_prealloc_ino &&
				   ino + j < last_prealloc_ino) {
				set_bmap(agno, agbno, XR_E_INO);
			} else {
				do_warn(
_("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				/*
				 * XXX - maybe should mark
				 * block a duplicate
				 */
				return ++suspect;
			}
		}
	}

	/*
	 * ensure only one avl entry per chunk
	 */
	find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);
	if (first_rec != NULL) {
		/*
		 * this chunk overlaps with one (or more)
		 * already in the tree
		 */
		do_warn(
_("inode rec for ino %" PRIu64 " (%d/%d) overlaps existing rec (start %d/%d)\n"),
			lino, agno, ino, agno, first_rec->ino_startnum);
		suspect++;

		/*
		 * if the 2 chunks start at the same place,
		 * then we don't have to put this one
		 * in the uncertain list.  go to the next one.
		 */
		if (first_rec->ino_startnum == ino)
			return suspect;
	}

	/*
	 * Import the state of individual inodes into the appropriate in-core
	 * trees, mark them free or used, and get the resulting total and free
	 * inode counts.
	 */
	nfree = ninodes = 0;
	suspect = import_single_ino_chunk(agno, INOBT, rp, suspect, &nfree,
					  &ninodes);

	if (nfree != freecount) {
		do_warn(
_("ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree %d\n"),
			agno, ino, freecount, nfree);
	}

	/* verify sparse record formats have a valid inode count */
	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    ninodes != rp->ir_u.sp.ir_count) {
		do_warn(
_("invalid inode count, inode chunk %d/%u, count %d ninodes %d\n"),
			agno, ino, rp->ir_u.sp.ir_count, ninodes);
	}

	return suspect;
}

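/*
 * Process one finobt record: verify its alignment, cross-check the block
 * map and in-core inode state built up by the earlier inobt scan, and
 * import any chunk the inobt scan never found.
 */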
static int
scan_single_finobt_chunk(
	xfs_agnumber_t		agno,
	xfs_inobt_rec_t		*rp,
	int			suspect)
{
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			j;
	int			nfree;
	int			ninodes;
	int			off;
	int			state;
	ino_tree_node_t		*first_rec, *last_rec;
	int			freecount;
	bool			skip = false;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	freecount = inorec_get_freecount(mp, rp);

	/*
	 * Verify record alignment, start/end inode numbers, etc.
	 */
	suspect = verify_single_ino_chunk_align(agno, FINOBT, rp, suspect,
						&skip);
	if (skip)
		return suspect;

	/*
	 * cross check state of each block containing inodes referenced by the
	 * finobt against what we have already scanned from the alloc inobt.
	 */
	if (off == 0 && !suspect) {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock) {
			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
			state = get_bmap(agno, agbno);

			/* sparse inodes should not refer to inode blocks */
			if (ino_issparse(rp, j)) {
				if (state == XR_E_INO) {
					do_warn(
_("sparse inode chunk claims inode block, finobt block - agno %d, bno %d, inopb %d\n"),
						agno, agbno, mp->m_sb.sb_inopblock);
					suspect++;
				}
				continue;
			}

			if (state == XR_E_INO) {
				continue;
			} else if ((state == XR_E_UNKNOWN) ||
				   (state == XR_E_INUSE_FS && agno == 0 &&
				    ino + j >= first_prealloc_ino &&
				    ino + j < last_prealloc_ino)) {
				do_warn(
_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);

				set_bmap(agno, agbno, XR_E_INO);
				suspect++;
			} else {
				do_warn(
_("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				return ++suspect;
			}
		}
	}

	/*
	 * ensure we have an incore entry for each chunk
	 */
	find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);

	if (first_rec) {
		if (suspect)
			return suspect;

		/*
		 * verify consistency between finobt record and incore state
		 */
		if (first_rec->ino_startnum != ino) {
			do_warn(
_("finobt rec for ino %" PRIu64 " (%d/%u) does not match existing rec (%d/%d)\n"),
				lino, agno, ino, agno, first_rec->ino_startnum);
			return ++suspect;
		}

		nfree = ninodes = 0;
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			int	isfree = XFS_INOBT_IS_FREE_DISK(rp, j);
			int	issparse = ino_issparse(rp, j);

			if (!issparse)
				ninodes++;
			if (isfree && !issparse)
				nfree++;

			/*
			 * inode allocation state should be consistent between
			 * the inobt and finobt
			 */
			if (!suspect &&
			    isfree != is_inode_free(first_rec, j))
				suspect++;

			if (!suspect &&
			    issparse != is_inode_sparse(first_rec, j))
				suspect++;
		}

		goto check_freecount;
	}

	/*
	 * The finobt contains a record that the previous inobt scan never
	 * found.  Warn about it and import the inodes into the appropriate
	 * trees.
	 *
	 * Note that this should do the right thing if the previous inobt scan
	 * had added these inodes to the uncertain tree.  If the finobt is not
	 * suspect, these inodes should supersede the uncertain ones.
	 * Otherwise, the uncertain tree helpers handle the case where
	 * uncertain inodes already exist.
	 */
	do_warn(_("undiscovered finobt record, ino %" PRIu64 " (%d/%u)\n"),
		lino, agno, ino);

	nfree = ninodes = 0;
	suspect = import_single_ino_chunk(agno, FINOBT, rp, suspect, &nfree,
					  &ninodes);

check_freecount:

	/*
	 * Verify that the record freecount matches the actual number of free
	 * inodes counted in the record.  Don't increment 'suspect' here, since
	 * we have already verified the allocation state of the individual
	 * inodes against the in-core state.  This will have already incremented
	 * 'suspect' if something is wrong.  If suspect hasn't been set at this
	 * point, these warnings mean that we have a simple freecount
	 * inconsistency or a stray finobt record (as opposed to a broader tree
	 * corruption).  Issue a warning and continue the scan.  The final btree
	 * reconstruction will correct this naturally.
	 */
	if (nfree != freecount) {
		do_warn(
_("finobt ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree %d\n"),
			agno, ino, freecount, nfree);
	}

	if (!nfree) {
		do_warn(
_("finobt record with no free inodes, inode chunk %d/%u\n"), agno, ino);
	}

	/* verify sparse record formats have a valid inode count */
	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    ninodes != rp->ir_u.sp.ir_count) {
		do_warn(
_("invalid inode count, inode chunk %d/%u, count %d ninodes %d\n"),
			agno, ino, rp->ir_u.sp.ir_count, ninodes);
	}

	return suspect;
}

/*
 * this one walks the inode btrees sucking the info there into
 * the incore avl tree.  We try and rescue corrupted btree records
 * to minimize our chances of losing inodes.  Inode info from potentially
 * corrupt sources could be bogus so rather than put the info straight
 * into the tree, we put it on a list and try and verify the
 * info in the next phase by examining what's on disk.  At that point,
 * we'll be able to figure out what's what and stick the corrected info
 * into the tree.  We do bail out at some point and give up on a subtree
 * so as to avoid walking randomly all over the ag.
 *
 * Note that it's also ok if the free/inuse info is wrong, we can correct
 * that when we examine the on-disk inode.  The important thing is to
 * get the start and alignment of the inode chunks right.  Those chunks
 * that we aren't sure about go into the uncertain list.
 */
static void
scan_inobt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	__uint32_t		magic,
	void			*priv)
{
	struct aghdr_cnts	*agcnts = priv;
	int			i;
	int			numrecs;
	int			state;
	xfs_inobt_ptr_t		*pp;
	xfs_inobt_rec_t		*rp;
	int			hdr_errors;
	int			freecount;

	hdr_errors = 0;

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in inobt block %d/%d\n"),
			be32_to_cpu(block->bb_magic), agno, bno);
		hdr_errors++;
		bad_ino_btree = 1;
		if (suspect)
			return;
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in inobt block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), agno, bno);
		hdr_errors++;
		bad_ino_btree = 1;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed, any unknown/free state
	 * is ok in the bitmap block.
	 */
	state = get_bmap(agno, bno);
	switch (state) {
	case XR_E_UNKNOWN:
	case XR_E_FREE1:
	case XR_E_FREE:
		set_bmap(agno, bno, XR_E_FS_MAP);
		break;
	default:
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			state, agno, bno, suspect);
	}

	numrecs = be16_to_cpu(block->bb_numrecs);

	/*
	 * leaf record in btree
	 */
	if (level == 0) {
		/* check for trashed btree block */

		if (numrecs > mp->m_inobt_mxr[0]) {
			numrecs = mp->m_inobt_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_inobt_mnr[0]) {
			numrecs = mp->m_inobt_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			bad_ino_btree = 1;
			do_warn(_("dubious inode btree block header %d/%d\n"),
				agno, bno);
			suspect++;
		}

		rp = XFS_INOBT_REC_ADDR(mp, block, 1);

		/*
		 * step through the records; each record points to
		 * a chunk of inodes.  The start of inode chunks should
		 * be block-aligned.  Each inode btree rec should point
		 * to the start of a block of inodes or the start of a group
		 * of INODES_PER_CHUNK (64) inodes.  off is the offset into
		 * the block.  skip processing of bogus records.
		 */
		for (i = 0; i < numrecs; i++) {
			freecount = inorec_get_freecount(mp, &rp[i]);

			if (magic == XFS_IBT_MAGIC ||
			    magic == XFS_IBT_CRC_MAGIC) {
				int icount = XFS_INODES_PER_CHUNK;

				/*
				 * ir_count holds the inode count for all
				 * records on filesystems with sparse inode
				 * support
				 */
				if (xfs_sb_version_hassparseinodes(&mp->m_sb))
					icount = rp[i].ir_u.sp.ir_count;

				agcnts->agicount += icount;
				agcnts->agifreecount += freecount;
				agcnts->ifreecount += freecount;

				suspect = scan_single_ino_chunk(agno, &rp[i],
						suspect);
			} else {
				/*
				 * the finobt tracks records with free inodes,
				 * so only the free inode count is expected to
				 * be consistent with the agi
				 */
				agcnts->fibtfreecount += freecount;

				suspect = scan_single_finobt_chunk(agno,
						&rp[i], suspect);
			}
		}

		if (suspect)
			bad_ino_btree = 1;

		return;
	}

	/*
	 * interior record, continue on
	 */
	if (numrecs > mp->m_inobt_mxr[1]) {
		numrecs = mp->m_inobt_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_inobt_mnr[1]) {
		numrecs = mp->m_inobt_mnr[1];
		hdr_errors++;
	}

	pp = XFS_INOBT_PTR_ADDR(mp, block, 1, mp->m_inobt_mxr[1]);

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */

	if (suspect && !hdr_errors)
		suspect = 0;

	if (hdr_errors) {
		bad_ino_btree = 1;
		if (suspect)
			return;
		else
			suspect++;
	}

	for (i = 0; i < numrecs; i++) {
		if (be32_to_cpu(pp[i]) != 0 && verify_agbno(mp, agno,
						be32_to_cpu(pp[i])))
			scan_sbtree(be32_to_cpu(pp[i]), level, agno,
					suspect, scan_inobt, 0, magic, priv,
					&xfs_inobt_buf_ops);
	}
}

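/*
 * walk the AGFL, a circular buffer of free block numbers delimited by
 * agf_flfirst/agf_fllast, marking each entry free in the block map and
 * checking the walked count against agf_flcount.
 */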
static void
scan_freelist(
	xfs_agf_t	*agf,
	struct aghdr_cnts *agcnts)
{
	xfs_buf_t	*agflbuf;
	xfs_agnumber_t	agno;
	xfs_agblock_t	bno;
	int		count;
	int		i;
	__be32		*freelist;

	agno = be32_to_cpu(agf->agf_seqno);

	if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
	    XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
	    XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
		set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);

	if (be32_to_cpu(agf->agf_flcount) == 0)
		return;

	agflbuf = libxfs_readbuf(mp->m_dev,
				 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
				 XFS_FSS_TO_BB(mp, 1), 0, &xfs_agfl_buf_ops);
	if (!agflbuf) {
		do_abort(_("can't read agfl block for ag %d\n"), agno);
		return;
	}
	if (agflbuf->b_error == -EFSBADCRC)
		do_warn(_("agfl has bad CRC for ag %d\n"), agno);

	freelist = XFS_BUF_TO_AGFL_BNO(mp, agflbuf);
	i = be32_to_cpu(agf->agf_flfirst);

	if (no_modify) {
		/* agf values not fixed in verify_set_agf, so recheck */
		if (be32_to_cpu(agf->agf_flfirst) >= XFS_AGFL_SIZE(mp) ||
		    be32_to_cpu(agf->agf_fllast) >= XFS_AGFL_SIZE(mp)) {
			do_warn(_("agf %d freelist blocks bad, skipping "
				  "freelist scan\n"), agno);
			libxfs_putbuf(agflbuf);
			return;
		}
	}

	count = 0;
	for (;;) {
		bno = be32_to_cpu(freelist[i]);
		if (verify_agbno(mp, agno, bno))
			set_bmap(agno, bno, XR_E_FREE);
		else
			do_warn(_("bad agbno %u in agfl, agno %d\n"),
				bno, agno);
		count++;
		if (i == be32_to_cpu(agf->agf_fllast))
			break;
		if (++i == XFS_AGFL_SIZE(mp))
			i = 0;
	}
	if (count != be32_to_cpu(agf->agf_flcount)) {
		do_warn(_("freeblk count %d != flcount %d in ag %d\n"), count,
			be32_to_cpu(agf->agf_flcount), agno);
	}

	agcnts->fdblocks += count;

	libxfs_putbuf(agflbuf);
}

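/*
 * scan both freespace btrees from their roots in the AGF and check the
 * AGF freeblks/longest/btreeblks counters against the totals accumulated
 * during the scan.
 */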
static void
validate_agf(
	struct xfs_agf		*agf,
	xfs_agnumber_t		agno,
	struct aghdr_cnts	*agcnts)
{
	xfs_agblock_t		bno;
	__uint32_t		magic;

	bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_ABTB_CRC_MAGIC
							 : XFS_ABTB_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
			    agno, 0, scan_allocbt, 1, magic, agcnts,
			    &xfs_allocbt_buf_ops);
	} else {
		do_warn(_("bad agbno %u for btbno root, agno %d\n"),
			bno, agno);
	}

	bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_ABTC_CRC_MAGIC
							 : XFS_ABTC_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
			    agno, 0, scan_allocbt, 1, magic, agcnts,
			    &xfs_allocbt_buf_ops);
	} else {
		do_warn(_("bad agbno %u for btbcnt root, agno %d\n"),
			bno, agno);
	}

	if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
		do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
			be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
	}

	if (be32_to_cpu(agf->agf_longest) != agcnts->agflongest) {
		do_warn(_("agf_longest %u, counted %u in ag %u\n"),
			be32_to_cpu(agf->agf_longest), agcnts->agflongest, agno);
	}

	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_btreeblks) != agcnts->agfbtreeblks) {
		do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"),
			be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
	}
}

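/*
 * scan the inobt (and the finobt, if present) from their roots in the
 * AGI, check the AGI inode counters against the scanned totals, and
 * report any non-empty unlinked buckets.
 */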
static void
validate_agi(
	struct xfs_agi		*agi,
	xfs_agnumber_t		agno,
	struct aghdr_cnts	*agcnts)
{
	xfs_agblock_t		bno;
	int			i;
	__uint32_t		magic;

	bno = be32_to_cpu(agi->agi_root);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_IBT_CRC_MAGIC
							 : XFS_IBT_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agi->agi_level),
			    agno, 0, scan_inobt, 1, magic, agcnts,
			    &xfs_inobt_buf_ops);
	} else {
		do_warn(_("bad agbno %u for inobt root, agno %d\n"),
			be32_to_cpu(agi->agi_root), agno);
	}

	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		bno = be32_to_cpu(agi->agi_free_root);
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			magic = xfs_sb_version_hascrc(&mp->m_sb) ?
					XFS_FIBT_CRC_MAGIC : XFS_FIBT_MAGIC;
			scan_sbtree(bno, be32_to_cpu(agi->agi_free_level),
				    agno, 0, scan_inobt, 1, magic, agcnts,
				    &xfs_inobt_buf_ops);
		} else {
			do_warn(_("bad agbno %u for finobt root, agno %d\n"),
				be32_to_cpu(agi->agi_free_root), agno);
		}
	}

	if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
		do_warn(_("agi_count %u, counted %u in ag %u\n"),
			be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
	}

	if (be32_to_cpu(agi->agi_freecount) != agcnts->agifreecount) {
		do_warn(_("agi_freecount %u, counted %u in ag %u\n"),
			be32_to_cpu(agi->agi_freecount), agcnts->agifreecount, agno);
	}

	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
	    be32_to_cpu(agi->agi_freecount) != agcnts->fibtfreecount) {
		do_warn(_("agi_freecount %u, counted %u in ag %u finobt\n"),
			be32_to_cpu(agi->agi_freecount), agcnts->fibtfreecount,
			agno);
	}

	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
		xfs_agino_t	agino = be32_to_cpu(agi->agi_unlinked[i]);

		if (agino != NULLAGINO) {
			do_warn(
_("agi unlinked bucket %d is %u in ag %u (inode=%" PRIu64 ")\n"),
				i, agino, agno,
				XFS_AGINO_TO_INO(mp, agno, agino));
		}
	}
}

/*
 * Scan an AG for obvious corruption.
 */
static void
scan_ag(
	work_queue_t	*wq,
	xfs_agnumber_t	agno,
	void		*arg)
{
	struct aghdr_cnts *agcnts = arg;
	struct xfs_agf	*agf;
	struct xfs_buf	*agfbuf = NULL;
	int		agf_dirty = 0;
	struct xfs_agi	*agi;
	struct xfs_buf	*agibuf = NULL;
	int		agi_dirty = 0;
	struct xfs_sb	*sb = NULL;
	struct xfs_buf	*sbbuf = NULL;
	int		sb_dirty = 0;
	int		status;
	char		*objname = NULL;

	sb = (struct xfs_sb *)calloc(BBTOB(XFS_FSS_TO_BB(mp, 1)), 1);
	if (!sb) {
		do_error(_("can't allocate memory for superblock\n"));
		return;
	}

	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
	if (!sbbuf) {
		objname = _("root superblock");
		goto out_free_sb;
	}
	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
	libxfs_sb_quota_from_disk(sb);

	agfbuf = libxfs_readbuf(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
	if (!agfbuf) {
		objname = _("agf block");
		goto out_free_sbbuf;
	}
	agf = XFS_BUF_TO_AGF(agfbuf);

	agibuf = libxfs_readbuf(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
	if (!agibuf) {
		objname = _("agi block");
		goto out_free_agfbuf;
	}
	agi = XFS_BUF_TO_AGI(agibuf);

	/* fix up bad ag headers */

	status = verify_set_agheader(mp, sbbuf, sb, agf, agi, agno);

	if (status & XR_AG_SB_SEC) {
		if (!no_modify)
			sb_dirty = 1;
		/*
		 * clear bad sector bit because we don't want
		 * to skip further processing.  we just want to
		 * ensure that we write out the modified sb buffer.
		 */
		status &= ~XR_AG_SB_SEC;
	}
	if (status & XR_AG_SB) {
		if (!no_modify) {
			do_warn(_("reset bad sb for ag %d\n"), agno);
			sb_dirty = 1;
		} else {
			do_warn(_("would reset bad sb for ag %d\n"), agno);
		}
	}
	if (status & XR_AG_AGF) {
		if (!no_modify) {
			do_warn(_("reset bad agf for ag %d\n"), agno);
			agf_dirty = 1;
		} else {
			do_warn(_("would reset bad agf for ag %d\n"), agno);
		}
	}
	if (status & XR_AG_AGI) {
		if (!no_modify) {
			do_warn(_("reset bad agi for ag %d\n"), agno);
			agi_dirty = 1;
		} else {
			do_warn(_("would reset bad agi for ag %d\n"), agno);
		}
	}

	if (status && no_modify) {
		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
			agno);
		goto out_free_agibuf;
	}

	scan_freelist(agf, agcnts);

	validate_agf(agf, agno, agcnts);
	validate_agi(agi, agno, agcnts);

	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));

	/*
	 * Only pay attention to CRC/verifier errors if we can correct them.
	 * Note that we can get uncorrected EFSCORRUPTED errors here because
	 * the verifier will flag on out of range values that we can't correct
	 * until phase 5 when we have all the information necessary to rebuild
	 * the freespace/inode btrees.  We can correct bad CRC errors
	 * immediately, though.
	 */
	if (!no_modify) {
		agi_dirty += (agibuf->b_error == -EFSBADCRC);
		agf_dirty += (agfbuf->b_error == -EFSBADCRC);
		sb_dirty += (sbbuf->b_error == -EFSBADCRC);
	}

	if (agi_dirty && !no_modify)
		libxfs_writebuf(agibuf, 0);
	else
		libxfs_putbuf(agibuf);

	if (agf_dirty && !no_modify)
		libxfs_writebuf(agfbuf, 0);
	else
		libxfs_putbuf(agfbuf);

	if (sb_dirty && !no_modify) {
		if (agno == 0)
			memcpy(&mp->m_sb, sb, sizeof(xfs_sb_t));
		libxfs_sb_to_disk(XFS_BUF_TO_SBP(sbbuf), sb);
		libxfs_writebuf(sbbuf, 0);
	} else
		libxfs_putbuf(sbbuf);
	free(sb);
	PROG_RPT_INC(prog_rpt_done[agno], 1);

#ifdef XR_INODE_TRACE
	print_inode_list(i);
#endif
	return;

out_free_agibuf:
	libxfs_putbuf(agibuf);
out_free_agfbuf:
	libxfs_putbuf(agfbuf);
out_free_sbbuf:
	libxfs_putbuf(sbbuf);
out_free_sb:
	free(sb);

	if (objname)
		do_error(_("can't get %s for ag %d\n"), objname, agno);
}

#define SCAN_THREADS 32

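/*
 * scan all AGs in parallel on the work queue, then sum the per-AG counts
 * and compare them against the superblock's icount, ifree and fdblocks.
 */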
void
scan_ags(
	struct xfs_mount	*mp,
	int			scan_threads)
{
	struct aghdr_cnts *agcnts;
	__uint64_t	fdblocks = 0;
	__uint64_t	icount = 0;
	__uint64_t	ifreecount = 0;
	xfs_agnumber_t	i;
	work_queue_t	wq;

	agcnts = malloc(mp->m_sb.sb_agcount * sizeof(*agcnts));
	if (!agcnts) {
		do_abort(_("no memory for ag header counts\n"));
		return;
	}
	memset(agcnts, 0, mp->m_sb.sb_agcount * sizeof(*agcnts));

	create_work_queue(&wq, mp, scan_threads);

	for (i = 0; i < mp->m_sb.sb_agcount; i++)
		queue_work(&wq, scan_ag, i, &agcnts[i]);

	destroy_work_queue(&wq);

	/* tally up the counts */
	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		fdblocks += agcnts[i].fdblocks;
		icount += agcnts[i].agicount;
		ifreecount += agcnts[i].ifreecount;
	}

	free(agcnts);

	/*
	 * Validate that our manual counts match the superblock.
	 */
	if (mp->m_sb.sb_icount != icount) {
		do_warn(_("sb_icount %" PRIu64 ", counted %" PRIu64 "\n"),
			mp->m_sb.sb_icount, icount);
	}

	if (mp->m_sb.sb_ifree != ifreecount) {
		do_warn(_("sb_ifree %" PRIu64 ", counted %" PRIu64 "\n"),
			mp->m_sb.sb_ifree, ifreecount);
	}

	if (mp->m_sb.sb_fdblocks != fdblocks) {
		do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"),
			mp->m_sb.sb_fdblocks, fdblocks);
	}
}
1805 }