/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include <libxfs.h>
#include "avl.h"
#include "globals.h"
#include "agheader.h"
#include "incore.h"
#include "protos.h"
#include "err_protos.h"
#include "dinode.h"
#include "scan.h"
#include "versions.h"
#include "bmap.h"
#include "progress.h"
#include "threads.h"

static xfs_mount_t	*mp = NULL;

/*
 * Variables to validate AG header values against the manual count
 * from the btree traversal.
 */
struct aghdr_cnts {
	xfs_agnumber_t	agno;
	xfs_extlen_t	agffreeblks;
	xfs_extlen_t	agflongest;
	__uint64_t	agfbtreeblks;
	__uint32_t	agicount;
	__uint32_t	agifreecount;
	__uint64_t	fdblocks;
	__uint64_t	ifreecount;
	__uint32_t	fibtfreecount;
};
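
/*
 * Illustrative accounting sketch (shorthand for the fields above; see
 * validate_agf()/validate_agi() below for the real comparisons): after
 * an AG has been walked, the counters should satisfy
 *
 *	fdblocks     = sum of cnt btree free extent lengths
 *		     + non-root freespace btree blocks
 *		     + blocks on the AGFL
 *	agffreeblks  = sum of cnt btree free extent lengths only
 *	agicount     = inodes covered by inobt records
 *	agifreecount = free inodes counted from inobt records
 */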

void
set_mp(xfs_mount_t *mpp)
{
	libxfs_bcache_purge();
	mp = mpp;
}

static void
scan_sbtree(
	xfs_agblock_t	root,
	int		nlevels,
	xfs_agnumber_t	agno,
	int		suspect,
	void		(*func)(struct xfs_btree_block	*block,
				int			level,
				xfs_agblock_t		bno,
				xfs_agnumber_t		agno,
				int			suspect,
				int			isroot,
				__uint32_t		magic,
				void			*priv),
	int		isroot,
	__uint32_t	magic,
	void		*priv,
	const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;

	bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, root),
			XFS_FSB_TO_BB(mp, 1), 0, ops);
	if (!bp) {
		do_error(_("can't read btree block %d/%d\n"), agno, root);
		return;
	}
	if (bp->b_error == EFSBADCRC || bp->b_error == EFSCORRUPTED) {
		do_warn(_("btree block %d/%d is suspect, error %d\n"),
			agno, root, bp->b_error);
		suspect = 1;
	}

	(*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect,
		isroot, magic, priv);
	libxfs_putbuf(bp);
}
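
/*
 * Minimal sketch of the callback shape scan_sbtree() expects; the real
 * callbacks below (scan_allocbt, scan_inobt) follow this pattern of
 * validating the header at this level and recursing back through
 * scan_sbtree() for each valid child pointer until level 0:
 *
 *	static void
 *	example_scan(struct xfs_btree_block *block, int level,
 *		     xfs_agblock_t bno, xfs_agnumber_t agno, int suspect,
 *		     int isroot, __uint32_t magic, void *priv)
 *	{
 *		if (level == 0)
 *			return;		(process leaf records here)
 *		(for each valid child pointer cbno:)
 *		scan_sbtree(cbno, level, agno, suspect, example_scan,
 *			    0, magic, priv, ops);
 *	}
 */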

/*
 * returns 1 on bad news (inode needs to be cleared), 0 on good
 */
int
scan_lbtree(
	xfs_fsblock_t	root,
	int		nlevels,
	int		(*func)(struct xfs_btree_block	*block,
				int			level,
				int			type,
				int			whichfork,
				xfs_fsblock_t		bno,
				xfs_ino_t		ino,
				xfs_rfsblock_t		*tot,
				__uint64_t		*nex,
				blkmap_t		**blkmapp,
				bmap_cursor_t		*bm_cursor,
				int			isroot,
				int			check_dups,
				int			*dirty,
				__uint64_t		magic),
	int		type,
	int		whichfork,
	xfs_ino_t	ino,
	xfs_rfsblock_t	*tot,
	__uint64_t	*nex,
	blkmap_t	**blkmapp,
	bmap_cursor_t	*bm_cursor,
	int		isroot,
	int		check_dups,
	__uint64_t	magic,
	const struct xfs_buf_ops *ops)
{
	xfs_buf_t	*bp;
	int		err;
	int		dirty = 0;
	bool		badcrc = false;

	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root),
			XFS_FSB_TO_BB(mp, 1), 0, ops);
	if (!bp)  {
		do_error(_("can't read btree block %d/%d\n"),
			XFS_FSB_TO_AGNO(mp, root),
			XFS_FSB_TO_AGBNO(mp, root));
		return(1);
	}

	/*
	 * only check for bad CRC here - caller will determine if there
	 * is a corruption or not and whether it got corrected and so needs
	 * writing back. CRC errors always imply we need to write the block.
	 */
	if (bp->b_error == EFSBADCRC) {
		do_warn(_("btree block %d/%d is suspect, error %d\n"),
			XFS_FSB_TO_AGNO(mp, root),
			XFS_FSB_TO_AGBNO(mp, root), bp->b_error);
		badcrc = true;
	}

	err = (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1,
			type, whichfork, root, ino, tot, nex, blkmapp,
			bm_cursor, isroot, check_dups, &dirty,
			magic);

	ASSERT(dirty == 0 || (dirty && !no_modify));

	if ((dirty || badcrc) && !no_modify)
		libxfs_writebuf(bp, 0);
	else
		libxfs_putbuf(bp);

	return(err);
}
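
/*
 * scan_lbtree() and its scan_bmapbt() callback recurse mutually: the
 * callback follows each child pointer back through scan_lbtree() with
 * isroot = 0, and a CRC error or a corrected key forces a writeback via
 * the dirty/badcrc logic above. Hedged sketch of a top-level call,
 * assuming a caller that already holds the inode's root-level bmbt
 * pointer (the local names here are illustrative):
 *
 *	err = scan_lbtree(rootbno, nlevels, scan_bmapbt, type, whichfork,
 *			  lino, &totblocks, &nextents, &blkmap, &cursor,
 *			  1, check_dups, magic, &xfs_bmbt_buf_ops);
 *	if (err)
 *		...clear the inode, per the comment above scan_lbtree()...
 */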

int
scan_bmapbt(
	struct xfs_btree_block	*block,
	int			level,
	int			type,
	int			whichfork,
	xfs_fsblock_t		bno,
	xfs_ino_t		ino,
	xfs_rfsblock_t		*tot,
	__uint64_t		*nex,
	blkmap_t		**blkmapp,
	bmap_cursor_t		*bm_cursor,
	int			isroot,
	int			check_dups,
	int			*dirty,
	__uint64_t		magic)
{
	int			i;
	int			err;
	xfs_bmbt_ptr_t		*pp;
	xfs_bmbt_key_t		*pkey;
	xfs_bmbt_rec_t		*rp;
	xfs_fileoff_t		first_key;
	xfs_fileoff_t		last_key;
	char			*forkname = get_forkname(whichfork);
	int			numrecs;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	int			state;

	/*
	 * unlike the ag freeblock btrees, if anything looks wrong
	 * in an inode bmap tree, just bail.  it's possible that
	 * we'll miss a case where the to-be-toasted inode and
	 * another inode are claiming the same block but that's
	 * highly unlikely.
	 */
	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(
_("bad magic # %#x in inode %" PRIu64 " (%s fork) bmbt block %" PRIu64 "\n"),
			be32_to_cpu(block->bb_magic), ino, forkname, bno);
		return(1);
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(
_("expected level %d got %d in inode %" PRIu64 ", (%s fork) bmbt block %" PRIu64 "\n"),
			level, be16_to_cpu(block->bb_level),
			ino, forkname, bno);
		return(1);
	}

	if (magic == XFS_BMAP_CRC_MAGIC) {
		/* verify owner */
		if (be64_to_cpu(block->bb_u.l.bb_owner) != ino) {
			do_warn(
_("expected owner inode %" PRIu64 ", got %llu, bmbt block %" PRIu64 "\n"),
				ino, be64_to_cpu(block->bb_u.l.bb_owner), bno);
			return 1;
		}
		/* verify block number */
		if (be64_to_cpu(block->bb_u.l.bb_blkno) !=
		    XFS_FSB_TO_DADDR(mp, bno)) {
			do_warn(
_("expected block %" PRIu64 ", got %llu, bmbt block %" PRIu64 "\n"),
				XFS_FSB_TO_DADDR(mp, bno),
				be64_to_cpu(block->bb_u.l.bb_blkno), bno);
			return 1;
		}
		/* verify uuid */
		if (platform_uuid_compare(&block->bb_u.l.bb_uuid,
					  &mp->m_sb.sb_uuid) != 0) {
			do_warn(
_("wrong FS UUID, bmbt block %" PRIu64 "\n"),
				bno);
			return 1;
		}
	}

	if (check_dups == 0) {
		/*
		 * check sibling pointers. if bad we have a conflict
		 * between the sibling pointers and the child pointers
		 * in the parent block.  blow out the inode if that happens
		 */
		if (bm_cursor->level[level].fsbno != NULLFSBLOCK)  {
			/*
			 * this is not the first block on this level
			 * so the cursor for this level has recorded the
			 * values for this block's left-sibling.
			 */
			if (bno != bm_cursor->level[level].right_fsbno)  {
				do_warn(
_("bad fwd (right) sibling pointer (saw %" PRIu64 " parent block says %" PRIu64 ")\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					bm_cursor->level[level].right_fsbno,
					bno, ino, forkname,
					bm_cursor->level[level].fsbno);
				return(1);
			}
			if (be64_to_cpu(block->bb_u.l.bb_leftsib) !=
					bm_cursor->level[level].fsbno)  {
				do_warn(
_("bad back (left) sibling pointer (saw %llu parent block says %" PRIu64 ")\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					(unsigned long long)
						be64_to_cpu(block->bb_u.l.bb_leftsib),
					bm_cursor->level[level].fsbno,
					ino, forkname, bno);
				return(1);
			}
		} else {
			/*
			 * This is the first or only block on this level.
			 * Check that the left sibling pointer is NULL
			 */
			if (be64_to_cpu(block->bb_u.l.bb_leftsib) != NULLFSBLOCK)  {
				do_warn(
_("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
					(unsigned long long)
						be64_to_cpu(block->bb_u.l.bb_leftsib),
					ino, forkname, bno);
				return(1);
			}
		}

		/*
		 * update cursor block pointers to reflect this block
		 */
		bm_cursor->level[level].fsbno = bno;
		bm_cursor->level[level].left_fsbno =
					be64_to_cpu(block->bb_u.l.bb_leftsib);
		bm_cursor->level[level].right_fsbno =
					be64_to_cpu(block->bb_u.l.bb_rightsib);

		agno = XFS_FSB_TO_AGNO(mp, bno);
		agbno = XFS_FSB_TO_AGBNO(mp, bno);

		pthread_mutex_lock(&ag_locks[agno].lock);
		state = get_bmap(agno, agbno);
		switch (state) {
		case XR_E_UNKNOWN:
		case XR_E_FREE1:
		case XR_E_FREE:
			set_bmap(agno, agbno, XR_E_INUSE);
			break;
		case XR_E_FS_MAP:
		case XR_E_INUSE:
			/*
			 * we'll try and continue searching here since
			 * the block looks like it's been claimed by a file
			 * to store user data, by a directory to store
			 * directory data, or by the space allocation btrees.
			 * but since we made it here, the block probably
			 * contains btree data.
			 */
			set_bmap(agno, agbno, XR_E_MULT);
			do_warn(
_("inode 0x%" PRIx64 " bmap block 0x%" PRIx64 " claimed, state is %d\n"),
				ino, bno, state);
			break;
		case XR_E_MULT:
		case XR_E_INUSE_FS:
			set_bmap(agno, agbno, XR_E_MULT);
			do_warn(
_("inode 0x%" PRIx64 " bmap block 0x%" PRIx64 " claimed, state is %d\n"),
				ino, bno, state);
			/*
			 * if we made it to here, this is probably a bmap block
			 * that is being used by *another* file as a bmap block
			 * so the block will be valid.  Both files should be
			 * trashed along with any other file that impinges on
			 * any blocks referenced by either file.  So we
			 * continue searching down this btree to mark all
			 * blocks duplicate
			 */
			break;
		case XR_E_BAD_STATE:
		default:
			do_warn(
_("bad state %d, inode %" PRIu64 " bmap block 0x%" PRIx64 "\n"),
				state, ino, bno);
			break;
		}
		pthread_mutex_unlock(&ag_locks[agno].lock);
	} else {
		/*
		 * attribute fork for realtime files is in the regular
		 * filesystem
		 */
		if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK)  {
			if (search_dup_extent(XFS_FSB_TO_AGNO(mp, bno),
					XFS_FSB_TO_AGBNO(mp, bno),
					XFS_FSB_TO_AGBNO(mp, bno) + 1))
				return(1);
		} else  {
			if (search_rt_dup_extent(mp, bno))
				return(1);
		}
	}
	(*tot)++;
	numrecs = be16_to_cpu(block->bb_numrecs);

	if (level == 0) {
		if (numrecs > mp->m_bmap_dmxr[0] || (isroot == 0 && numrecs <
							mp->m_bmap_dmnr[0])) {
			do_warn(
_("inode %" PRIu64 " bad # of bmap records (%u, min - %u, max - %u)\n"),
				ino, numrecs, mp->m_bmap_dmnr[0],
				mp->m_bmap_dmxr[0]);
			return(1);
		}
		rp = XFS_BMBT_REC_ADDR(mp, block, 1);
		*nex += numrecs;
		/*
		 * XXX - if we were going to fix up the btree record,
		 * we'd do it right here.  For now, if there's a problem,
		 * we'll bail out and presumably clear the inode.
		 */
		if (check_dups == 0)  {
			err = process_bmbt_reclist(mp, rp, &numrecs, type, ino,
						   tot, blkmapp, &first_key,
						   &last_key, whichfork);
			if (err)
				return 1;

			/*
			 * check that key ordering is monotonically increasing.
			 * if the last_key value in the cursor is set to
			 * NULLFILEOFF, then we know this is the first block
			 * on the leaf level and we shouldn't check the
			 * last_key value.
			 */
			if (first_key <= bm_cursor->level[level].last_key &&
					bm_cursor->level[level].last_key !=
					NULLFILEOFF)  {
				do_warn(
_("out-of-order bmap key (file offset) in inode %" PRIu64 ", %s fork, fsbno %" PRIu64 "\n"),
					ino, forkname, bno);
				return(1);
			}
			/*
			 * update cursor keys to reflect this block.
			 * don't have to check if last_key is > first_key
			 * since that gets checked by process_bmbt_reclist.
			 */
			bm_cursor->level[level].first_key = first_key;
			bm_cursor->level[level].last_key = last_key;

			return 0;
		} else {
			return scan_bmbt_reclist(mp, rp, &numrecs, type, ino,
						 tot, whichfork);
		}
	}
	if (numrecs > mp->m_bmap_dmxr[1] || (isroot == 0 && numrecs <
							mp->m_bmap_dmnr[1])) {
		do_warn(
_("inode %" PRIu64 " bad # of bmap records (%u, min - %u, max - %u)\n"),
			ino, numrecs, mp->m_bmap_dmnr[1], mp->m_bmap_dmxr[1]);
		return(1);
	}
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
	pkey = XFS_BMBT_KEY_ADDR(mp, block, 1);

	last_key = NULLFILEOFF;

	for (i = 0, err = 0; i < numrecs; i++)  {
		/*
		 * XXX - if we were going to fix up the interior btree nodes,
		 * we'd do it right here.  For now, if there's a problem,
		 * we'll bail out and presumably clear the inode.
		 */
		if (!verify_dfsbno(mp, be64_to_cpu(pp[i])))  {
			do_warn(
_("bad bmap btree ptr 0x%llx in ino %" PRIu64 "\n"),
				(unsigned long long) be64_to_cpu(pp[i]), ino);
			return(1);
		}

		err = scan_lbtree(be64_to_cpu(pp[i]), level, scan_bmapbt,
				type, whichfork, ino, tot, nex, blkmapp,
				bm_cursor, 0, check_dups, magic,
				&xfs_bmbt_buf_ops);
		if (err)
			return(1);

		/*
		 * fix key (offset) mismatches between the first key
		 * in the child block (as recorded in the cursor) and the
		 * key in the interior node referencing the child block.
		 *
		 * fixes cases where entries have been shifted between
		 * child blocks but the parent hasn't been updated.  We
		 * don't have to worry about the key values in the cursor
		 * not being set since we only look at the key values of
		 * our child and those are guaranteed to be set by the
		 * call to scan_lbtree() above.
		 */
		if (check_dups == 0 && be64_to_cpu(pkey[i].br_startoff) !=
					bm_cursor->level[level-1].first_key)  {
			if (!no_modify)  {
				do_warn(
_("correcting bt key (was %llu, now %" PRIu64 ") in inode %" PRIu64 "\n"
  "\t\t%s fork, btree block %" PRIu64 "\n"),
					(unsigned long long)
						be64_to_cpu(pkey[i].br_startoff),
					bm_cursor->level[level-1].first_key,
					ino,
					forkname, bno);
				*dirty = 1;
				pkey[i].br_startoff = cpu_to_be64(
					bm_cursor->level[level-1].first_key);
			} else  {
				do_warn(
_("bad btree key (is %llu, should be %" PRIu64 ") in inode %" PRIu64 "\n"
  "\t\t%s fork, btree block %" PRIu64 "\n"),
					(unsigned long long)
						be64_to_cpu(pkey[i].br_startoff),
					bm_cursor->level[level-1].first_key,
					ino, forkname, bno);
			}
		}
	}

	/*
	 * If we're the last node at our level, check that the last child
	 * block's forward sibling pointer is NULL.
	 */
	if (check_dups == 0 &&
			bm_cursor->level[level].right_fsbno == NULLFSBLOCK &&
			bm_cursor->level[level - 1].right_fsbno != NULLFSBLOCK) {
		do_warn(
_("bad fwd (right) sibling pointer (saw %" PRIu64 " should be NULLFSBLOCK)\n"
  "\tin inode %" PRIu64 " (%s fork) bmap btree block %" PRIu64 "\n"),
			bm_cursor->level[level - 1].right_fsbno,
			ino, forkname, bm_cursor->level[level - 1].fsbno);
		return(1);
	}

	/*
	 * update cursor keys to reflect this block
	 */
	if (check_dups == 0)  {
		bm_cursor->level[level].first_key =
				be64_to_cpu(pkey[0].br_startoff);
		bm_cursor->level[level].last_key =
				be64_to_cpu(pkey[numrecs - 1].br_startoff);
	}

	return(0);
}
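
/*
 * Worked example of the sibling bookkeeping above: suppose level 0 of a
 * bmbt holds blocks A then B. While scanning A the cursor records
 * level[0].fsbno = A and level[0].right_fsbno = A's bb_rightsib. When B
 * is visited next, the scan requires bno == right_fsbno (A's forward
 * pointer names B) and B's bb_leftsib == A (B points back at A); any
 * mismatch means the sibling chain disagrees with the parent's child
 * pointers, so the inode is rejected rather than guessed at.
 */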

static void
scan_allocbt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	__uint32_t		magic,
	void			*priv)
{
	struct aghdr_cnts	*agcnts = priv;
	const char		*name;
	int			i;
	xfs_alloc_ptr_t		*pp;
	xfs_alloc_rec_t		*rp;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;
	xfs_extlen_t		lastcount = 0;
	xfs_agblock_t		lastblock = 0;

	switch (magic) {
	case XFS_ABTB_CRC_MAGIC:
	case XFS_ABTB_MAGIC:
		name = "bno";
		break;
	case XFS_ABTC_CRC_MAGIC:
	case XFS_ABTC_MAGIC:
		name = "cnt";
		break;
	default:
		name = "(unknown)";
		assert(0);
		break;
	}

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * All freespace btree blocks except the roots are freed for a
	 * fully used filesystem, thus they are counted towards the
	 * free data block counter.
	 */
	if (!isroot) {
		agcnts->agfbtreeblks++;
		agcnts->fdblocks++;
	}

	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed
	 */
	state = get_bmap(agno, bno);
	if (state != XR_E_UNKNOWN)  {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			name, state, agno, bno, suspect);
		return;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);

	if (level == 0) {
		if (numrecs > mp->m_alloc_mxr[0])  {
			numrecs = mp->m_alloc_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_alloc_mnr[0])  {
			numrecs = mp->m_alloc_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors) {
			do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
				be16_to_cpu(block->bb_numrecs),
				mp->m_alloc_mnr[0], mp->m_alloc_mxr[0],
				name, agno, bno);
			suspect++;
		}

		rp = XFS_ALLOC_REC_ADDR(mp, block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t		b, end;
			xfs_extlen_t		len, blen;

			b = be32_to_cpu(rp[i].ar_startblock);
			len = be32_to_cpu(rp[i].ar_blockcount);
			end = b + len;

			if (b == 0 || !verify_agbno(mp, agno, b)) {
				do_warn(
_("invalid start block %u in record %u of %s btree block %u/%u\n"),
					b, i, name, agno, bno);
				continue;
			}
			if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
				do_warn(
_("invalid length %u in record %u of %s btree block %u/%u\n"),
					len, i, name, agno, bno);
				continue;
			}

			if (magic == XFS_ABTB_MAGIC ||
			    magic == XFS_ABTB_CRC_MAGIC) {
				if (b <= lastblock) {
					do_warn(_(
	"out-of-order bno btree record %d (%u %u) block %u/%u\n"),
						i, b, len, agno, bno);
				} else {
					lastblock = b;
				}
			} else {
				agcnts->fdblocks += len;
				agcnts->agffreeblks += len;
				if (len > agcnts->agflongest)
					agcnts->agflongest = len;
				if (len < lastcount) {
					do_warn(_(
	"out-of-order cnt btree record %d (%u %u) block %u/%u\n"),
						i, b, len, agno, bno);
				} else {
					lastcount = len;
				}
			}

			for ( ; b < end; b += blen)  {
				state = get_bmap_ext(agno, b, end, &blen);
				switch (state) {
				case XR_E_UNKNOWN:
					set_bmap(agno, b, XR_E_FREE1);
					break;
				case XR_E_FREE1:
					/*
					 * no warning messages -- we'll catch
					 * FREE1 blocks later
					 */
					if (magic == XFS_ABTC_MAGIC ||
					    magic == XFS_ABTC_CRC_MAGIC) {
						set_bmap_ext(agno, b, blen,
							     XR_E_FREE);
						break;
					}
				default:
					do_warn(
	_("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
						agno, b, b + blen - 1,
						name, state);
					break;
				}
			}
		}
		return;
	}

	/*
	 * interior record
	 */
	pp = XFS_ALLOC_PTR_ADDR(mp, block, 1, mp->m_alloc_mxr[1]);

	if (numrecs > mp->m_alloc_mxr[1])  {
		numrecs = mp->m_alloc_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_alloc_mnr[1])  {
		numrecs = mp->m_alloc_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */
	if (hdr_errors)  {
		do_warn(
_("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
			be16_to_cpu(block->bb_numrecs),
			mp->m_alloc_mnr[1], mp->m_alloc_mxr[1],
			name, agno, bno);
		if (suspect)
			return;
		suspect++;
	} else if (suspect) {
		suspect = 0;
	}

	for (i = 0; i < numrecs; i++)  {
		xfs_agblock_t		bno = be32_to_cpu(pp[i]);

		/*
		 * XXX - put sibling detection right here.
		 * we know our sibling chain is good.  So as we go,
		 * we check the entry before and after each entry.
		 * If either of the entries references a different block,
		 * check the sibling pointer.  If there's a sibling
		 * pointer mismatch, try and extract as much data
		 * as possible.
		 */
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			switch (magic) {
			case XFS_ABTB_CRC_MAGIC:
			case XFS_ABTB_MAGIC:
				scan_sbtree(bno, level, agno, suspect,
					    scan_allocbt, 0, magic, priv,
					    &xfs_allocbt_buf_ops);
				break;
			case XFS_ABTC_CRC_MAGIC:
			case XFS_ABTC_MAGIC:
				scan_sbtree(bno, level, agno, suspect,
					    scan_allocbt, 0, magic, priv,
					    &xfs_allocbt_buf_ops);
				break;
			}
		}
	}
}
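
/*
 * The XR_E_FREE1/XR_E_FREE transitions above form a two-pass cross-check:
 * validate_agf() scans the bno btree first, which moves each free extent
 * from XR_E_UNKNOWN to XR_E_FREE1, and the cnt btree scan then promotes
 * XR_E_FREE1 to XR_E_FREE. An extent still in XR_E_FREE1 afterwards was
 * therefore seen by only one of the two freespace btrees (caught later,
 * as the comment in the loop notes); any other state hits the default
 * case and is reported as multiply claimed.
 */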

static bool
ino_issparse(
	struct xfs_inobt_rec	*rp,
	int			offset)
{
	if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
		return false;

	return xfs_inobt_is_sparse_disk(rp, offset);
}
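
/*
 * Sketch of the on-disk convention ino_issparse() relies on (per the
 * sparse inode record layout): ir_holemask is a 16-bit mask covering a
 * 64-inode chunk, so each bit stands for XFS_INODES_PER_CHUNK / 16 = 4
 * inodes, and a set bit marks its four inodes as a hole (not physically
 * allocated). For example, holemask 0x000f marks inodes 0-15 of the
 * chunk sparse, leaving ir_count = 48 real inodes.
 */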

static int
scan_single_ino_chunk(
	xfs_agnumber_t		agno,
	xfs_inobt_rec_t		*rp,
	int			suspect)
{
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			j;
	int			nfree;
	int			off;
	int			state;
	ino_tree_node_t		*ino_rec = NULL;
	ino_tree_node_t		*first_rec, *last_rec;
	int			freecount;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
		freecount = rp->ir_u.sp.ir_freecount;
	else
		freecount = be32_to_cpu(rp->ir_u.f.ir_freecount);

	/*
	 * on multi-block chunks, all chunks start
	 * at the beginning of the block.  with multi-chunk
	 * blocks, all chunks must start on 64-inode boundaries
	 * since each block can hold N complete chunks. if
	 * fs has aligned inodes, all chunks must start
	 * at a fs_ino_alignment*N'th agbno.  skip recs
	 * with badly aligned starting inodes.
	 */
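	/*
	 * Worked example of the cases above, assuming common geometry:
	 * with 4k blocks and 512 byte inodes, inodes_per_block = 8, so a
	 * 64-inode chunk spans 8 blocks and must start at block offset 0
	 * (off == 0). With 64k blocks and 512 byte inodes,
	 * inodes_per_block = 128 > 64, so one block holds two complete
	 * chunks and each must start on a 64-inode boundary within the
	 * block (off % XFS_INODES_PER_CHUNK == 0).
	 */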
	if (ino == 0 ||
	    (inodes_per_block <= XFS_INODES_PER_CHUNK && off != 0) ||
	    (inodes_per_block > XFS_INODES_PER_CHUNK &&
	     off % XFS_INODES_PER_CHUNK != 0) ||
	    (fs_aligned_inodes && fs_ino_alignment &&
	     agbno % fs_ino_alignment != 0))  {
		do_warn(
	_("badly aligned inode rec (starting inode = %" PRIu64 ")\n"),
			lino);
		suspect++;
	}

	/*
	 * verify numeric validity of inode chunk first
	 * before inserting into a tree.  don't have to
	 * worry about the overflow case because the
	 * starting ino number of a chunk can only get
	 * within 255 inodes of max (NULLAGINO).  if it
	 * gets closer, the agino number will be illegal
	 * as the agbno will be too large.
	 */
	if (verify_aginum(mp, agno, ino))  {
		do_warn(
_("bad starting inode # (%" PRIu64 " (0x%x 0x%x)) in ino rec, skipping rec\n"),
			lino, agno, ino);
		return ++suspect;
	}

	if (verify_aginum(mp, agno,
			ino + XFS_INODES_PER_CHUNK - 1))  {
		do_warn(
_("bad ending inode # (%" PRIu64 " (0x%x 0x%zx)) in ino rec, skipping rec\n"),
			lino + XFS_INODES_PER_CHUNK - 1,
			agno,
			ino + XFS_INODES_PER_CHUNK - 1);
		return ++suspect;
	}

	/*
	 * set state of each block containing inodes
	 */
	if (off == 0 && !suspect)  {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock)  {

			/* inodes in sparse chunks don't use blocks */
			if (ino_issparse(rp, j))
				continue;

			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
			state = get_bmap(agno, agbno);
			if (state == XR_E_UNKNOWN)  {
				set_bmap(agno, agbno, XR_E_INO);
			} else if (state == XR_E_INUSE_FS && agno == 0 &&
				   ino + j >= first_prealloc_ino &&
				   ino + j < last_prealloc_ino)  {
				set_bmap(agno, agbno, XR_E_INO);
			} else  {
				do_warn(
_("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				/*
				 * XXX - maybe should mark
				 * block a duplicate
				 */
				return ++suspect;
			}
		}
	}

	/*
	 * ensure only one avl entry per chunk
	 */
	find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);
	if (first_rec != NULL)  {
		/*
		 * this chunk overlaps with one (or more)
		 * already in the tree
		 */
		do_warn(
_("inode rec for ino %" PRIu64 " (%d/%d) overlaps existing rec (start %d/%d)\n"),
			lino, agno, ino, agno, first_rec->ino_startnum);
		suspect++;

		/*
		 * if the 2 chunks start at the same place,
		 * then we don't have to put this one
		 * in the uncertain list.  go to the next one.
		 */
		if (first_rec->ino_startnum == ino)
			return suspect;
	}

	/*
	 * now mark all the inodes as existing and free or used.
	 * if the tree is suspect, put them into the uncertain
	 * inode tree.
	 */
	if (!suspect)  {
		if (XFS_INOBT_IS_FREE_DISK(rp, 0))  {
			ino_rec = set_inode_free_alloc(mp, agno, ino);
		} else  {
			ino_rec = set_inode_used_alloc(mp, agno, ino);
		}
		for (j = 1; j < XFS_INODES_PER_CHUNK; j++)  {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))  {
				set_inode_free(ino_rec, j);
			} else  {
				set_inode_used(ino_rec, j);
			}
		}
	} else  {
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))  {
				add_aginode_uncertain(mp, agno, ino + j, 1);
			} else  {
				add_aginode_uncertain(mp, agno, ino + j, 0);
			}
		}
	}

	/*
	 * Mark sparse inodes as such in the in-core tree. Verify that sparse
	 * inodes are free and that freecount is consistent with the free mask.
	 */
	nfree = 0;
	for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
		if (ino_issparse(rp, j)) {
			if (!suspect && !XFS_INOBT_IS_FREE_DISK(rp, j)) {
				do_warn(
_("ir_holemask/ir_free mismatch, inode chunk %d/%u, holemask 0x%x free 0x%llx\n"),
					agno, ino,
					be16_to_cpu(rp->ir_u.sp.ir_holemask),
					be64_to_cpu(rp->ir_free));
				suspect++;
			}
			if (!suspect && ino_rec)
				set_inode_sparse(ino_rec, j);
		} else if (XFS_INOBT_IS_FREE_DISK(rp, j)) {
			/* freecount only tracks non-sparse inos */
			nfree++;
		}
	}

	if (nfree != freecount) {
		do_warn(
_("ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree %d\n"),
			agno, ino, freecount, nfree);
	}

	return suspect;
}

static int
scan_single_finobt_chunk(
	xfs_agnumber_t		agno,
	xfs_inobt_rec_t		*rp,
	int			suspect)
{
	xfs_ino_t		lino;
	xfs_agino_t		ino;
	xfs_agblock_t		agbno;
	int			j;
	int			nfree;
	int			off;
	int			state;
	ino_tree_node_t		*first_rec, *last_rec, *ino_rec;
	int			freecount;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);
	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
		freecount = rp->ir_u.sp.ir_freecount;
	else
		freecount = be32_to_cpu(rp->ir_u.f.ir_freecount);

	/*
	 * on multi-block chunks, all chunks start at the beginning of the
	 * block. with multi-chunk blocks, all chunks must start on 64-inode
	 * boundaries since each block can hold N complete chunks. if fs has
	 * aligned inodes, all chunks must start at a fs_ino_alignment*N'th
	 * agbno. skip recs with badly aligned starting inodes.
	 */
	if (ino == 0 ||
	    (inodes_per_block <= XFS_INODES_PER_CHUNK && off != 0) ||
	    (inodes_per_block > XFS_INODES_PER_CHUNK &&
	     off % XFS_INODES_PER_CHUNK != 0) ||
	    (fs_aligned_inodes && fs_ino_alignment &&
	     agbno % fs_ino_alignment != 0)) {
		do_warn(
_("badly aligned finobt inode rec (starting inode = %" PRIu64 ")\n"),
			lino);
		suspect++;
	}

	/*
	 * verify numeric validity of inode chunk first before inserting into a
	 * tree. don't have to worry about the overflow case because the
	 * starting ino number of a chunk can only get within 255 inodes of max
	 * (NULLAGINO). if it gets closer, the agino number will be illegal as
	 * the agbno will be too large.
	 */
	if (verify_aginum(mp, agno, ino)) {
		do_warn(
_("bad starting inode # (%" PRIu64 " (0x%x 0x%x)) in finobt rec, skipping rec\n"),
			lino, agno, ino);
		return ++suspect;
	}

	if (verify_aginum(mp, agno,
			ino + XFS_INODES_PER_CHUNK - 1)) {
		do_warn(
_("bad ending inode # (%" PRIu64 " (0x%x 0x%zx)) in finobt rec, skipping rec\n"),
			lino + XFS_INODES_PER_CHUNK - 1,
			agno,
			ino + XFS_INODES_PER_CHUNK - 1);
		return ++suspect;
	}

	/*
	 * cross check state of each block containing inodes referenced by the
	 * finobt against what we have already scanned from the alloc inobt.
	 */
	if (off == 0 && !suspect) {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock) {
			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);

			state = get_bmap(agno, agbno);
			if (state == XR_E_INO) {
				continue;
			} else if ((state == XR_E_UNKNOWN) ||
				   (state == XR_E_INUSE_FS && agno == 0 &&
				    ino + j >= first_prealloc_ino &&
				    ino + j < last_prealloc_ino)) {
				do_warn(
_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);

				set_bmap(agno, agbno, XR_E_INO);
				suspect++;
			} else {
				do_warn(
_("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				return ++suspect;
			}
		}
	}

	/*
	 * ensure we have an incore entry for each chunk
	 */
	find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);

	if (first_rec) {
		if (suspect)
			return suspect;

		/*
		 * verify consistency between finobt record and incore state
		 */
		if (first_rec->ino_startnum != ino) {
			do_warn(
_("finobt rec for ino %" PRIu64 " (%d/%u) does not match existing rec (%d/%d)\n"),
				lino, agno, ino, agno, first_rec->ino_startnum);
			return ++suspect;
		}

		nfree = 0;
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			int isfree = XFS_INOBT_IS_FREE_DISK(rp, j);

			if (isfree)
				nfree++;

			/*
			 * inode allocation state should be consistent between
			 * the inobt and finobt
			 */
			if (!suspect &&
			    isfree != is_inode_free(first_rec, j))
				suspect++;
		}

		goto check_freecount;
	}

	/*
	 * the finobt contains a record that the previous alloc inobt scan never
	 * found. insert the inodes into the appropriate tree.
	 */
	do_warn(_("undiscovered finobt record, ino %" PRIu64 " (%d/%u)\n"),
		lino, agno, ino);

	if (!suspect) {
		/*
		 * inodes previously inserted into the uncertain tree should be
		 * superseded by these when the uncertain tree is processed
		 */
		nfree = 0;
		if (XFS_INOBT_IS_FREE_DISK(rp, 0)) {
			nfree++;
			ino_rec = set_inode_free_alloc(mp, agno, ino);
		} else {
			ino_rec = set_inode_used_alloc(mp, agno, ino);
		}
		for (j = 1; j < XFS_INODES_PER_CHUNK; j++) {
			if (XFS_INOBT_IS_FREE_DISK(rp, j)) {
				nfree++;
				set_inode_free(ino_rec, j);
			} else {
				set_inode_used(ino_rec, j);
			}
		}
	} else {
		/*
		 * this should handle the case where the inobt scan may have
		 * already added uncertain inodes
		 */
		nfree = 0;
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			if (XFS_INOBT_IS_FREE_DISK(rp, j)) {
				add_aginode_uncertain(mp, agno, ino + j, 1);
				nfree++;
			} else {
				add_aginode_uncertain(mp, agno, ino + j, 0);
			}
		}
	}

check_freecount:

	/*
	 * Verify that the record freecount matches the actual number of free
	 * inodes counted in the record. Don't increment 'suspect' here, since
	 * we have already verified the allocation state of the individual
	 * inodes against the in-core state. This will have already incremented
	 * 'suspect' if something is wrong. If suspect hasn't been set at this
	 * point, these warnings mean that we have a simple freecount
	 * inconsistency or a stray finobt record (as opposed to a broader tree
	 * corruption). Issue a warning and continue the scan. The final btree
	 * reconstruction will correct this naturally.
	 */
	if (nfree != freecount) {
		do_warn(
_("finobt ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree %d\n"),
			agno, ino, freecount, nfree);
	}

	if (!nfree) {
		do_warn(
_("finobt record with no free inodes, inode chunk %d/%u\n"), agno, ino);
	}

	return suspect;
}

/*
 * this one walks the inode btrees sucking the info there into
 * the incore avl tree.  We try and rescue corrupted btree records
 * to minimize our chances of losing inodes.  Inode info from potentially
 * corrupt sources could be bogus so rather than put the info straight
 * into the tree, instead we put it on a list and try and verify the
 * info in the next phase by examining what's on disk.  At that point,
 * we'll be able to figure out what's what and stick the corrected info
 * into the tree.  We do bail out at some point and give up on a subtree
 * so as to avoid walking randomly all over the ag.
 *
 * Note that it's also ok if the free/inuse info is wrong, we can correct
 * that when we examine the on-disk inode.  The important thing is to
 * get the start and alignment of the inode chunks right.  Those chunks
 * that we aren't sure about go into the uncertain list.
 */
static void
scan_inobt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	__uint32_t		magic,
	void			*priv)
{
	struct aghdr_cnts	*agcnts = priv;
	int			i;
	int			numrecs;
	int			state;
	xfs_inobt_ptr_t		*pp;
	xfs_inobt_rec_t		*rp;
	int			hdr_errors;
	int			freecount;

	hdr_errors = 0;

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in inobt block %d/%d\n"),
			be32_to_cpu(block->bb_magic), agno, bno);
		hdr_errors++;
		bad_ino_btree = 1;
		if (suspect)
			return;
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in inobt block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), agno, bno);
		hdr_errors++;
		bad_ino_btree = 1;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed, any unknown/free state
	 * is ok in the bitmap block.
	 */
	state = get_bmap(agno, bno);
	switch (state)  {
	case XR_E_UNKNOWN:
	case XR_E_FREE1:
	case XR_E_FREE:
		set_bmap(agno, bno, XR_E_FS_MAP);
		break;
	default:
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			state, agno, bno, suspect);
	}

	numrecs = be16_to_cpu(block->bb_numrecs);

	/*
	 * leaf record in btree
	 */
	if (level == 0) {
		/* check for trashed btree block */

		if (numrecs > mp->m_inobt_mxr[0])  {
			numrecs = mp->m_inobt_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_inobt_mnr[0])  {
			numrecs = mp->m_inobt_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors)  {
			bad_ino_btree = 1;
			do_warn(_("dubious inode btree block header %d/%d\n"),
				agno, bno);
			suspect++;
		}

		rp = XFS_INOBT_REC_ADDR(mp, block, 1);

		/*
		 * step through the records, each record points to
		 * a chunk of inodes.  The start of inode chunks should
		 * be block-aligned.  Each inode btree rec should point
		 * to the start of a block of inodes or the start of a group
		 * of INODES_PER_CHUNK (64) inodes.  off is the offset into
		 * the block.  skip processing of bogus records.
		 */
		for (i = 0; i < numrecs; i++) {
			if (xfs_sb_version_hassparseinodes(&mp->m_sb))
				freecount = rp[i].ir_u.sp.ir_freecount;
			else
				freecount = be32_to_cpu(rp[i].ir_u.f.ir_freecount);

			if (magic == XFS_IBT_MAGIC ||
			    magic == XFS_IBT_CRC_MAGIC) {
				int icount = XFS_INODES_PER_CHUNK;

				/*
				 * ir_count holds the inode count for all
				 * records on fs' with sparse inode support
				 */
				if (xfs_sb_version_hassparseinodes(&mp->m_sb))
					icount = rp[i].ir_u.sp.ir_count;

				agcnts->agicount += icount;
				agcnts->agifreecount += freecount;
				agcnts->ifreecount += freecount;

				suspect = scan_single_ino_chunk(agno, &rp[i],
						suspect);
			} else {
				/*
				 * the finobt tracks records with free inodes,
				 * so only the free inode count is expected to be
				 * consistent with the agi
				 */
				agcnts->fibtfreecount += freecount;

				suspect = scan_single_finobt_chunk(agno, &rp[i],
						suspect);
			}
		}

		if (suspect)
			bad_ino_btree = 1;

		return;
	}

	/*
	 * interior record, continue on
	 */
	if (numrecs > mp->m_inobt_mxr[1])  {
		numrecs = mp->m_inobt_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_inobt_mnr[1])  {
		numrecs = mp->m_inobt_mnr[1];
		hdr_errors++;
	}

	pp = XFS_INOBT_PTR_ADDR(mp, block, 1, mp->m_inobt_mxr[1]);

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */

	if (suspect && !hdr_errors)
		suspect = 0;

	if (hdr_errors)  {
		bad_ino_btree = 1;
		if (suspect)
			return;
		else
			suspect++;
	}

	for (i = 0; i < numrecs; i++)  {
		if (be32_to_cpu(pp[i]) != 0 && verify_agbno(mp, agno,
							be32_to_cpu(pp[i])))
			scan_sbtree(be32_to_cpu(pp[i]), level, agno,
					suspect, scan_inobt, 0, magic, priv,
					&xfs_inobt_buf_ops);
	}
}

static void
scan_freelist(
	xfs_agf_t	*agf,
	struct aghdr_cnts *agcnts)
{
	xfs_buf_t	*agflbuf;
	xfs_agnumber_t	agno;
	xfs_agblock_t	bno;
	int		count;
	int		i;
	__be32		*freelist;

	agno = be32_to_cpu(agf->agf_seqno);

	if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
	    XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
	    XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
		set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);

	if (be32_to_cpu(agf->agf_flcount) == 0)
		return;

	agflbuf = libxfs_readbuf(mp->m_dev,
				 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
				 XFS_FSS_TO_BB(mp, 1), 0, &xfs_agfl_buf_ops);
	if (!agflbuf)  {
		do_abort(_("can't read agfl block for ag %d\n"), agno);
		return;
	}
	if (agflbuf->b_error == EFSBADCRC)
		do_warn(_("agfl has bad CRC for ag %d\n"), agno);

	freelist = XFS_BUF_TO_AGFL_BNO(mp, agflbuf);
	i = be32_to_cpu(agf->agf_flfirst);

	if (no_modify) {
		/* agf values not fixed in verify_set_agf, so recheck */
		if (be32_to_cpu(agf->agf_flfirst) >= XFS_AGFL_SIZE(mp) ||
		    be32_to_cpu(agf->agf_fllast) >= XFS_AGFL_SIZE(mp)) {
1392 do_warn(_("agf %d freelist blocks bad, skipping "
1393 "freelist scan\n"), i);
			return;
		}
	}

	count = 0;
	for (;;) {
		bno = be32_to_cpu(freelist[i]);
		if (verify_agbno(mp, agno, bno))
			set_bmap(agno, bno, XR_E_FREE);
		else
			do_warn(_("bad agbno %u in agfl, agno %d\n"),
				bno, agno);
		count++;
		if (i == be32_to_cpu(agf->agf_fllast))
			break;
		if (++i == XFS_AGFL_SIZE(mp))
			i = 0;
	}
	if (count != be32_to_cpu(agf->agf_flcount)) {
		do_warn(_("freeblk count %d != flcount %d in ag %d\n"), count,
			be32_to_cpu(agf->agf_flcount), agno);
	}

	agcnts->fdblocks += count;

	libxfs_putbuf(agflbuf);
}
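
/*
 * The AGFL is walked above as a circular buffer bounded by agf_flfirst
 * and agf_fllast. Worked example (sizes are illustrative): if
 * XFS_AGFL_SIZE(mp) were 118 and the AGF had flfirst = 116, fllast = 1,
 * flcount = 4, the loop would visit slots 116, 117, 0, 1, marking each
 * valid agbno XR_E_FREE, and then check count == flcount.
 */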

static void
validate_agf(
	struct xfs_agf		*agf,
	xfs_agnumber_t		agno,
	struct aghdr_cnts	*agcnts)
{
	xfs_agblock_t		bno;
	__uint32_t		magic;

	bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_ABTB_CRC_MAGIC
							 : XFS_ABTB_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
			    agno, 0, scan_allocbt, 1, magic, agcnts,
			    &xfs_allocbt_buf_ops);
	} else {
		do_warn(_("bad agbno %u for btbno root, agno %d\n"),
			bno, agno);
	}

	bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_ABTC_CRC_MAGIC
							 : XFS_ABTC_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
			    agno, 0, scan_allocbt, 1, magic, agcnts,
			    &xfs_allocbt_buf_ops);
	} else  {
		do_warn(_("bad agbno %u for btbcnt root, agno %d\n"),
			bno, agno);
	}

	if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
		do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
			be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
	}

	if (be32_to_cpu(agf->agf_longest) != agcnts->agflongest) {
		do_warn(_("agf_longest %u, counted %u in ag %u\n"),
			be32_to_cpu(agf->agf_longest), agcnts->agflongest, agno);
	}

	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_btreeblks) != agcnts->agfbtreeblks) {
		do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"),
			be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
	}
}

static void
validate_agi(
	struct xfs_agi		*agi,
	xfs_agnumber_t		agno,
	struct aghdr_cnts	*agcnts)
{
	xfs_agblock_t		bno;
	int			i;
	__uint32_t		magic;

	bno = be32_to_cpu(agi->agi_root);
	if (bno != 0 && verify_agbno(mp, agno, bno)) {
		magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_IBT_CRC_MAGIC
							 : XFS_IBT_MAGIC;
		scan_sbtree(bno, be32_to_cpu(agi->agi_level),
			    agno, 0, scan_inobt, 1, magic, agcnts,
			    &xfs_inobt_buf_ops);
	} else {
		do_warn(_("bad agbno %u for inobt root, agno %d\n"),
			be32_to_cpu(agi->agi_root), agno);
	}

	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		bno = be32_to_cpu(agi->agi_free_root);
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			magic = xfs_sb_version_hascrc(&mp->m_sb) ?
					XFS_FIBT_CRC_MAGIC : XFS_FIBT_MAGIC;
			scan_sbtree(bno, be32_to_cpu(agi->agi_free_level),
				    agno, 0, scan_inobt, 1, magic, agcnts,
				    &xfs_inobt_buf_ops);
		} else {
			do_warn(_("bad agbno %u for finobt root, agno %d\n"),
				be32_to_cpu(agi->agi_free_root), agno);
		}
	}

	if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
		do_warn(_("agi_count %u, counted %u in ag %u\n"),
			be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
	}

	if (be32_to_cpu(agi->agi_freecount) != agcnts->agifreecount) {
		do_warn(_("agi_freecount %u, counted %u in ag %u\n"),
			be32_to_cpu(agi->agi_freecount), agcnts->agifreecount, agno);
	}

	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
	    be32_to_cpu(agi->agi_freecount) != agcnts->fibtfreecount) {
		do_warn(_("agi_freecount %u, counted %u in ag %u finobt\n"),
			be32_to_cpu(agi->agi_freecount), agcnts->fibtfreecount,
			agno);
	}

	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
		xfs_agino_t	agino = be32_to_cpu(agi->agi_unlinked[i]);

		if (agino != NULLAGINO) {
			do_warn(
_("agi unlinked bucket %d is %u in ag %u (inode=%" PRIu64 ")\n"),
				i, agino, agno,
				XFS_AGINO_TO_INO(mp, agno, agino));
		}
	}
}

/*
 * Scan an AG for obvious corruption.
 */
static void
scan_ag(
	work_queue_t	*wq,
	xfs_agnumber_t	agno,
	void		*arg)
{
	struct aghdr_cnts *agcnts = arg;
	struct xfs_agf	*agf;
	struct xfs_buf	*agfbuf = NULL;
	int		agf_dirty = 0;
	struct xfs_agi	*agi;
	struct xfs_buf	*agibuf = NULL;
	int		agi_dirty = 0;
	struct xfs_sb	*sb = NULL;
	struct xfs_buf	*sbbuf = NULL;
	int		sb_dirty = 0;
	int		status;
	char		*objname = NULL;

	sb = (struct xfs_sb *)calloc(BBTOB(XFS_FSS_TO_BB(mp, 1)), 1);
	if (!sb) {
		do_error(_("can't allocate memory for superblock\n"));
		return;
	}

	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
				XFS_FSS_TO_BB(mp, 1), 0, &xfs_sb_buf_ops);
	if (!sbbuf)  {
		objname = _("root superblock");
		goto out_free_sb;
	}
	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
	libxfs_sb_quota_from_disk(sb);

	agfbuf = libxfs_readbuf(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agf_buf_ops);
	if (!agfbuf)  {
		objname = _("agf block");
		goto out_free_sbbuf;
	}
	agf = XFS_BUF_TO_AGF(agfbuf);

	agibuf = libxfs_readbuf(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &xfs_agi_buf_ops);
	if (!agibuf)  {
		objname = _("agi block");
		goto out_free_agfbuf;
	}
	agi = XFS_BUF_TO_AGI(agibuf);

	/* fix up bad ag headers */

	status = verify_set_agheader(mp, sbbuf, sb, agf, agi, agno);

	if (status & XR_AG_SB_SEC)  {
		if (!no_modify)
			sb_dirty = 1;
		/*
		 * clear bad sector bit because we don't want
		 * to skip further processing.  we just want to
		 * ensure that we write out the modified sb buffer.
		 */
		status &= ~XR_AG_SB_SEC;
	}
	if (status & XR_AG_SB)  {
		if (!no_modify)  {
			do_warn(_("reset bad sb for ag %d\n"), agno);
			sb_dirty = 1;
		} else  {
			do_warn(_("would reset bad sb for ag %d\n"), agno);
		}
	}
	if (status & XR_AG_AGF)  {
		if (!no_modify)  {
			do_warn(_("reset bad agf for ag %d\n"), agno);
			agf_dirty = 1;
		} else  {
			do_warn(_("would reset bad agf for ag %d\n"), agno);
		}
	}
	if (status & XR_AG_AGI)  {
		if (!no_modify)  {
			do_warn(_("reset bad agi for ag %d\n"), agno);
			agi_dirty = 1;
		} else  {
			do_warn(_("would reset bad agi for ag %d\n"), agno);
		}
	}

	if (status && no_modify)  {
		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
			agno);
		goto out_free_agibuf;
	}

	scan_freelist(agf, agcnts);

	validate_agf(agf, agno, agcnts);
	validate_agi(agi, agno, agcnts);

	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));
	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));

	/*
	 * Only pay attention to CRC/verifier errors if we can correct them.
	 * Note that we can get uncorrected EFSCORRUPTED errors here because
	 * the verifier will flag on out of range values that we can't correct
	 * until phase 5 when we have all the information necessary to rebuild
	 * the freespace/inode btrees.  We can correct bad CRC errors
	 * immediately, though.
	 */
	if (!no_modify) {
		agi_dirty += (agibuf->b_error == EFSBADCRC);
		agf_dirty += (agfbuf->b_error == EFSBADCRC);
		sb_dirty += (sbbuf->b_error == EFSBADCRC);
	}

	if (agi_dirty && !no_modify)
		libxfs_writebuf(agibuf, 0);
	else
		libxfs_putbuf(agibuf);

	if (agf_dirty && !no_modify)
		libxfs_writebuf(agfbuf, 0);
	else
		libxfs_putbuf(agfbuf);

	if (sb_dirty && !no_modify) {
		if (agno == 0)
			memcpy(&mp->m_sb, sb, sizeof(xfs_sb_t));
		libxfs_sb_to_disk(XFS_BUF_TO_SBP(sbbuf), sb);
		libxfs_writebuf(sbbuf, 0);
	} else
		libxfs_putbuf(sbbuf);
	free(sb);
	PROG_RPT_INC(prog_rpt_done[agno], 1);

#ifdef	XR_INODE_TRACE
	print_inode_list(agno);
#endif
	return;

out_free_agibuf:
	libxfs_putbuf(agibuf);
out_free_agfbuf:
	libxfs_putbuf(agfbuf);
out_free_sbbuf:
	libxfs_putbuf(sbbuf);
out_free_sb:
	free(sb);

	if (objname)
		do_error(_("can't get %s for ag %d\n"), objname, agno);
}

#define SCAN_THREADS 32

void
scan_ags(
	struct xfs_mount	*mp,
	int			scan_threads)
{
	struct aghdr_cnts *agcnts;
	__uint64_t	fdblocks = 0;
	__uint64_t	icount = 0;
	__uint64_t	ifreecount = 0;
	xfs_agnumber_t	i;
	work_queue_t	wq;

	agcnts = malloc(mp->m_sb.sb_agcount * sizeof(*agcnts));
	if (!agcnts) {
		do_abort(_("no memory for ag header counts\n"));
		return;
	}
	memset(agcnts, 0, mp->m_sb.sb_agcount * sizeof(*agcnts));

	create_work_queue(&wq, mp, scan_threads);

	for (i = 0; i < mp->m_sb.sb_agcount; i++)
		queue_work(&wq, scan_ag, i, &agcnts[i]);

	destroy_work_queue(&wq);

	/* tally up the counts */
	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
		fdblocks += agcnts[i].fdblocks;
		icount += agcnts[i].agicount;
		ifreecount += agcnts[i].ifreecount;
	}

	free(agcnts);

	/*
	 * Validate that our manual counts match the superblock.
	 */
	if (mp->m_sb.sb_icount != icount) {
		do_warn(_("sb_icount %" PRIu64 ", counted %" PRIu64 "\n"),
			mp->m_sb.sb_icount, icount);
	}

	if (mp->m_sb.sb_ifree != ifreecount) {
		do_warn(_("sb_ifree %" PRIu64 ", counted %" PRIu64 "\n"),
			mp->m_sb.sb_ifree, ifreecount);
	}

	if (mp->m_sb.sb_fdblocks != fdblocks) {
		do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"),
			mp->m_sb.sb_fdblocks, fdblocks);
	}
}
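
/*
 * Hedged usage sketch: the driver for this file lives elsewhere in
 * xfs_repair (phase 2), but the intended call sequence is simply to bind
 * the mount and fan the scan out across AGs; the superblock counters are
 * cross-checked as a side effect:
 *
 *	set_mp(mp);
 *	scan_ags(mp, scan_threads);
 */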