/*
 * repair/scan.c — xfsprogs (source retrieved from the xfsprogs-dev
 * git web viewer; viewer navigation lines removed).
 */
1 /*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <libxfs.h>
20 #include "avl.h"
21 #include "globals.h"
22 #include "agheader.h"
23 #include "incore.h"
24 #include "protos.h"
25 #include "err_protos.h"
26 #include "dinode.h"
27 #include "scan.h"
28 #include "versions.h"
29 #include "bmap.h"
30 #include "progress.h"
31
32 extern int verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
33 xfs_agf_t *agf, xfs_agi_t *agi, xfs_agnumber_t i);
34
35 static xfs_mount_t *mp = NULL;
36
/*
 * Record the mount structure that every scan routine in this file
 * operates on (the file-scope "mp").  The libxfs buffer cache is
 * purged first so buffers read under a previously-set mount cannot
 * be handed back against the new one.
 */
void
set_mp(xfs_mount_t *mpp)
{
	libxfs_bcache_purge();
	mp = mpp;
}
43
/*
 * Read one block of a short-form (AG-relative) btree and hand it to
 * the per-btree-type callback.
 *
 * root    - AG-relative block number of the block to read
 * nlevels - height of the (sub)tree rooted here; the callback is
 *           invoked with level nlevels - 1
 * suspect - "questionable tree" flag, passed through to func
 * func    - scanner for this btree type; interior-node callbacks
 *           recurse back through scan_sbtree()
 * isroot  - nonzero if this block is the root of the tree
 *
 * NOTE: do_error() aborts repair, so the return after it is only
 * defensive.
 */
void
scan_sbtree(
	xfs_agblock_t	root,
	int		nlevels,
	xfs_agnumber_t	agno,
	int		suspect,
	void		(*func)(struct xfs_btree_block	*block,
				int			level,
				xfs_agblock_t		bno,
				xfs_agnumber_t		agno,
				int			suspect,
				int			isroot),
	int		isroot)
{
	xfs_buf_t	*bp;

	bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, root),
			XFS_FSB_TO_BB(mp, 1), 0);
	if (!bp) {
		do_error(_("can't read btree block %d/%d\n"), agno, root);
		return;
	}
	(*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect, isroot);
	libxfs_putbuf(bp);
}
69
/*
 * Scan one block of a long-form (filesystem-block-addressed) btree,
 * i.e. an inode's bmap btree.  Reads the block at fsblock "root",
 * passes it to func, and writes the buffer back out if the callback
 * flagged it dirty (only legal when repair may modify the fs).
 *
 * returns 1 on bad news (inode needs to be cleared), 0 on good
 */
int
scan_lbtree(
	xfs_dfsbno_t	root,
	int		nlevels,
	int		(*func)(struct xfs_btree_block	*block,
				int			level,
				int			type,
				int			whichfork,
				xfs_dfsbno_t		bno,
				xfs_ino_t		ino,
				xfs_drfsbno_t		*tot,
				__uint64_t		*nex,
				blkmap_t		**blkmapp,
				bmap_cursor_t		*bm_cursor,
				int			isroot,
				int			check_dups,
				int			*dirty),
	int		type,
	int		whichfork,
	xfs_ino_t	ino,
	xfs_drfsbno_t	*tot,
	__uint64_t	*nex,
	blkmap_t	**blkmapp,
	bmap_cursor_t	*bm_cursor,
	int		isroot,
	int		check_dups)
{
	xfs_buf_t	*bp;
	int		err;
	int		dirty = 0;

	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root),
		XFS_FSB_TO_BB(mp, 1), 0);
	if (!bp) {
		do_error(_("can't read btree block %d/%d\n"),
			XFS_FSB_TO_AGNO(mp, root),
			XFS_FSB_TO_AGBNO(mp, root));
		return(1);
	}
	err = (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1,
			type, whichfork, root, ino, tot, nex, blkmapp,
			bm_cursor, isroot, check_dups, &dirty);

	/* a callback may only dirty the buffer when modification is allowed */
	ASSERT(dirty == 0 || (dirty && !no_modify));

	if (dirty && !no_modify)
		libxfs_writebuf(bp, 0);
	else
		libxfs_putbuf(bp);

	return(err);
}
125
/*
 * Scan callback for one block of an inode's bmap btree (data or attr
 * fork).  Validates the block header and sibling chain, records block
 * ownership in the AG block-usage map (or, on the duplicate-detection
 * pass, checks the block against the duplicate-extent lists), then
 * either processes the leaf records or recurses into child blocks via
 * scan_lbtree().
 *
 * check_dups == 0 is the first pass: claim blocks and verify sibling /
 * key ordering, repairing interior keys in place (sets *dirty) where
 * allowed.  check_dups != 0 is the duplicate-scan pass: only look for
 * blocks already known to be multiply claimed.
 *
 * Returns 1 if the inode should be cleared, 0 if the block was ok.
 */
int
scanfunc_bmap(
	struct xfs_btree_block	*block,
	int			level,
	int			type,
	int			whichfork,
	xfs_dfsbno_t		bno,
	xfs_ino_t		ino,
	xfs_drfsbno_t		*tot,
	__uint64_t		*nex,
	blkmap_t		**blkmapp,
	bmap_cursor_t		*bm_cursor,
	int			isroot,
	int			check_dups,
	int			*dirty)
{
	int			i;
	int			err;
	xfs_bmbt_ptr_t		*pp;
	xfs_bmbt_key_t		*pkey;
	xfs_bmbt_rec_t		*rp;
	xfs_dfiloff_t		first_key;
	xfs_dfiloff_t		last_key;
	char			*forkname;
	int			numrecs;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	int			state;

	if (whichfork == XFS_DATA_FORK)
		forkname = _("data");
	else
		forkname = _("attr");

	/*
	 * unlike the ag freeblock btrees, if anything looks wrong
	 * in an inode bmap tree, just bail.  it's possible that
	 * we'll miss a case where the to-be-toasted inode and
	 * another inode are claiming the same block but that's
	 * highly unlikely.
	 */
	if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC) {
		do_warn(_("bad magic # %#x in inode %llu (%s fork) bmbt "
			"block %llu\n"), be32_to_cpu(block->bb_magic),
			ino, forkname, bno);
		return(1);
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in inode %llu, (%s fork) "
			"bmbt block %llu\n"), level,
			be16_to_cpu(block->bb_level), ino, forkname, bno);
		return(1);
	}

	if (check_dups == 0) {
		/*
		 * check sibling pointers. if bad we have a conflict
		 * between the sibling pointers and the child pointers
		 * in the parent block.  blow out the inode if that happens
		 */
		if (bm_cursor->level[level].fsbno != NULLDFSBNO)  {
			/*
			 * this is not the first block on this level
			 * so the cursor for this level has recorded the
			 * values for this's block left-sibling.
			 */
			if (bno != bm_cursor->level[level].right_fsbno)  {
				do_warn(
		_("bad fwd (right) sibling pointer (saw %llu parent block says %llu)\n"
		  "\tin inode %llu (%s fork) bmap btree block %llu\n"),
					bm_cursor->level[level].right_fsbno,
					bno, ino, forkname,
					bm_cursor->level[level].fsbno);
				return(1);
			}
			if (be64_to_cpu(block->bb_u.l.bb_leftsib) !=
					bm_cursor->level[level].fsbno)  {
				do_warn(
		_("bad back (left) sibling pointer (saw %llu parent block says %llu)\n"
		  "\tin inode %llu (%s fork) bmap btree block %llu\n"),
				       be64_to_cpu(block->bb_u.l.bb_leftsib),
					bm_cursor->level[level].fsbno,
					ino, forkname, bno);
				return(1);
			}
		} else {
			/*
			 * This is the first or only block on this level.
			 * Check that the left sibling pointer is NULL
			 */
			if (be64_to_cpu(block->bb_u.l.bb_leftsib) != NULLDFSBNO) {
				do_warn(
		_("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
		  "\tin inode %llu (%s fork) bmap btree block %llu\n"),
				       be64_to_cpu(block->bb_u.l.bb_leftsib),
					ino, forkname, bno);
				return(1);
			}
		}

		/*
		 * update cursor block pointers to reflect this block
		 */
		bm_cursor->level[level].fsbno = bno;
		bm_cursor->level[level].left_fsbno =
					be64_to_cpu(block->bb_u.l.bb_leftsib);
		bm_cursor->level[level].right_fsbno =
					be64_to_cpu(block->bb_u.l.bb_rightsib);

		agno = XFS_FSB_TO_AGNO(mp, bno);
		agbno = XFS_FSB_TO_AGBNO(mp, bno);

		/* block-usage map updates are serialized per AG */
		pthread_mutex_lock(&ag_locks[agno]);
		state = get_bmap(agno, agbno);
		switch (state) {
		case XR_E_UNKNOWN:
		case XR_E_FREE1:
		case XR_E_FREE:
			set_bmap(agno, agbno, XR_E_INUSE);
			break;
		case XR_E_FS_MAP:
		case XR_E_INUSE:
			/*
			 * we'll try and continue searching here since
			 * the block looks like it's been claimed by file
			 * to store user data, a directory to store directory
			 * data, or the space allocation btrees but since
			 * we made it here, the block probably
			 * contains btree data.
			 */
			set_bmap(agno, agbno, XR_E_MULT);
			do_warn(
		_("inode 0x%llx bmap block 0x%llx claimed, state is %d\n"),
				ino, (__uint64_t) bno, state);
			break;
		case XR_E_MULT:
		case XR_E_INUSE_FS:
			set_bmap(agno, agbno, XR_E_MULT);
			do_warn(
		_("inode 0x%llx bmap block 0x%llx claimed, state is %d\n"),
				ino, (__uint64_t) bno, state);
			/*
			 * if we made it to here, this is probably a bmap block
			 * that is being used by *another* file as a bmap block
			 * so the block will be valid.  Both files should be
			 * trashed along with any other file that impinges on
			 * any blocks referenced by either file.  So we
			 * continue searching down this btree to mark all
			 * blocks duplicate
			 */
			break;
		case XR_E_BAD_STATE:
		default:
			do_warn(
		_("bad state %d, inode 0x%llx bmap block 0x%llx\n"),
				state, ino, (__uint64_t) bno);
			break;
		}
		pthread_mutex_unlock(&ag_locks[agno]);
	} else {
		/*
		 * attribute fork for realtime files is in the regular
		 * filesystem
		 */
		if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK)  {
			if (search_dup_extent(XFS_FSB_TO_AGNO(mp, bno),
					XFS_FSB_TO_AGBNO(mp, bno),
					XFS_FSB_TO_AGBNO(mp, bno) + 1))
				return(1);
		} else  {
			if (search_rt_dup_extent(mp, bno))
				return(1);
		}
	}
	(*tot)++;
	numrecs = be16_to_cpu(block->bb_numrecs);

	if (level == 0) {
		/* leaf block: records are extent descriptors */
		if (numrecs > mp->m_bmap_dmxr[0] || (isroot == 0 && numrecs <
							mp->m_bmap_dmnr[0])) {
			do_warn(
	_("inode 0x%llx bad # of bmap records (%u, min - %u, max - %u)\n"),
				ino, numrecs, mp->m_bmap_dmnr[0],
				mp->m_bmap_dmxr[0]);
			return(1);
		}
		rp = XFS_BMBT_REC_ADDR(mp, block, 1);
		*nex += numrecs;
		/*
		 * XXX - if we were going to fix up the btree record,
		 * we'd do it right here.  For now, if there's a problem,
		 * we'll bail out and presumably clear the inode.
		 */
		if (check_dups == 0)  {
			err = process_bmbt_reclist(mp, rp, numrecs,
					type, ino, tot, blkmapp,
					&first_key, &last_key,
					whichfork);
			if (err)
				return(1);
			/*
			 * check that key ordering is monotonically increasing.
			 * if the last_key value in the cursor is set to
			 * NULLDFILOFF, then we know this is the first block
			 * on the leaf level and we shouldn't check the
			 * last_key value.
			 */
			if (first_key <= bm_cursor->level[level].last_key &&
					bm_cursor->level[level].last_key !=
					NULLDFILOFF)  {
				do_warn(
	_("out-of-order bmap key (file offset) in inode %llu, %s fork, fsbno %llu\n"),
					ino, forkname, bno);
				return(1);
			}
			/*
			 * update cursor keys to reflect this block.
			 * don't have to check if last_key is > first_key
			 * since that gets checked by process_bmbt_reclist.
			 */
			bm_cursor->level[level].first_key = first_key;
			bm_cursor->level[level].last_key = last_key;

			return(0);
		} else
			return(scan_bmbt_reclist(mp, rp, numrecs,
						type, ino, tot, whichfork));
	}
	/* interior block: records are key/pointer pairs */
	if (numrecs > mp->m_bmap_dmxr[1] || (isroot == 0 && numrecs <
							mp->m_bmap_dmnr[1])) {
		do_warn(
	_("inode 0x%llx bad # of bmap records (%u, min - %u, max - %u)\n"),
			ino, numrecs, mp->m_bmap_dmnr[1], mp->m_bmap_dmxr[1]);
		return(1);
	}
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
	pkey = XFS_BMBT_KEY_ADDR(mp, block, 1);

	last_key = NULLDFILOFF;

	for (i = 0, err = 0; i < numrecs; i++)  {
		/*
		 * XXX - if we were going to fix up the interior btree nodes,
		 * we'd do it right here.  For now, if there's a problem,
		 * we'll bail out and presumably clear the inode.
		 */
		if (!verify_dfsbno(mp, be64_to_cpu(pp[i])))  {
			do_warn(_("bad bmap btree ptr 0x%llx in ino %llu\n"),
				be64_to_cpu(pp[i]), ino);
			return(1);
		}

		err = scan_lbtree(be64_to_cpu(pp[i]), level, scanfunc_bmap,
				type, whichfork, ino, tot, nex, blkmapp,
				bm_cursor, 0, check_dups);
		if (err)
			return(1);

		/*
		 * fix key (offset) mismatches between the first key
		 * in the child block (as recorded in the cursor) and the
		 * key in the interior node referencing the child block.
		 *
		 * fixes cases where entries have been shifted between
		 * child blocks but the parent hasn't been updated.  We
		 * don't have to worry about the key values in the cursor
		 * not being set since we only look at the key values of
		 * our child and those are guaranteed to be set by the
		 * call to scan_lbtree() above.
		 */
		if (check_dups == 0 && be64_to_cpu(pkey[i].br_startoff) !=
					bm_cursor->level[level-1].first_key)  {
			if (!no_modify)  {
				do_warn(
	_("correcting bt key (was %llu, now %llu) in inode %llu\n"
	  "\t\t%s fork, btree block %llu\n"),
					be64_to_cpu(pkey[i].br_startoff),
					bm_cursor->level[level-1].first_key,
					ino,
					forkname, bno);
				*dirty = 1;
				pkey[i].br_startoff = cpu_to_be64(
					bm_cursor->level[level-1].first_key);
			} else  {
				do_warn(
	_("bad btree key (is %llu, should be %llu) in inode %llu\n"
	  "\t\t%s fork, btree block %llu\n"),
					be64_to_cpu(pkey[i].br_startoff),
					bm_cursor->level[level-1].first_key,
					ino, forkname, bno);
			}
		}
	}

	/*
	 * If we're the last node at our level, check that the last child
	 * block's forward sibling pointer is NULL.
	 */
	if (check_dups == 0 &&
		bm_cursor->level[level].right_fsbno == NULLDFSBNO &&
		bm_cursor->level[level - 1].right_fsbno != NULLDFSBNO) {
		do_warn(
	_("bad fwd (right) sibling pointer (saw %llu should be NULLDFSBNO)\n"
	  "\tin inode %llu (%s fork) bmap btree block %llu\n"),
			bm_cursor->level[level - 1].right_fsbno,
			ino, forkname, bm_cursor->level[level - 1].fsbno);
		return(1);
	}

	/*
	 * update cursor keys to reflect this block
	 */
	if (check_dups == 0)  {
		bm_cursor->level[level].first_key =
				be64_to_cpu(pkey[0].br_startoff);
		bm_cursor->level[level].last_key =
				be64_to_cpu(pkey[numrecs - 1].br_startoff);
	}

	return(0);
}
447
/*
 * Common scan callback for both AG freespace btrees (by-bno and
 * by-count); magic selects which tree is being walked.  Validates the
 * block header, claims the btree block itself in the AG block-usage
 * map, then either marks the free extents described by a leaf's
 * records or recurses into child blocks.
 *
 * suspect is a one-level memory of header trouble: it is bumped when
 * this block's header looks bad and cleared when a clean block is
 * found; two bad levels in a row abort the walk down this subtree.
 */
void
scanfunc_allocbt(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot,
	__uint32_t		magic)
{
	const char 		*name;
	int			i;
	xfs_alloc_ptr_t		*pp;
	xfs_alloc_rec_t		*rp;
	int			hdr_errors = 0;
	int			numrecs;
	int			state;

	assert(magic == XFS_ABTB_MAGIC || magic == XFS_ABTC_MAGIC);

	name = (magic == XFS_ABTB_MAGIC) ? "bno" : "cnt";

	if (be32_to_cpu(block->bb_magic) != magic) {
		do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
			be32_to_cpu(block->bb_magic), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), name, agno, bno);
		hdr_errors++;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed
	 */
	state = get_bmap(agno, bno);
	if (state != XR_E_UNKNOWN)  {
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
				name, state, agno, bno, suspect);
		return;
	}
	set_bmap(agno, bno, XR_E_FS_MAP);

	numrecs = be16_to_cpu(block->bb_numrecs);

	if (level == 0) {
		/* leaf: records describe free extents; clamp a bogus count */
		if (numrecs > mp->m_alloc_mxr[0])  {
			numrecs = mp->m_alloc_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_alloc_mnr[0])  {
			numrecs = mp->m_alloc_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors)
			suspect++;

		rp = XFS_ALLOC_REC_ADDR(mp, block, 1);
		for (i = 0; i < numrecs; i++) {
			xfs_agblock_t		b, end;
			xfs_extlen_t		len, blen;

			b = be32_to_cpu(rp[i].ar_startblock);
			len = be32_to_cpu(rp[i].ar_blockcount);
			end = b + len;

			/* skip records whose range is not plausible */
			if (b == 0 || !verify_agbno(mp, agno, b))
				continue;
			if (len == 0 || len > MAXEXTLEN)
				continue;
			if (!verify_agbno(mp, agno, end - 1))
				continue;

			for ( ; b < end; b += blen)  {
				state = get_bmap_ext(agno, b, end, &blen);
				switch (state) {
				case XR_E_UNKNOWN:
					set_bmap(agno, b, XR_E_FREE1);
					break;
				case XR_E_FREE1:
					/*
					 * no warning messages -- we'll catch
					 * FREE1 blocks later
					 */
					if (magic == XFS_ABTC_MAGIC) {
						set_bmap_ext(agno, b, blen,
							     XR_E_FREE);
						break;
					}
				default:
					do_warn(
	_("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
						agno, b, b + blen - 1,
						name, state);
					break;
				}
			}
		}
		return;
	}

	/*
	 * interior record
	 */
	pp = XFS_ALLOC_PTR_ADDR(mp, block, 1, mp->m_alloc_mxr[1]);

	if (numrecs > mp->m_alloc_mxr[1])  {
		numrecs = mp->m_alloc_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_alloc_mnr[1])  {
		numrecs = mp->m_alloc_mnr[1];
		hdr_errors++;
	}

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */

	if (suspect && !hdr_errors)
		suspect = 0;

	if (hdr_errors)  {
		if (suspect)
			return;
		else suspect++;
	}

	for (i = 0; i < numrecs; i++)  {
		xfs_agblock_t		bno = be32_to_cpu(pp[i]);

		/*
		 * XXX - put sibling detection right here.
		 * we know our sibling chain is good.  So as we go,
		 * we check the entry before and after each entry.
		 * If either of the entries references a different block,
		 * check the sibling pointer.  If there's a sibling
		 * pointer mismatch, try and extract as much data
		 * as possible.
		 */
		if (bno != 0 && verify_agbno(mp, agno, bno)) {
			scan_sbtree(bno, level, agno, suspect,
				    (magic == XFS_ABTB_MAGIC) ?
				     scanfunc_bno : scanfunc_cnt, 0);
		}
	}
}
605
606 void
607 scanfunc_bno(
608 struct xfs_btree_block *block,
609 int level,
610 xfs_agblock_t bno,
611 xfs_agnumber_t agno,
612 int suspect,
613 int isroot)
614 {
615 return scanfunc_allocbt(block, level, bno, agno,
616 suspect, isroot, XFS_ABTB_MAGIC);
617 }
618
619 void
620 scanfunc_cnt(
621 struct xfs_btree_block *block,
622 int level,
623 xfs_agblock_t bno,
624 xfs_agnumber_t agno,
625 int suspect,
626 int isroot
627 )
628 {
629 return scanfunc_allocbt(block, level, bno, agno,
630 suspect, isroot, XFS_ABTC_MAGIC);
631 }
632
/*
 * Validate one inode btree record (one chunk of XFS_INODES_PER_CHUNK
 * inodes) and fold it into the incore inode trees.
 *
 * Checks chunk alignment and that the start/end agino numbers are
 * valid, marks the underlying blocks as inode blocks in the AG
 * block-usage map, rejects chunks overlapping records already in the
 * avl tree, then records each inode as free/used — into the regular
 * incore tree when the record looks trustworthy, or into the
 * "uncertain" tree when suspect is set.
 *
 * Returns the (possibly incremented) suspect count; the caller
 * threads it through successive records.
 */
static int
scan_single_ino_chunk(
	xfs_agnumber_t	agno,
	xfs_inobt_rec_t	*rp,
	int		suspect)
{
	xfs_ino_t	lino;
	xfs_agino_t	ino;
	xfs_agblock_t	agbno;
	int		j;
	int		nfree;
	int		off;
	int		state;
	ino_tree_node_t	*ino_rec, *first_rec, *last_rec;

	ino = be32_to_cpu(rp->ir_startino);
	off = XFS_AGINO_TO_OFFSET(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, ino);
	lino = XFS_AGINO_TO_INO(mp, agno, ino);

	/*
	 * on multi-block block chunks, all chunks start
	 * at the beginning of the block.  with multi-chunk
	 * blocks, all chunks must start on 64-inode boundaries
	 * since each block can hold N complete chunks. if
	 * fs has aligned inodes, all chunks must start
	 * at a fs_ino_alignment*N'th agbno.  skip recs
	 * with badly aligned starting inodes.
	 */
	if (ino == 0 ||
	    (inodes_per_block <= XFS_INODES_PER_CHUNK && off !=  0)  ||
	    (inodes_per_block > XFS_INODES_PER_CHUNK &&
	     off % XFS_INODES_PER_CHUNK != 0)  ||
	    (fs_aligned_inodes && agbno % fs_ino_alignment != 0))  {
		do_warn(
	_("badly aligned inode rec (starting inode = %llu)\n"),
			lino);
		suspect++;
	}

	/*
	 * verify numeric validity of inode chunk first
	 * before inserting into a tree.  don't have to
	 * worry about the overflow case because the
	 * starting ino number of a chunk can only get
	 * within 255 inodes of max (NULLAGINO).  if it
	 * gets closer, the agino number will be illegal
	 * as the agbno will be too large.
	 */
	if (verify_aginum(mp, agno, ino))  {
		do_warn(
_("bad starting inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n"),
			lino, agno, ino);
		return ++suspect;
	}

	if (verify_aginum(mp, agno,
			ino + XFS_INODES_PER_CHUNK - 1))  {
		do_warn(
_("bad ending inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n"),
			lino + XFS_INODES_PER_CHUNK - 1,
			agno, ino + XFS_INODES_PER_CHUNK - 1);
		return ++suspect;
	}

	/*
	 * set state of each block containing inodes.  only done for
	 * block-aligned, non-suspect chunks so we don't claim blocks
	 * for a record we may later throw away.
	 */
	if (off == 0 && !suspect)  {
		for (j = 0;
		     j < XFS_INODES_PER_CHUNK;
		     j += mp->m_sb.sb_inopblock)  {
			agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);

			state = get_bmap(agno, agbno);
			if (state == XR_E_UNKNOWN)  {
				set_bmap(agno, agbno, XR_E_INO);
			} else if (state == XR_E_INUSE_FS && agno == 0 &&
				   ino + j >= first_prealloc_ino &&
				   ino + j < last_prealloc_ino)  {
				/* root/realtime preallocated inodes in ag 0 */
				set_bmap(agno, agbno, XR_E_INO);
			} else  {
				do_warn(
_("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
					agno, agbno, mp->m_sb.sb_inopblock);
				/*
				 * XXX - maybe should mark
				 * block a duplicate
				 */
				return ++suspect;
			}
		}
	}

	/*
	 * ensure only one avl entry per chunk
	 */
	find_inode_rec_range(agno, ino, ino + XFS_INODES_PER_CHUNK,
			     &first_rec, &last_rec);
	if (first_rec != NULL)  {
		/*
		 * this chunk overlaps with one (or more)
		 * already in the tree
		 */
		do_warn(
_("inode rec for ino %llu (%d/%d) overlaps existing rec (start %d/%d)\n"),
			lino, agno, ino, agno, first_rec->ino_startnum);
		suspect++;

		/*
		 * if the 2 chunks start at the same place,
		 * then we don't have to put this one
		 * in the uncertain list.  go to the next one.
		 */
		if (first_rec->ino_startnum == ino)
			return suspect;
	}

	nfree = 0;

	/*
	 * now mark all the inodes as existing and free or used.
	 * if the tree is suspect, put them into the uncertain
	 * inode tree.
	 */
	if (!suspect)  {
		if (XFS_INOBT_IS_FREE_DISK(rp, 0))  {
			nfree++;
			ino_rec = set_inode_free_alloc(agno, ino);
		} else  {
			ino_rec = set_inode_used_alloc(agno, ino);
		}
		for (j = 1; j < XFS_INODES_PER_CHUNK; j++)  {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))  {
				nfree++;
				set_inode_free(ino_rec, j);
			} else  {
				set_inode_used(ino_rec, j);
			}
		}
	} else  {
		for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
			if (XFS_INOBT_IS_FREE_DISK(rp, j))  {
				nfree++;
				add_aginode_uncertain(agno, ino + j, 1);
			} else  {
				add_aginode_uncertain(agno, ino + j, 0);
			}
		}
	}

	/* free/used mismatch is only worth a warning; phase 5 rebuilds it */
	if (nfree != be32_to_cpu(rp->ir_freecount)) {
		do_warn(_("ir_freecount/free mismatch, inode "
			"chunk %d/%d, freecount %d nfree %d\n"),
			agno, ino, be32_to_cpu(rp->ir_freecount), nfree);
	}

	return suspect;
}
792
793
/*
 * this one walks the inode btrees sucking the info there into
 * the incore avl tree.  We try and rescue corrupted btree records
 * to minimize our chances of losing inodes.  Inode info from potentially
 * corrupt sources could be bogus so rather than put the info straight
 * into the tree, instead we put it on a list and try and verify the
 * info in the next phase by examining what's on disk.  At that point,
 * we'll be able to figure out what's what and stick the corrected info
 * into the tree.  We do bail out at some point and give up on a subtree
 * so as to avoid walking randomly all over the ag.
 *
 * Note that it's also ok if the free/inuse info wrong, we can correct
 * that when we examine the on-disk inode.  The important thing is to
 * get the start and alignment of the inode chunks right.  Those chunks
 * that we aren't sure about go into the uncertain list.
 */
void
scanfunc_ino(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agblock_t		bno,
	xfs_agnumber_t		agno,
	int			suspect,
	int			isroot
	)
{
	int			i;
	int			numrecs;
	int			state;
	xfs_inobt_ptr_t		*pp;
	xfs_inobt_rec_t		*rp;
	int			hdr_errors;

	hdr_errors = 0;

	if (be32_to_cpu(block->bb_magic) != XFS_IBT_MAGIC) {
		do_warn(_("bad magic # %#x in inobt block %d/%d\n"),
			be32_to_cpu(block->bb_magic), agno, bno);
		hdr_errors++;
		bad_ino_btree = 1;
		if (suspect)
			return;
	}
	if (be16_to_cpu(block->bb_level) != level) {
		do_warn(_("expected level %d got %d in inobt block %d/%d\n"),
			level, be16_to_cpu(block->bb_level), agno, bno);
		hdr_errors++;
		bad_ino_btree = 1;
		if (suspect)
			return;
	}

	/*
	 * check for btree blocks multiply claimed, any unknown/free state
	 * is ok in the bitmap block.
	 */
	state = get_bmap(agno, bno);
	switch (state)  {
	case XR_E_UNKNOWN:
	case XR_E_FREE1:
	case XR_E_FREE:
		set_bmap(agno, bno, XR_E_FS_MAP);
		break;
	default:
		set_bmap(agno, bno, XR_E_MULT);
		do_warn(
_("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
			state, agno, bno, suspect);
	}

	numrecs = be16_to_cpu(block->bb_numrecs);

	/*
	 * leaf record in btree
	 */
	if (level == 0) {
		/* check for trashed btree block */

		if (numrecs > mp->m_inobt_mxr[0])  {
			numrecs = mp->m_inobt_mxr[0];
			hdr_errors++;
		}
		if (isroot == 0 && numrecs < mp->m_inobt_mnr[0])  {
			numrecs = mp->m_inobt_mnr[0];
			hdr_errors++;
		}

		if (hdr_errors)  {
			bad_ino_btree = 1;
			do_warn(_("dubious inode btree block header %d/%d\n"),
				agno, bno);
			suspect++;
		}

		rp = XFS_INOBT_REC_ADDR(mp, block, 1);

		/*
		 * step through the records, each record points to
		 * a chunk of inodes.  The start of inode chunks should
		 * be block-aligned.  Each inode btree rec should point
		 * to the start of a block of inodes or the start of a group
		 * of INODES_PER_CHUNK (64) inodes.  off is the offset into
		 * the block.  skip processing of bogus records.
		 */
		for (i = 0; i < numrecs; i++)
			suspect = scan_single_ino_chunk(agno, &rp[i], suspect);

		if (suspect)
			bad_ino_btree = 1;

		return;
	}

	/*
	 * interior record, continue on
	 */
	if (numrecs > mp->m_inobt_mxr[1])  {
		numrecs = mp->m_inobt_mxr[1];
		hdr_errors++;
	}
	if (isroot == 0 && numrecs < mp->m_inobt_mnr[1])  {
		numrecs = mp->m_inobt_mnr[1];
		hdr_errors++;
	}

	pp = XFS_INOBT_PTR_ADDR(mp, block, 1, mp->m_inobt_mxr[1]);

	/*
	 * don't pass bogus tree flag down further if this block
	 * looked ok.  bail out if two levels in a row look bad.
	 */

	if (suspect && !hdr_errors)
		suspect = 0;

	if (hdr_errors)  {
		bad_ino_btree = 1;
		if (suspect)
			return;
		else suspect++;
	}

	for (i = 0; i < numrecs; i++)  {
		if (be32_to_cpu(pp[i]) != 0 && verify_agbno(mp, agno,
							be32_to_cpu(pp[i])))
			scan_sbtree(be32_to_cpu(pp[i]), level, agno,
					suspect, scanfunc_ino, 0);
	}
}
943
944 void
945 scan_freelist(
946 xfs_agf_t *agf)
947 {
948 xfs_agfl_t *agfl;
949 xfs_buf_t *agflbuf;
950 xfs_agnumber_t agno;
951 xfs_agblock_t bno;
952 int count;
953 int i;
954
955 agno = be32_to_cpu(agf->agf_seqno);
956
957 if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
958 XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
959 XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
960 set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
961
962 if (be32_to_cpu(agf->agf_flcount) == 0)
963 return;
964
965 agflbuf = libxfs_readbuf(mp->m_dev,
966 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
967 XFS_FSS_TO_BB(mp, 1), 0);
968 if (!agflbuf) {
969 do_abort(_("can't read agfl block for ag %d\n"), agno);
970 return;
971 }
972 agfl = XFS_BUF_TO_AGFL(agflbuf);
973 i = be32_to_cpu(agf->agf_flfirst);
974 count = 0;
975 for (;;) {
976 bno = be32_to_cpu(agfl->agfl_bno[i]);
977 if (verify_agbno(mp, agno, bno))
978 set_bmap(agno, bno, XR_E_FREE);
979 else
980 do_warn(_("bad agbno %u in agfl, agno %d\n"),
981 bno, agno);
982 count++;
983 if (i == be32_to_cpu(agf->agf_fllast))
984 break;
985 if (++i == XFS_AGFL_SIZE(mp))
986 i = 0;
987 }
988 if (count != be32_to_cpu(agf->agf_flcount)) {
989 do_warn(_("freeblk count %d != flcount %d in ag %d\n"), count,
990 be32_to_cpu(agf->agf_flcount), agno);
991 }
992 libxfs_putbuf(agflbuf);
993 }
994
/*
 * Scan one allocation group: read its superblock copy, AGF and AGI,
 * verify/repair the AG headers via verify_set_agheader(), walk the
 * free list and all three per-AG btrees (bno, cnt, inode), and write
 * back any headers that were fixed up (never in no_modify mode).
 *
 * All three header buffers plus the decoded sb are released on every
 * exit path; do_error() aborts, so the returns after it are only
 * defensive.
 */
void
scan_ag(
	xfs_agnumber_t	agno)
{
	xfs_agf_t	*agf;
	xfs_buf_t	*agfbuf;
	int		agf_dirty;
	xfs_agi_t	*agi;
	xfs_buf_t	*agibuf;
	int		agi_dirty;
	xfs_sb_t	*sb;
	xfs_buf_t	*sbbuf;
	int		sb_dirty;
	int		status;

	agi_dirty = agf_dirty = sb_dirty = 0;

	sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
				XFS_FSS_TO_BB(mp, 1), 0);
	if (!sbbuf)  {
		do_error(_("can't get root superblock for ag %d\n"), agno);
		return;
	}

	sb = (xfs_sb_t *)calloc(BBSIZE, 1);
	if (!sb) {
		do_error(_("can't allocate memory for superblock\n"));
		libxfs_putbuf(sbbuf);
		return;
	}
	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));

	agfbuf = libxfs_readbuf(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0);
	if (!agfbuf)  {
		do_error(_("can't read agf block for ag %d\n"), agno);
		libxfs_putbuf(sbbuf);
		free(sb);
		return;
	}
	agf = XFS_BUF_TO_AGF(agfbuf);

	agibuf = libxfs_readbuf(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0);
	if (!agibuf)  {
		do_error(_("can't read agi block for ag %d\n"), agno);
		libxfs_putbuf(agfbuf);
		libxfs_putbuf(sbbuf);
		free(sb);
		return;
	}
	agi = XFS_BUF_TO_AGI(agibuf);

	/* fix up bad ag headers */

	status = verify_set_agheader(mp, sbbuf, sb, agf, agi, agno);

	if (status & XR_AG_SB_SEC)  {
		if (!no_modify)
			sb_dirty = 1;
		/*
		 * clear bad sector bit because we don't want
		 * to skip further processing.  we just want to
		 * ensure that we write out the modified sb buffer.
		 */
		status &= ~XR_AG_SB_SEC;
	}
	if (status & XR_AG_SB)  {
		if (!no_modify) {
			do_warn(_("reset bad sb for ag %d\n"), agno);
			sb_dirty = 1;
		} else {
			do_warn(_("would reset bad sb for ag %d\n"), agno);
		}
	}
	if (status & XR_AG_AGF)  {
		if (!no_modify) {
			do_warn(_("reset bad agf for ag %d\n"), agno);
			agf_dirty = 1;
		} else {
			do_warn(_("would reset bad agf for ag %d\n"), agno);
		}
	}
	if (status & XR_AG_AGI)  {
		if (!no_modify) {
			do_warn(_("reset bad agi for ag %d\n"), agno);
			agi_dirty = 1;
		} else {
			do_warn(_("would reset bad agi for ag %d\n"), agno);
		}
	}

	/* uncorrectable headers in no-modify mode: give up on this AG */
	if (status && no_modify)  {
		libxfs_putbuf(agibuf);
		libxfs_putbuf(agfbuf);
		libxfs_putbuf(sbbuf);
		free(sb);

		do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
			agno);

		return;
	}

	scan_freelist(agf);

	if (be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]) != 0 && verify_agbno(mp,
			agno, be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO])))
		scan_sbtree(be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
			    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
			    agno, 0, scanfunc_bno, 1);
	else
		do_warn(_("bad agbno %u for btbno root, agno %d\n"),
			be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
			agno);

	if (be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]) != 0 && verify_agbno(mp,
			agno, be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT])))
		scan_sbtree(be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
			    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
			    agno, 0, scanfunc_cnt, 1);
	else
		do_warn(_("bad agbno %u for btbcnt root, agno %d\n"),
			be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
			agno);

	if (be32_to_cpu(agi->agi_root) != 0 && verify_agbno(mp, agno,
						be32_to_cpu(agi->agi_root)))
		scan_sbtree(be32_to_cpu(agi->agi_root),
				be32_to_cpu(agi->agi_level), agno, 0, scanfunc_ino, 1);
	else
		do_warn(_("bad agbno %u for inobt root, agno %d\n"),
			be32_to_cpu(agi->agi_root), agno);

	ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));

	if (agi_dirty && !no_modify)
		libxfs_writebuf(agibuf, 0);
	else
		libxfs_putbuf(agibuf);

	ASSERT(agf_dirty == 0 || (agf_dirty && !no_modify));

	if (agf_dirty && !no_modify)
		libxfs_writebuf(agfbuf, 0);
	else
		libxfs_putbuf(agfbuf);

	ASSERT(sb_dirty == 0 || (sb_dirty && !no_modify));

	/* ag 0's sb is the primary: keep the incore mp->m_sb in sync */
	if (sb_dirty && !no_modify) {
		if (agno == 0)
			memcpy(&mp->m_sb, sb, sizeof(xfs_sb_t));
		libxfs_sb_to_disk(XFS_BUF_TO_SBP(sbbuf), sb, XFS_SB_ALL_BITS);
		libxfs_writebuf(sbbuf, 0);
	} else
		libxfs_putbuf(sbbuf);
	free(sb);
	PROG_RPT_INC(prog_rpt_done[agno], 1);
}