1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <libxfs.h>
20 #include "avl.h"
21 #include "globals.h"
22 #include "agheader.h"
23 #include "incore.h"
24 #include "protos.h"
25 #include "err_protos.h"
26 #include "dinode.h"
27 #include "versions.h"
28 #include "prefetch.h"
29 #include "progress.h"
30
31 /*
32  * validates an inode block or chunk, returns # of good inodes.
33  * the dinodes are verified using verify_uncertain_dinode(), which
34  * means only the basic inode info is checked, no fork checks.
35 */
36 static int
37 check_aginode_block(xfs_mount_t *mp,
38 xfs_agnumber_t agno,
39 xfs_agblock_t agbno)
40 {
41
42 xfs_dinode_t *dino_p;
43 int i;
44 int cnt = 0;
45 xfs_buf_t *bp;
46
47 /*
48 	 * it's ok to read these possible inode blocks one at
49 	 * a time because they don't belong to known inodes (if
50 	 * they did, we'd know about them courtesy of the incore
51 	 * inode tree and we wouldn't be here); we also stale the
52 	 * buffers out so no one else will overlap them.
53 */
54 bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
55 XFS_FSB_TO_BB(mp, 1), 0, NULL);
56 if (!bp) {
57 do_warn(_("cannot read agbno (%u/%u), disk block %" PRId64 "\n"),
58 agno, agbno, XFS_AGB_TO_DADDR(mp, agno, agbno));
59 return(0);
60 }
61
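	/*
	 * walk every inode slot in this block and count the slots
	 * that pass the basic dinode sanity checks
	 */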
62 for (i = 0; i < mp->m_sb.sb_inopblock; i++) {
63 dino_p = xfs_make_iptr(mp, bp, i);
64 if (!verify_uncertain_dinode(mp, dino_p, agno,
65 XFS_OFFBNO_TO_AGINO(mp, agbno, i)))
66 cnt++;
67 }
68 if (cnt)
69 bp->b_ops = &xfs_inode_buf_ops;
70
71 libxfs_putbuf(bp);
72 return(cnt);
73 }
74
75 /*
76 * tries to establish if the inode really exists in a valid
77 * inode chunk. returns number of new inodes if things are good
78 * and 0 if bad. start is the start of the discovered inode chunk.
79 * routine assumes that ino is a legal inode number
80 * (verified by verify_inum()). If the inode chunk turns out
81 * to be good, this routine will put the inode chunk into
82 * the good inode chunk tree if required.
83 *
84 * the verify_(ag)inode* family of routines are utility
85 * routines called by check_uncertain_aginodes() and
86 * process_uncertain_aginodes().
87 */
88 static int
89 verify_inode_chunk(xfs_mount_t *mp,
90 xfs_ino_t ino,
91 xfs_ino_t *start_ino)
92 {
93 xfs_agnumber_t agno;
94 xfs_agino_t agino;
95 xfs_agino_t start_agino;
96 xfs_agblock_t agbno;
97 xfs_agblock_t start_agbno = 0;
98 xfs_agblock_t end_agbno;
99 xfs_agblock_t max_agbno;
100 xfs_agblock_t cur_agbno;
101 xfs_agblock_t chunk_start_agbno;
102 xfs_agblock_t chunk_stop_agbno;
103 ino_tree_node_t *irec_before_p = NULL;
104 ino_tree_node_t *irec_after_p = NULL;
105 ino_tree_node_t *irec_p;
106 ino_tree_node_t *irec_next_p;
107 int irec_cnt;
108 int ino_cnt = 0;
109 int num_blks;
110 int i;
111 int j;
112 int state;
113 xfs_extlen_t blen;
114
115 agno = XFS_INO_TO_AGNO(mp, ino);
116 agino = XFS_INO_TO_AGINO(mp, ino);
117 agbno = XFS_INO_TO_AGBNO(mp, ino);
118 *start_ino = NULLFSINO;
119
120 ASSERT(mp->m_ialloc_blks > 0);
121
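	/*
	 * the last AG may be shorter than sb_agblocks, so size the
	 * in-AG block limit from sb_dblocks in that case
	 */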
122 if (agno == mp->m_sb.sb_agcount - 1)
123 max_agbno = mp->m_sb.sb_dblocks -
124 (xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno;
125 else
126 max_agbno = mp->m_sb.sb_agblocks;
127
128 /*
129 * is the inode beyond the end of the AG?
130 */
131 if (agbno >= max_agbno)
132 return(0);
133
134 /*
135 * check for the easy case, inodes per block >= XFS_INODES_PER_CHUNK
136 * (multiple chunks per block)
137 */
138 if (mp->m_ialloc_blks == 1) {
139 if (agbno > max_agbno)
140 return 0;
141 		if (check_aginode_block(mp, agno, agbno) == 0)
142 return 0;
143
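		/*
		 * the block checks out as inodes, so claim it in the
		 * incore block map, noting any conflicting prior state
		 */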
144 pthread_mutex_lock(&ag_locks[agno].lock);
145
146 state = get_bmap(agno, agbno);
147 switch (state) {
148 case XR_E_INO:
149 do_warn(
150 _("uncertain inode block %d/%d already known\n"),
151 agno, agbno);
152 break;
153 case XR_E_UNKNOWN:
154 case XR_E_FREE1:
155 case XR_E_FREE:
156 set_bmap(agno, agbno, XR_E_INO);
157 break;
158 case XR_E_MULT:
159 case XR_E_INUSE:
160 case XR_E_INUSE_FS:
161 case XR_E_FS_MAP:
162 /*
163 * if block is already claimed, forget it.
164 */
165 do_warn(
166 _("inode block %d/%d multiply claimed, (state %d)\n"),
167 agno, agbno, state);
168 set_bmap(agno, agbno, XR_E_MULT);
169 pthread_mutex_unlock(&ag_locks[agno].lock);
170 return(0);
171 default:
172 do_warn(
173 _("inode block %d/%d bad state, (state %d)\n"),
174 agno, agbno, state);
175 set_bmap(agno, agbno, XR_E_INO);
176 break;
177 }
178
179 pthread_mutex_unlock(&ag_locks[agno].lock);
180
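		/*
		 * the chunk(s) we record start at the first inode
		 * in this block
		 */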
181 start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0);
182 *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
183
184 /*
185 * put new inode record(s) into inode tree
186 */
187 for (j = 0; j < chunks_pblock; j++) {
188 if ((irec_p = find_inode_rec(mp, agno, start_agino))
189 == NULL) {
190 irec_p = set_inode_free_alloc(mp, agno,
191 start_agino);
192 for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
193 set_inode_free(irec_p, i);
194 }
195 if (start_agino <= agino && agino <
196 start_agino + XFS_INODES_PER_CHUNK)
197 set_inode_used(irec_p, agino - start_agino);
198
199 start_agino += XFS_INODES_PER_CHUNK;
200 ino_cnt += XFS_INODES_PER_CHUNK;
201 }
202
203 return(ino_cnt);
204 } else if (fs_aligned_inodes) {
205 /*
206 * next easy case -- aligned inode filesystem.
207 * just check out the chunk
208 */
209 start_agbno = rounddown(XFS_INO_TO_AGBNO(mp, ino),
210 fs_ino_alignment);
211 end_agbno = start_agbno + mp->m_ialloc_blks;
212
213 /*
214 * if this fs has aligned inodes but the end of the
215 * chunk is beyond the end of the ag, this is a bad
216 * chunk
217 */
218 if (end_agbno > max_agbno)
219 return(0);
220
221 /*
222 * check out all blocks in chunk
223 */
224 ino_cnt = 0;
225 for (cur_agbno = start_agbno; cur_agbno < end_agbno;
226 cur_agbno++) {
227 ino_cnt += check_aginode_block(mp, agno, cur_agbno);
228 }
229
230 /*
231 * if we lose either 2 blocks worth of inodes or >25% of
232 * the chunk, just forget it.
233 */
234 if (ino_cnt < XFS_INODES_PER_CHUNK - 2 * mp->m_sb.sb_inopblock
235 || ino_cnt < XFS_INODES_PER_CHUNK - 16)
236 return(0);
237
238 /*
239 * ok, put the record into the tree, if no conflict.
240 */
241 if (find_uncertain_inode_rec(agno,
242 XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0)))
243 return(0);
244
245 start_agino = XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0);
246 *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
247
248 irec_p = set_inode_free_alloc(mp, agno,
249 XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0));
250
251 for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
252 set_inode_free(irec_p, i);
253
254 ASSERT(start_agino <= agino &&
255 start_agino + XFS_INODES_PER_CHUNK > agino);
256
257 set_inode_used(irec_p, agino - start_agino);
258
259 return(XFS_INODES_PER_CHUNK);
260 }
261
262 /*
263 * hard case -- pre-6.3 filesystem.
264 * set default start/end agbnos and ensure agbnos are legal.
265 * we're setting a range [start_agbno, end_agbno) such that
266 * a discovered inode chunk completely within that range
267 * would include the inode passed into us.
268 */
269 if (mp->m_ialloc_blks > 1) {
270 if (agino > mp->m_ialloc_inos)
271 start_agbno = agbno - mp->m_ialloc_blks + 1;
272 else
273 start_agbno = 1;
274 }
275
276 end_agbno = agbno + mp->m_ialloc_blks;
277
278 if (end_agbno > max_agbno)
279 end_agbno = max_agbno;
280
281 /*
282 * search tree for known inodes within +/- 1 inode chunk range
283 */
284 irec_before_p = irec_after_p = NULL;
285
286 find_inode_rec_range(mp, agno, XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0),
287 XFS_OFFBNO_TO_AGINO(mp, end_agbno, mp->m_sb.sb_inopblock - 1),
288 &irec_before_p, &irec_after_p);
289
290 /*
291 * if we have known inode chunks in our search range, establish
292 * their start and end-points to tighten our search range. range
293 	 * is [start, end) -- i.e. max/end agbno is one beyond the
294 * last block to be examined. the avl routines work this way.
295 */
296 if (irec_before_p) {
297 /*
298 * only one inode record in the range, move one boundary in
299 */
300 if (irec_before_p == irec_after_p) {
301 if (irec_before_p->ino_startnum < agino)
302 start_agbno = XFS_AGINO_TO_AGBNO(mp,
303 irec_before_p->ino_startnum +
304 XFS_INODES_PER_CHUNK);
305 else
306 end_agbno = XFS_AGINO_TO_AGBNO(mp,
307 irec_before_p->ino_startnum);
308 }
309
310 /*
311 * find the start of the gap in the search range (which
312 * should contain our unknown inode). if the only irec
313 * within +/- 1 chunks starts after the inode we're
314 * looking for, skip this stuff since the end_agbno
315 * of the range has already been trimmed in to not
316 * include that irec.
317 */
318 if (irec_before_p->ino_startnum < agino) {
319 irec_p = irec_before_p;
320 irec_next_p = next_ino_rec(irec_p);
321
322 while(irec_next_p != NULL &&
323 irec_p->ino_startnum + XFS_INODES_PER_CHUNK ==
324 irec_next_p->ino_startnum) {
325 irec_p = irec_next_p;
326 irec_next_p = next_ino_rec(irec_next_p);
327 }
328
329 start_agbno = XFS_AGINO_TO_AGBNO(mp,
330 irec_p->ino_startnum) +
331 mp->m_ialloc_blks;
332
333 /*
334 * we know that the inode we're trying to verify isn't
335 * in an inode chunk so the next ino_rec marks the end
336 * of the gap -- is it within the search range?
337 */
338 if (irec_next_p != NULL &&
339 agino + mp->m_ialloc_inos >=
340 irec_next_p->ino_startnum)
341 end_agbno = XFS_AGINO_TO_AGBNO(mp,
342 irec_next_p->ino_startnum);
343 }
344
345 ASSERT(start_agbno < end_agbno);
346 }
347
348 /*
349 * if the gap is too small to contain a chunk, we lose.
350 * this means that inode chunks known to be good surround
351 * the inode in question and that the space between them
352 * is too small for a legal inode chunk
353 */
354 if (end_agbno - start_agbno < mp->m_ialloc_blks)
355 return(0);
356
357 /*
358 * now grunge around the disk, start at the inode block and
359 * go in each direction until you hit a non-inode block or
360 	 * run into a range boundary. A non-inode block is a block
361 * with *no* good inodes in it. Unfortunately, we can't
362 * co-opt bad blocks into inode chunks (which might take
363 * care of disk blocks that turn into zeroes) because the
364 * filesystem could very well allocate two inode chunks
365 * with a one block file in between and we'd zap the file.
366 * We're better off just losing the rest of the
367 * inode chunk instead.
368 */
369 for (cur_agbno = agbno; cur_agbno >= start_agbno; cur_agbno--) {
370 /*
371 * if the block has no inodes, it's a bad block so
372 * break out now without decrementing cur_agbno so
373 * chunk start blockno will be set to the last good block
374 */
375 if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
376 break;
377 ino_cnt += irec_cnt;
378 }
379
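	/*
	 * the downward scan stopped either at a block with no good
	 * inodes or just below start_agbno; either way the candidate
	 * chunk starts one block above where we stopped
	 */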
380 chunk_start_agbno = cur_agbno + 1;
381
382 for (cur_agbno = agbno + 1; cur_agbno < end_agbno; cur_agbno++) {
383 /*
384 * if the block has no inodes, it's a bad block so
385 * break out now without incrementing cur_agbno so
386 		 * chunk stop blockno will be set to the block
387 * immediately after the last good block.
388 */
389 if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
390 break;
391 ino_cnt += irec_cnt;
392 }
393
394 chunk_stop_agbno = cur_agbno;
395
396 num_blks = chunk_stop_agbno - chunk_start_agbno;
397
398 if (num_blks < mp->m_ialloc_blks || ino_cnt == 0)
399 return 0;
400
401 /*
402 * XXX - later - if the entire range is selected and they're all
403 	 * good inodes, keep searching in either direction until
404 	 * you hit the end of the range of inodes, then split into chunks.
405 	 * for now, just take one chunk's worth starting at the lowest
406 * possible point and hopefully we'll pick the rest up later.
407 *
408 * XXX - if we were going to fix up an inode chunk for
409 * any good inodes in the chunk, this is where we would
410 * do it. For now, keep it simple and lose the rest of
411 * the chunk
412 */
413
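	/*
	 * trim the candidate range down to a whole number of
	 * inode chunks
	 */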
414 if (num_blks % mp->m_ialloc_blks != 0) {
415 num_blks = rounddown(num_blks, mp->m_ialloc_blks);
416 chunk_stop_agbno = chunk_start_agbno + num_blks;
417 }
418
419 /*
420 * ok, we've got a candidate inode chunk. now we have to
421 * verify that we aren't trying to use blocks that are already
422 * in use. If so, mark them as multiply claimed since odds
423 * are very low that we found this chunk by stumbling across
424 * user data -- we're probably here as a result of a directory
425 * entry or an iunlinked pointer
426 */
427 pthread_mutex_lock(&ag_locks[agno].lock);
428 for (cur_agbno = chunk_start_agbno;
429 cur_agbno < chunk_stop_agbno;
430 cur_agbno += blen) {
431 state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
432 switch (state) {
433 case XR_E_MULT:
434 case XR_E_INUSE:
435 case XR_E_INUSE_FS:
436 case XR_E_FS_MAP:
437 do_warn(
438 _("inode block %d/%d multiply claimed, (state %d)\n"),
439 agno, cur_agbno, state);
440 set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT);
441 pthread_mutex_unlock(&ag_locks[agno].lock);
442 return 0;
443 case XR_E_INO:
444 do_error(
445 _("uncertain inode block overlap, agbno = %d, ino = %" PRIu64 "\n"),
446 agbno, ino);
447 break;
448 default:
449 break;
450 }
451 }
452 pthread_mutex_unlock(&ag_locks[agno].lock);
453
454 /*
455 * ok, chunk is good. put the record into the tree if required,
456 * and fill in the bitmap. All inodes will be marked as "free"
457 * except for the one that led us to discover the chunk. That's
458 * ok because we'll override the free setting later if the
459 * contents of the inode indicate it's in use.
460 */
461 start_agino = XFS_OFFBNO_TO_AGINO(mp, chunk_start_agbno, 0);
462 *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
463
464 ASSERT(find_inode_rec(mp, agno, start_agino) == NULL);
465
466 irec_p = set_inode_free_alloc(mp, agno, start_agino);
467 for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
468 set_inode_free(irec_p, i);
469
470 ASSERT(start_agino <= agino &&
471 start_agino + XFS_INODES_PER_CHUNK > agino);
472
473 set_inode_used(irec_p, agino - start_agino);
474
475 pthread_mutex_lock(&ag_locks[agno].lock);
476
477 for (cur_agbno = chunk_start_agbno;
478 cur_agbno < chunk_stop_agbno;
479 cur_agbno += blen) {
480 state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
481 switch (state) {
482 case XR_E_INO:
483 do_error(
484 _("uncertain inode block %" PRIu64 " already known\n"),
485 XFS_AGB_TO_FSB(mp, agno, cur_agbno));
486 break;
487 case XR_E_UNKNOWN:
488 case XR_E_FREE1:
489 case XR_E_FREE:
490 set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
491 break;
492 case XR_E_MULT:
493 case XR_E_INUSE:
494 case XR_E_INUSE_FS:
495 case XR_E_FS_MAP:
496 do_error(
497 _("inode block %d/%d multiply claimed, (state %d)\n"),
498 agno, cur_agbno, state);
499 break;
500 default:
501 do_warn(
502 _("inode block %d/%d bad state, (state %d)\n"),
503 agno, cur_agbno, state);
504 set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
505 break;
506 }
507 }
508 pthread_mutex_unlock(&ag_locks[agno].lock);
509
510 return(ino_cnt);
511 }
512
513 /*
514 * same as above only for ag inode chunks
515 */
516 static int
517 verify_aginode_chunk(xfs_mount_t *mp,
518 xfs_agnumber_t agno,
519 xfs_agino_t agino,
520 xfs_agino_t *agino_start)
521 {
522 xfs_ino_t ino;
523 int res;
524
525 res = verify_inode_chunk(mp, XFS_AGINO_TO_INO(mp, agno, agino), &ino);
526
527 if (res)
528 *agino_start = XFS_INO_TO_AGINO(mp, ino);
529 else
530 *agino_start = NULLAGINO;
531
532 return(res);
533 }
534
535 /*
536 * this does the same as the two above only it returns a pointer
537 * to the inode record in the good inode tree
538 */
539 static ino_tree_node_t *
540 verify_aginode_chunk_irec(xfs_mount_t *mp,
541 xfs_agnumber_t agno,
542 xfs_agino_t agino)
543 {
544 xfs_agino_t start_agino;
545 ino_tree_node_t *irec = NULL;
546
547 if (verify_aginode_chunk(mp, agno, agino, &start_agino))
548 irec = find_inode_rec(mp, agno, start_agino);
549
550 return(irec);
551 }
552
553
554
555 /*
556 * processes an inode allocation chunk/block, returns 1 on I/O errors,
557 * 0 otherwise
558 *
559 * *bogus is set to 1 if the entire set of inodes is bad.
560 */
561
562 /* ARGSUSED */
563 static int
564 process_inode_chunk(
565 xfs_mount_t *mp,
566 xfs_agnumber_t agno,
567 int num_inos,
568 ino_tree_node_t *first_irec,
569 int ino_discovery,
570 int check_dups,
571 int extra_attr_check,
572 int *bogus)
573 {
574 xfs_ino_t parent;
575 ino_tree_node_t *ino_rec;
576 xfs_buf_t **bplist;
577 xfs_dinode_t *dino;
578 int icnt;
579 int status;
580 int is_used;
581 int state;
582 int ino_dirty;
583 int irec_offset;
584 int ibuf_offset;
585 xfs_agino_t agino;
586 xfs_agblock_t agbno;
587 xfs_ino_t ino;
588 int dirty = 0;
589 int isa_dir = 0;
590 int blks_per_cluster;
591 int cluster_count;
592 int bp_index;
593 int cluster_offset;
594
595 ASSERT(first_irec != NULL);
596 ASSERT(XFS_AGINO_TO_OFFSET(mp, first_irec->ino_startnum) == 0);
597
598 *bogus = 0;
599 ASSERT(mp->m_ialloc_blks > 0);
600
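	/*
	 * an inode cluster may be smaller than a filesystem block;
	 * clamp to one block and work out how many cluster buffers
	 * make up this chunk
	 */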
601 blks_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
602 if (blks_per_cluster == 0)
603 blks_per_cluster = 1;
604 cluster_count = XFS_INODES_PER_CHUNK / inodes_per_cluster;
605 if (cluster_count == 0)
606 cluster_count = 1;
607
608 /*
609 * get all blocks required to read in this chunk (may wind up
610 * having to process more chunks in a multi-chunk per block fs)
611 */
612 agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
613
614 /*
615 * set up first irec
616 */
617 ino_rec = first_irec;
618 irec_offset = 0;
619
620 bplist = malloc(cluster_count * sizeof(xfs_buf_t *));
621 if (bplist == NULL)
622 do_error(_("failed to allocate %zd bytes of memory\n"),
623 cluster_count * sizeof(xfs_buf_t *));
624
625 for (bp_index = 0; bp_index < cluster_count; bp_index++) {
626 /*
627 * Skip the cluster buffer if the first inode is sparse. The
628 		 * remaining inodes in the cluster share the same state, as
629 * sparse inodes occur at cluster granularity.
630 */
631 if (is_inode_sparse(ino_rec, irec_offset)) {
632 pftrace("skip sparse inode, startnum 0x%x idx %d",
633 ino_rec->ino_startnum, irec_offset);
634 bplist[bp_index] = NULL;
635 goto next_readbuf;
636 }
637
638 pftrace("about to read off %llu in AG %d",
639 XFS_AGB_TO_DADDR(mp, agno, agbno), agno);
640
641 bplist[bp_index] = libxfs_readbuf(mp->m_dev,
642 XFS_AGB_TO_DADDR(mp, agno, agbno),
643 XFS_FSB_TO_BB(mp, blks_per_cluster), 0,
644 &xfs_inode_buf_ops);
645 if (!bplist[bp_index]) {
646 do_warn(_("cannot read inode %" PRIu64 ", disk block %" PRId64 ", cnt %d\n"),
647 XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
648 XFS_AGB_TO_DADDR(mp, agno, agbno),
649 XFS_FSB_TO_BB(mp, blks_per_cluster));
650 while (bp_index > 0) {
651 bp_index--;
652 libxfs_putbuf(bplist[bp_index]);
653 }
654 free(bplist);
655 return(1);
656 }
657
658 pftrace("readbuf %p (%llu, %d) in AG %d", bplist[bp_index],
659 (long long)XFS_BUF_ADDR(bplist[bp_index]),
660 XFS_BUF_COUNT(bplist[bp_index]), agno);
661
662 bplist[bp_index]->b_ops = &xfs_inode_buf_ops;
663
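	/*
	 * advance to the first inode of the next cluster whether or
	 * not we read a buffer for this one
	 */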
664 next_readbuf:
665 irec_offset += mp->m_sb.sb_inopblock * blks_per_cluster;
666 agbno += blks_per_cluster;
667 }
668 agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
669
670 /*
671 * initialize counters
672 */
673 irec_offset = 0;
674 ibuf_offset = 0;
675 cluster_offset = 0;
676 icnt = 0;
677 status = 0;
678 bp_index = 0;
679
680 /*
681 * verify inode chunk if necessary
682 */
683 if (ino_discovery) {
684 for (;;) {
685 agino = irec_offset + ino_rec->ino_startnum;
686
687 /* no buffers for sparse clusters */
688 if (bplist[bp_index]) {
689 /* make inode pointer */
690 dino = xfs_make_iptr(mp, bplist[bp_index],
691 cluster_offset);
692
693 /*
694 * we always think that the root and realtime
695 * inodes are verified even though we may have
696 * to reset them later to keep from losing the
697 * chunk that they're in
698 */
699 if (verify_dinode(mp, dino, agno, agino) == 0 ||
700 (agno == 0 &&
701 (mp->m_sb.sb_rootino == agino ||
702 mp->m_sb.sb_rsumino == agino ||
703 mp->m_sb.sb_rbmino == agino)))
704 status++;
705 }
706
707 irec_offset++;
708 icnt++;
709 cluster_offset++;
710
711 if (icnt == mp->m_ialloc_inos &&
712 irec_offset == XFS_INODES_PER_CHUNK) {
713 /*
714 * done! - finished up irec and block
715 * simultaneously
716 */
717 break;
718 } else if (irec_offset == XFS_INODES_PER_CHUNK) {
719 /*
720 * get new irec (multiple chunks per block fs)
721 */
722 ino_rec = next_ino_rec(ino_rec);
723 ASSERT(ino_rec->ino_startnum == agino + 1);
724 irec_offset = 0;
725 }
726 if (cluster_offset == inodes_per_cluster) {
727 bp_index++;
728 cluster_offset = 0;
729 }
730 }
731
732 /*
733 * if chunk/block is bad, blow it off. the inode records
734 * will be deleted by the caller if appropriate.
735 */
736 if (!status) {
737 *bogus = 1;
738 for (bp_index = 0; bp_index < cluster_count; bp_index++)
739 if (bplist[bp_index])
740 libxfs_putbuf(bplist[bp_index]);
741 free(bplist);
742 return(0);
743 }
744
745 /*
746 * reset irec and counters
747 */
748 ino_rec = first_irec;
749
750 irec_offset = 0;
751 cluster_offset = 0;
752 bp_index = 0;
753 icnt = 0;
754 status = 0;
755 }
756
757 /*
758 * mark block as an inode block in the incore bitmap
759 */
760 if (!is_inode_sparse(ino_rec, irec_offset)) {
761 pthread_mutex_lock(&ag_locks[agno].lock);
762 state = get_bmap(agno, agbno);
763 switch (state) {
764 case XR_E_INO: /* already marked */
765 break;
766 case XR_E_UNKNOWN:
767 case XR_E_FREE:
768 case XR_E_FREE1:
769 set_bmap(agno, agbno, XR_E_INO);
770 break;
771 case XR_E_BAD_STATE:
772 do_error(_("bad state in block map %d\n"), state);
773 break;
774 default:
775 set_bmap(agno, agbno, XR_E_MULT);
776 do_warn(
777 _("inode block %" PRIu64 " multiply claimed, state was %d\n"),
778 XFS_AGB_TO_FSB(mp, agno, agbno), state);
779 break;
780 }
781 pthread_mutex_unlock(&ag_locks[agno].lock);
782 }
783
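	/*
	 * now process every inode in the chunk: locate each one in
	 * its cluster buffer and run the full dinode checks on it
	 */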
784 for (;;) {
785 agino = irec_offset + ino_rec->ino_startnum;
786 ino = XFS_AGINO_TO_INO(mp, agno, agino);
787
788 if (is_inode_sparse(ino_rec, irec_offset))
789 goto process_next;
790
791 /* make inode pointer */
792 dino = xfs_make_iptr(mp, bplist[bp_index], cluster_offset);
793
794
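		/*
		 * is_used starts at 3 as a "not yet decided" sentinel;
		 * process_dinode() must set it to 0 or 1 (see the
		 * ASSERT below)
		 */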
795 is_used = 3;
796 ino_dirty = 0;
797 parent = 0;
798
799 status = process_dinode(mp, dino, agno, agino,
800 is_inode_free(ino_rec, irec_offset),
801 &ino_dirty, &is_used,ino_discovery, check_dups,
802 extra_attr_check, &isa_dir, &parent);
803
804 ASSERT(is_used != 3);
805 if (ino_dirty) {
806 dirty = 1;
807 libxfs_dinode_calc_crc(mp, dino);
808 }
809
810 /*
811 * XXX - if we want to try and keep
812 * track of whether we need to bang on
813 * the inode maps (instead of just
814 * blindly reconstructing them like
815 		 * we do now), this is where to start.
816 */
817 if (is_used) {
818 __uint16_t di_mode;
819
820 if (is_inode_free(ino_rec, irec_offset)) {
821 if (verbose || no_modify) {
822 do_warn(
823 _("imap claims in-use inode %" PRIu64 " is free, "),
824 ino);
825 }
826
827 if (verbose || !no_modify)
828 do_warn(_("correcting imap\n"));
829 else
830 do_warn(_("would correct imap\n"));
831 }
832 set_inode_used(ino_rec, irec_offset);
833
834 /*
835 * store the on-disk file type for comparing in
836 * phase 6.
837 */
838 di_mode = be16_to_cpu(dino->di_mode);
839 di_mode = (di_mode & S_IFMT) >> S_SHIFT;
840 set_inode_ftype(ino_rec, irec_offset,
841 xfs_mode_to_ftype[di_mode]);
842
843 /*
844 * store on-disk nlink count for comparing in phase 7
845 */
846 set_inode_disk_nlinks(ino_rec, irec_offset,
847 dino->di_version > 1
848 ? be32_to_cpu(dino->di_nlink)
849 : be16_to_cpu(dino->di_onlink));
850
851 } else {
852 set_inode_free(ino_rec, irec_offset);
853 }
854
855 		/*
856 		 * record whether this is a directory; if we lose the root
857 		 * inode, or it turns into a non-directory, this lets us
858 		 * double-check later whether or not we need to reinitialize it.
859 		 */
860 if (isa_dir) {
861 set_inode_isadir(ino_rec, irec_offset);
862 /*
863 * we always set the parent but
864 * we may as well wait until
865 * phase 4 (no inode discovery)
866 * because the parent info will
867 * be solid then.
868 */
869 if (!ino_discovery) {
870 ASSERT(parent != 0);
871 set_inode_parent(ino_rec, irec_offset, parent);
872 ASSERT(parent ==
873 get_inode_parent(ino_rec, irec_offset));
874 }
875 } else {
876 clear_inode_isadir(ino_rec, irec_offset);
877 }
878
879 if (status) {
880 if (mp->m_sb.sb_rootino == ino) {
881 need_root_inode = 1;
882
883 if (!no_modify) {
884 do_warn(
885 _("cleared root inode %" PRIu64 "\n"),
886 ino);
887 } else {
888 do_warn(
889 _("would clear root inode %" PRIu64 "\n"),
890 ino);
891 }
892 } else if (mp->m_sb.sb_rbmino == ino) {
893 need_rbmino = 1;
894
895 if (!no_modify) {
896 do_warn(
897 _("cleared realtime bitmap inode %" PRIu64 "\n"),
898 ino);
899 } else {
900 do_warn(
901 _("would clear realtime bitmap inode %" PRIu64 "\n"),
902 ino);
903 }
904 } else if (mp->m_sb.sb_rsumino == ino) {
905 need_rsumino = 1;
906
907 if (!no_modify) {
908 do_warn(
909 _("cleared realtime summary inode %" PRIu64 "\n"),
910 ino);
911 } else {
912 do_warn(
913 _("would clear realtime summary inode %" PRIu64 "\n"),
914 ino);
915 }
916 } else if (!no_modify) {
917 do_warn(_("cleared inode %" PRIu64 "\n"),
918 ino);
919 } else {
920 do_warn(_("would have cleared inode %" PRIu64 "\n"),
921 ino);
922 }
923 }
924
925 process_next:
926 irec_offset++;
927 ibuf_offset++;
928 icnt++;
929 cluster_offset++;
930
931 if (icnt == mp->m_ialloc_inos &&
932 irec_offset == XFS_INODES_PER_CHUNK) {
933 /*
934 * done! - finished up irec and block simultaneously
935 */
936 for (bp_index = 0; bp_index < cluster_count; bp_index++) {
937 if (!bplist[bp_index])
938 continue;
939
940 pftrace("put/writebuf %p (%llu) in AG %d",
941 bplist[bp_index], (long long)
942 XFS_BUF_ADDR(bplist[bp_index]), agno);
943
944 if (dirty && !no_modify)
945 libxfs_writebuf(bplist[bp_index], 0);
946 else
947 libxfs_putbuf(bplist[bp_index]);
948 }
949 free(bplist);
950 break;
951 } else if (ibuf_offset == mp->m_sb.sb_inopblock) {
952 /*
953 * mark block as an inode block in the incore bitmap
954 * and reset inode buffer offset counter
955 */
956 ibuf_offset = 0;
957 agbno++;
958
959 if (!is_inode_sparse(ino_rec, irec_offset)) {
960 pthread_mutex_lock(&ag_locks[agno].lock);
961 state = get_bmap(agno, agbno);
962 switch (state) {
963 case XR_E_INO: /* already marked */
964 break;
965 case XR_E_UNKNOWN:
966 case XR_E_FREE:
967 case XR_E_FREE1:
968 set_bmap(agno, agbno, XR_E_INO);
969 break;
970 case XR_E_BAD_STATE:
971 do_error(
972 _("bad state in block map %d\n"),
973 state);
974 break;
975 default:
976 set_bmap(agno, agbno, XR_E_MULT);
977 do_warn(
978 _("inode block %" PRIu64 " multiply claimed, state was %d\n"),
979 XFS_AGB_TO_FSB(mp, agno, agbno),
980 state);
981 break;
982 }
983 pthread_mutex_unlock(&ag_locks[agno].lock);
984 }
985 } else if (irec_offset == XFS_INODES_PER_CHUNK) {
986 /*
987 * get new irec (multiple chunks per block fs)
988 */
989 ino_rec = next_ino_rec(ino_rec);
990 ASSERT(ino_rec->ino_startnum == agino + 1);
991 irec_offset = 0;
992 }
993 if (cluster_offset == inodes_per_cluster) {
994 bp_index++;
995 cluster_offset = 0;
996 }
997 }
998 return(0);
999 }
1000
1001 /*
1002 * check all inodes mentioned in the ag's incore inode maps.
1003 * the map may be incomplete. If so, we'll catch the missing
1004 * inodes (hopefully) when we traverse the directory tree.
1005  * ino_discovery is set to 1 if directory inodes should be
1006 * processed for internal consistency, parent setting and
1007 * discovery of unknown inodes. this only happens
1008 * in phase 3. check_dups is set to 1 if we're looking for
1009 * inodes that reference duplicate blocks so we can trash
1010 * the inode right then and there. this is set only in
1011 * phase 4 after we've run through and set the bitmap once.
1012 */
1013 void
1014 process_aginodes(
1015 xfs_mount_t *mp,
1016 prefetch_args_t *pf_args,
1017 xfs_agnumber_t agno,
1018 int ino_discovery,
1019 int check_dups,
1020 int extra_attr_check)
1021 {
1022 int num_inos, bogus;
1023 ino_tree_node_t *ino_rec, *first_ino_rec, *prev_ino_rec;
1024 #ifdef XR_PF_TRACE
1025 int count;
1026 #endif
1027 first_ino_rec = ino_rec = findfirst_inode_rec(agno);
1028
1029 while (ino_rec != NULL) {
1030 /*
1031 * paranoia - step through inode records until we step
1032 * through a full allocation of inodes. this could
1033 * be an issue in big-block filesystems where a block
1034 * can hold more than one inode chunk. make sure to
1035 * grab the record corresponding to the beginning of
1036 * the next block before we call the processing routines.
1037 */
1038 num_inos = XFS_INODES_PER_CHUNK;
1039 while (num_inos < mp->m_ialloc_inos && ino_rec != NULL) {
1040 /*
1041 			 * inode chunks will always be aligned and sized
1042 * correctly
1043 */
1044 if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
1045 num_inos += XFS_INODES_PER_CHUNK;
1046 }
1047
1048 ASSERT(num_inos == mp->m_ialloc_inos);
1049
1050 if (pf_args) {
1051 sem_post(&pf_args->ra_count);
1052 #ifdef XR_PF_TRACE
1053 sem_getvalue(&pf_args->ra_count, &count);
1054 pftrace("processing inode chunk %p in AG %d (sem count = %d)",
1055 first_ino_rec, agno, count);
1056 #endif
1057 }
1058
1059 if (process_inode_chunk(mp, agno, num_inos, first_ino_rec,
1060 ino_discovery, check_dups, extra_attr_check,
1061 &bogus)) {
1062 /* XXX - i/o error, we've got a problem */
1063 abort();
1064 }
1065
1066 if (!bogus)
1067 first_ino_rec = ino_rec = next_ino_rec(ino_rec);
1068 else {
1069 /*
1070 * inodes pointed to by this record are
1071 * completely bogus, blow the records for
1072 * this chunk out.
1073 * the inode block(s) will get reclaimed
1074 * in phase 4 when the block map is
1075 * reconstructed after inodes claiming
1076 * duplicate blocks are deleted.
1077 */
1078 num_inos = 0;
1079 ino_rec = first_ino_rec;
1080 while (num_inos < mp->m_ialloc_inos &&
1081 ino_rec != NULL) {
1082 prev_ino_rec = ino_rec;
1083
1084 if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
1085 num_inos += XFS_INODES_PER_CHUNK;
1086
1087 get_inode_rec(mp, agno, prev_ino_rec);
1088 free_inode_rec(agno, prev_ino_rec);
1089 }
1090
1091 first_ino_rec = ino_rec;
1092 }
1093 PROG_RPT_INC(prog_rpt_done[agno], num_inos);
1094 }
1095 }
1096
1097 /*
1098 * verify the uncertain inode list for an ag.
1099 * Good inodes get moved into the good inode tree.
1100  * If there are no uncertain inode records for this ag, this
1101  * routine returns immediately. It destroys the entire
1102  * uncertain inode tree for the ag as a side-effect.
1103 */
1104 void
1105 check_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
1106 {
1107 ino_tree_node_t *irec;
1108 ino_tree_node_t *nrec;
1109 xfs_agino_t start;
1110 xfs_agino_t i;
1111 xfs_agino_t agino;
1112 int got_some;
1113
1114 nrec = NULL;
1115 got_some = 0;
1116
1117 clear_uncertain_ino_cache(agno);
1118
1119 if ((irec = findfirst_uncertain_inode_rec(agno)) == NULL)
1120 return;
1121
1122 /*
1123 * the trick here is to find a contiguous range
1124 * of inodes, make sure that it doesn't overlap
1125 * with a known to exist chunk, and then make
1126 * sure it is a number of entire chunks.
1127 * we check on-disk once we have an idea of what's
1128 * going on just to double-check.
1129 *
1130 * process the uncertain inode record list and look
1131 * on disk to see if the referenced inodes are good
1132 */
1133
1134 do_warn(_("found inodes not in the inode allocation tree\n"));
1135
1136 do {
1137 /*
1138 		 * check every confirmed inode ("confirmed" here means
1139 		 * an inode that we really suspect to be an inode)
1140 */
1141 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
1142 if (!is_inode_confirmed(irec, i))
1143 continue;
1144
1145 agino = i + irec->ino_startnum;
1146
1147 if (verify_aginum(mp, agno, agino))
1148 continue;
1149
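			/*
			 * skip inodes covered by the chunk we verified on a
			 * previous iteration (nrec remembers that record)
			 */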
1150 if (nrec != NULL && nrec->ino_startnum <= agino &&
1151 agino < nrec->ino_startnum +
1152 XFS_INODES_PER_CHUNK)
1153 continue;
1154
1155 if ((nrec = find_inode_rec(mp, agno, agino)) == NULL)
1156 if (!verify_aginum(mp, agno, agino))
1157 if (verify_aginode_chunk(mp, agno,
1158 agino, &start))
1159 got_some = 1;
1160 }
1161
1162 get_uncertain_inode_rec(mp, agno, irec);
1163 free_inode_rec(agno, irec);
1164
1165 irec = findfirst_uncertain_inode_rec(agno);
1166 } while (irec != NULL);
1167
1168 if (got_some)
1169 do_warn(_("found inodes not in the inode allocation tree\n"));
1170
1171 return;
1172 }
1173
1174 /*
1175 * verify and process the uncertain inodes for an ag.
1176 * this is different from check_ in that we can't just
1177 * move the good inodes into the good inode tree and let
1178 * process_aginodes() deal with them because this gets called
1179 * after process_aginodes() has been run on the ag inode tree.
1180 * So we have to process the inodes as well as verify since
1181 * we don't want to rerun process_aginodes() on a tree that has
1182 * mostly been processed.
1183 *
1184 * Note that if this routine does process some inodes, it can
1185 * add uncertain inodes to any ag which would require that
1186 * the routine be called again to process those newly-added
1187 * uncertain inodes.
1188 *
1189 * returns 0 if no inodes were processed and 1 if inodes
1190 * were processed (and it is possible that new uncertain
1191 * inodes were discovered).
1192 *
1193 * as a side-effect, this routine tears down the uncertain
1194 * inode tree for the ag.
1195 */
1196 int
1197 process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
1198 {
1199 ino_tree_node_t *irec;
1200 ino_tree_node_t *nrec;
1201 xfs_agino_t agino;
1202 int i;
1203 int bogus;
1204 int cnt;
1205 int got_some;
1206
1207 #ifdef XR_INODE_TRACE
1208 fprintf(stderr, "in process_uncertain_aginodes, agno = %d\n", agno);
1209 #endif
1210
1211 got_some = 0;
1212
1213 clear_uncertain_ino_cache(agno);
1214
1215 if ((irec = findfirst_uncertain_inode_rec(agno)) == NULL)
1216 return(0);
1217
1218 nrec = NULL;
1219
1220 do {
1221 /*
1222 * check every confirmed inode
1223 */
1224 for (cnt = i = 0; i < XFS_INODES_PER_CHUNK; i++) {
1225 if (!is_inode_confirmed(irec, i))
1226 continue;
1227 cnt++;
1228 agino = i + irec->ino_startnum;
1229 #ifdef XR_INODE_TRACE
1230 fprintf(stderr, "ag inode = %d (0x%x)\n", agino, agino);
1231 #endif
1232 /*
1233 * skip over inodes already processed (in the
1234 * good tree), bad inode numbers, and inode numbers
1235 * pointing to bogus inodes
1236 */
1237 if (verify_aginum(mp, agno, agino))
1238 continue;
1239
1240 if (nrec != NULL && nrec->ino_startnum <= agino &&
1241 agino < nrec->ino_startnum +
1242 XFS_INODES_PER_CHUNK)
1243 continue;
1244
1245 if ((nrec = find_inode_rec(mp, agno, agino)) != NULL)
1246 continue;
1247
1248 /*
1249 * verify the chunk. if good, it will be
1250 * added to the good inode tree.
1251 */
1252 if ((nrec = verify_aginode_chunk_irec(mp,
1253 agno, agino)) == NULL)
1254 continue;
1255
1256 got_some = 1;
1257
1258 /*
1259 * process the inode record we just added
1260 * to the good inode tree. The inode
1261 * processing may add more records to the
1262 * uncertain inode lists.
1263 */
1264 if (process_inode_chunk(mp, agno, mp->m_ialloc_inos,
1265 nrec, 1, 0, 0, &bogus)) {
1266 /* XXX - i/o error, we've got a problem */
1267 abort();
1268 }
1269 }
1270
1271 ASSERT(cnt != 0);
1272 /*
1273 * now return the uncertain inode record to the free pool
1274 * and pull another one off the list for processing
1275 */
1276 get_uncertain_inode_rec(mp, agno, irec);
1277 free_inode_rec(agno, irec);
1278
1279 irec = findfirst_uncertain_inode_rec(agno);
1280 } while (irec != NULL);
1281
1282 if (got_some)
1283 do_warn(_("found inodes not in the inode allocation tree\n"));
1284
1285 return(1);
1286 }