// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "libxfs.h"
#include "avl.h"
#include "globals.h"
#include "agheader.h"
#include "incore.h"
#include "protos.h"
#include "err_protos.h"
#include "dinode.h"
#include "versions.h"
#include "prefetch.h"
#include "progress.h"

/*
 * validates an inode block or chunk; returns # of good inodes.
 * the dinodes are verified using verify_uncertain_dinode(), which
 * means only the basic inode info is checked, no fork checks.
 */
static int
check_aginode_block(
        xfs_mount_t             *mp,
        xfs_agnumber_t          agno,
        xfs_agblock_t           agbno)
{
        struct xfs_dinode       *dino_p;
        int                     i;
        int                     cnt = 0;
        struct xfs_buf          *bp;
        int                     error;

        /*
         * it's ok to read these possible inode blocks one at a
         * time because they don't belong to known inodes (if they
         * did, we'd know about them courtesy of the incore inode
         * tree and we wouldn't be here), and we stale the buffers
         * out so no one else will overlap them.
         */
        error = -libxfs_buf_read(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
                        XFS_FSB_TO_BB(mp, 1), LIBXFS_READBUF_SALVAGE, &bp,
                        NULL);
        if (error) {
                do_warn(_("cannot read agbno (%u/%u), disk block %" PRId64 "\n"),
                        agno, agbno, XFS_AGB_TO_DADDR(mp, agno, agbno));
                return 0;
        }

        for (i = 0; i < mp->m_sb.sb_inopblock; i++) {
                dino_p = xfs_make_iptr(mp, bp, i);
                if (!verify_uncertain_dinode(mp, dino_p, agno,
                                XFS_OFFBNO_TO_AGINO(mp, agbno, i)))
                        cnt++;
        }
        if (cnt)
                bp->b_ops = &xfs_inode_buf_ops;

        libxfs_buf_relse(bp);
        return cnt;
}

/*
 * tries to establish if the inode really exists in a valid
 * inode chunk. returns number of new inodes if things are good
 * and 0 if bad. start is the start of the discovered inode chunk.
 * routine assumes that ino is a legal inode number
 * (verified by libxfs_verify_ino()). If the inode chunk turns out
 * to be good, this routine will put the inode chunk into
 * the good inode chunk tree if required.
 *
 * the verify_(ag)inode* family of routines are utility
 * routines called by check_uncertain_aginodes() and
 * process_uncertain_aginodes().
 */
static int
verify_inode_chunk(xfs_mount_t          *mp,
                        xfs_ino_t       ino,
                        xfs_ino_t       *start_ino)
{
        xfs_agnumber_t  agno;
        xfs_agino_t     agino;
        xfs_agino_t     start_agino;
        xfs_agblock_t   agbno;
        xfs_agblock_t   start_agbno = 0;
        xfs_agblock_t   end_agbno;
        xfs_agblock_t   max_agbno;
        xfs_agblock_t   cur_agbno;
        xfs_agblock_t   chunk_start_agbno;
        xfs_agblock_t   chunk_stop_agbno;
        ino_tree_node_t *irec_before_p = NULL;
        ino_tree_node_t *irec_after_p = NULL;
        ino_tree_node_t *irec_p;
        ino_tree_node_t *irec_next_p;
        int             irec_cnt;
        int             ino_cnt = 0;
        int             num_blks;
        int             i;
        int             j;
        int             state;
        xfs_extlen_t    blen;
        struct xfs_ino_geometry *igeo = M_IGEO(mp);

        agno = XFS_INO_TO_AGNO(mp, ino);
        agino = XFS_INO_TO_AGINO(mp, ino);
        agbno = XFS_INO_TO_AGBNO(mp, ino);
        *start_ino = NULLFSINO;

        ASSERT(igeo->ialloc_blks > 0);

        if (agno == mp->m_sb.sb_agcount - 1)
                max_agbno = mp->m_sb.sb_dblocks -
                        (xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno;
        else
                max_agbno = mp->m_sb.sb_agblocks;

        /*
         * is the inode beyond the end of the AG?
         */
        if (agbno >= max_agbno)
                return 0;

        /*
         * check for the easy case, inodes per block >= XFS_INODES_PER_CHUNK
         * (multiple chunks per block)
         */
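        /*
         * (illustrative numbers, not requirements: XFS_INODES_PER_CHUNK
         * is 64, so this case needs sb_inopblock >= 64 -- e.g. 64k
         * blocks with 512-byte inodes give 128 inodes per block,
         * i.e. ialloc_blks == 1 and chunks_pblock == 2 whole chunks
         * in every inode block.)
         */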
        if (igeo->ialloc_blks == 1) {
                if (agbno > max_agbno)
                        return 0;
                if (check_aginode_block(mp, agno, agbno) == 0)
                        return 0;

                pthread_mutex_lock(&ag_locks[agno].lock);

                state = get_bmap(agno, agbno);
                switch (state) {
                case XR_E_INO:
                        do_warn(
                _("uncertain inode block %d/%d already known\n"),
                                agno, agbno);
                        break;
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
                        set_bmap(agno, agbno, XR_E_INO);
                        break;
                case XR_E_MULT:
                case XR_E_INUSE:
                case XR_E_INUSE_FS:
                case XR_E_FS_MAP:
                        /*
                         * if block is already claimed, forget it.
                         */
                        do_warn(
                _("inode block %d/%d multiply claimed, (state %d)\n"),
                                agno, agbno, state);
                        set_bmap(agno, agbno, XR_E_MULT);
                        pthread_mutex_unlock(&ag_locks[agno].lock);
                        return 0;
                default:
                        do_warn(
                _("inode block %d/%d bad state, (state %d)\n"),
                                agno, agbno, state);
                        set_bmap(agno, agbno, XR_E_INO);
                        break;
                }

                pthread_mutex_unlock(&ag_locks[agno].lock);

                start_agino = XFS_AGB_TO_AGINO(mp, agbno);
                *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);

                /*
                 * put new inode record(s) into inode tree
                 */
                for (j = 0; j < chunks_pblock; j++) {
                        if ((irec_p = find_inode_rec(mp, agno, start_agino))
                                        == NULL) {
                                irec_p = set_inode_free_alloc(mp, agno,
                                                start_agino);
                                for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
                                        set_inode_free(irec_p, i);
                        }
                        if (start_agino <= agino && agino <
                                        start_agino + XFS_INODES_PER_CHUNK)
                                set_inode_used(irec_p, agino - start_agino);

                        start_agino += XFS_INODES_PER_CHUNK;
                        ino_cnt += XFS_INODES_PER_CHUNK;
                }

                return ino_cnt;
        } else if (fs_aligned_inodes) {
                /*
                 * next easy case -- aligned inode filesystem.
                 * just check out the chunk
                 */
                start_agbno = rounddown(XFS_INO_TO_AGBNO(mp, ino),
                                        fs_ino_alignment);
                end_agbno = start_agbno + igeo->ialloc_blks;

                /*
                 * if this fs has aligned inodes but the end of the
                 * chunk is beyond the end of the ag, this is a bad
                 * chunk
                 */
                if (end_agbno > max_agbno)
                        return 0;

                /*
                 * check out all blocks in chunk
                 */
                ino_cnt = 0;
                for (cur_agbno = start_agbno; cur_agbno < end_agbno;
                                cur_agbno++) {
                        ino_cnt += check_aginode_block(mp, agno, cur_agbno);
                }

                /*
                 * if we lose either 2 blocks worth of inodes or >25% of
                 * the chunk, just forget it.
                 */
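                /*
                 * worked example (assuming 4k blocks and 512-byte
                 * inodes, so sb_inopblock == 8): both clauses reduce
                 * to ino_cnt < 48, i.e. the chunk survives only if at
                 * most 16 of its 64 inodes -- two blocks' worth, 25%
                 * of the chunk -- failed verification.
                 */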
                if (ino_cnt < XFS_INODES_PER_CHUNK - 2 * mp->m_sb.sb_inopblock
                                || ino_cnt < XFS_INODES_PER_CHUNK - 16)
                        return 0;

                /*
                 * ok, put the record into the tree, if no conflict.
                 */
                if (find_inode_rec(mp, agno, XFS_AGB_TO_AGINO(mp, start_agbno)))
                        return 0;

                start_agino = XFS_AGB_TO_AGINO(mp, start_agbno);
                *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);

                irec_p = set_inode_free_alloc(mp, agno,
                                XFS_AGB_TO_AGINO(mp, start_agbno));

                for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
                        set_inode_free(irec_p, i);

                ASSERT(start_agino <= agino &&
                                start_agino + XFS_INODES_PER_CHUNK > agino);

                set_inode_used(irec_p, agino - start_agino);

                return XFS_INODES_PER_CHUNK;
        }

        /*
         * hard case -- pre-6.3 filesystem.
         * set default start/end agbnos and ensure agbnos are legal.
         * we're setting a range [start_agbno, end_agbno) such that
         * a discovered inode chunk completely within that range
         * would include the inode passed into us.
         */
        if (igeo->ialloc_blks > 1) {
                if (agino > igeo->ialloc_inos)
                        start_agbno = agbno - igeo->ialloc_blks + 1;
                else
                        start_agbno = 1;
        }
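        /*
         * e.g. with 4k blocks and 256-byte inodes (ialloc_blks == 4,
         * illustrative numbers), an inode in block agbno can only
         * belong to a chunk starting in [agbno - 3, agbno], so the
         * initial window is [agbno - 3, agbno + 4).
         */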

        end_agbno = agbno + igeo->ialloc_blks;

        if (end_agbno > max_agbno)
                end_agbno = max_agbno;

        /*
         * search tree for known inodes within +/- 1 inode chunk range
         */
        irec_before_p = irec_after_p = NULL;

        find_inode_rec_range(mp, agno, XFS_AGB_TO_AGINO(mp, start_agbno),
                XFS_OFFBNO_TO_AGINO(mp, end_agbno, mp->m_sb.sb_inopblock - 1),
                &irec_before_p, &irec_after_p);

        /*
         * if we have known inode chunks in our search range, establish
         * their start and end points to tighten our search range. range
         * is [start, end) -- i.e. max/end agbno is one beyond the
         * last block to be examined. the avl routines work this way.
         */
        if (irec_before_p) {
                /*
                 * only one inode record in the range, move one boundary in
                 */
                if (irec_before_p == irec_after_p) {
                        if (irec_before_p->ino_startnum < agino)
                                start_agbno = XFS_AGINO_TO_AGBNO(mp,
                                                irec_before_p->ino_startnum +
                                                XFS_INODES_PER_CHUNK);
                        else
                                end_agbno = XFS_AGINO_TO_AGBNO(mp,
                                                irec_before_p->ino_startnum);
                }

                /*
                 * find the start of the gap in the search range (which
                 * should contain our unknown inode). if the only irec
                 * within +/- 1 chunks starts after the inode we're
                 * looking for, skip this stuff since the end_agbno
                 * of the range has already been trimmed in to not
                 * include that irec.
                 */
                if (irec_before_p->ino_startnum < agino) {
                        irec_p = irec_before_p;
                        irec_next_p = next_ino_rec(irec_p);

                        while (irec_next_p != NULL &&
                                irec_p->ino_startnum + XFS_INODES_PER_CHUNK ==
                                        irec_next_p->ino_startnum) {
                                irec_p = irec_next_p;
                                irec_next_p = next_ino_rec(irec_next_p);
                        }

                        start_agbno = XFS_AGINO_TO_AGBNO(mp,
                                        irec_p->ino_startnum) +
                                        igeo->ialloc_blks;

                        /*
                         * we know that the inode we're trying to verify isn't
                         * in an inode chunk so the next ino_rec marks the end
                         * of the gap -- is it within the search range?
                         */
                        if (irec_next_p != NULL &&
                                        agino + igeo->ialloc_inos >=
                                        irec_next_p->ino_startnum)
                                end_agbno = XFS_AGINO_TO_AGBNO(mp,
                                                irec_next_p->ino_startnum);
                }

                ASSERT(start_agbno < end_agbno);
        }

        /*
         * if the gap is too small to contain a chunk, we lose.
         * this means that inode chunks known to be good surround
         * the inode in question and that the space between them
         * is too small for a legal inode chunk
         */
        if (end_agbno - start_agbno < igeo->ialloc_blks)
                return 0;

        /*
         * now grunge around the disk, start at the inode block and
         * go in each direction until you hit a non-inode block or
         * run into a range boundary. A non-inode block is a block
         * with *no* good inodes in it. Unfortunately, we can't
         * co-opt bad blocks into inode chunks (which might take
         * care of disk blocks that turn into zeroes) because the
         * filesystem could very well allocate two inode chunks
         * with a one block file in between and we'd zap the file.
         * We're better off just losing the rest of the
         * inode chunk instead.
         */
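        /*
         * concretely: the downward scan below stops at the first
         * block with no good inodes, leaving chunk_start_agbno at
         * the lowest good block; the upward scan then leaves
         * chunk_stop_agbno one past the highest good block.
         */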
        for (cur_agbno = agbno; cur_agbno >= start_agbno; cur_agbno--) {
                /*
                 * if the block has no inodes, it's a bad block so
                 * break out now without decrementing cur_agbno so
                 * chunk start blockno will be set to the last good block
                 */
                if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
                        break;
                ino_cnt += irec_cnt;
        }

        chunk_start_agbno = cur_agbno + 1;

        for (cur_agbno = agbno + 1; cur_agbno < end_agbno; cur_agbno++) {
                /*
                 * if the block has no inodes, it's a bad block so
                 * break out now without incrementing cur_agbno so
                 * chunk stop blockno will be set to the block
                 * immediately after the last good block.
                 */
                if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
                        break;
                ino_cnt += irec_cnt;
        }

        chunk_stop_agbno = cur_agbno;

        num_blks = chunk_stop_agbno - chunk_start_agbno;

        if (num_blks < igeo->ialloc_blks || ino_cnt == 0)
                return 0;

        /*
         * XXX - later - if the entire range is selected and they're all
         * good inodes, keep searching in either direction until the
         * range of inodes ends, then split it into chunks.
         * for now, just take one chunk's worth starting at the lowest
         * possible point and hopefully we'll pick the rest up later.
         *
         * XXX - if we were going to fix up an inode chunk for
         * any good inodes in the chunk, this is where we would
         * do it. For now, keep it simple and lose the rest of
         * the chunk
         */

        if (num_blks % igeo->ialloc_blks != 0) {
                num_blks = rounddown(num_blks, igeo->ialloc_blks);
                chunk_stop_agbno = chunk_start_agbno + num_blks;
        }
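        /*
         * e.g. if the scan found 11 contiguous inode blocks but
         * ialloc_blks == 8 (illustrative), only the first 8 blocks
         * are kept as the candidate chunk; the trailing 3 are
         * dropped and hopefully picked up later.
         */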

        /*
         * ok, we've got a candidate inode chunk. now we have to
         * verify that we aren't trying to use blocks that are already
         * in use. If so, mark them as multiply claimed since odds
         * are very low that we found this chunk by stumbling across
         * user data -- we're probably here as a result of a directory
         * entry or an iunlinked pointer
         */
        pthread_mutex_lock(&ag_locks[agno].lock);
        for (cur_agbno = chunk_start_agbno;
             cur_agbno < chunk_stop_agbno;
             cur_agbno += blen) {
                state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
                switch (state) {
                case XR_E_MULT:
                case XR_E_INUSE:
                case XR_E_INUSE_FS:
                case XR_E_FS_MAP:
                        do_warn(
                _("inode block %d/%d multiply claimed, (state %d)\n"),
                                agno, cur_agbno, state);
                        set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT);
                        pthread_mutex_unlock(&ag_locks[agno].lock);
                        return 0;
                case XR_E_INO:
                        do_error(
                _("uncertain inode block overlap, agbno = %d, ino = %" PRIu64 "\n"),
                                agbno, ino);
                        break;
                default:
                        break;
                }
        }
        pthread_mutex_unlock(&ag_locks[agno].lock);

        /*
         * ok, chunk is good. put the record into the tree if required,
         * and fill in the bitmap. All inodes will be marked as "free"
         * except for the one that led us to discover the chunk. That's
         * ok because we'll override the free setting later if the
         * contents of the inode indicate it's in use.
         */
        start_agino = XFS_AGB_TO_AGINO(mp, chunk_start_agbno);
        *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);

        ASSERT(find_inode_rec(mp, agno, start_agino) == NULL);

        irec_p = set_inode_free_alloc(mp, agno, start_agino);
        for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
                set_inode_free(irec_p, i);

        ASSERT(start_agino <= agino &&
                        start_agino + XFS_INODES_PER_CHUNK > agino);

        set_inode_used(irec_p, agino - start_agino);

        pthread_mutex_lock(&ag_locks[agno].lock);

        for (cur_agbno = chunk_start_agbno;
             cur_agbno < chunk_stop_agbno;
             cur_agbno += blen) {
                state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
                switch (state) {
                case XR_E_INO:
                        do_error(
                _("uncertain inode block %" PRIu64 " already known\n"),
                                XFS_AGB_TO_FSB(mp, agno, cur_agbno));
                        break;
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
                        set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
                        break;
                case XR_E_MULT:
                case XR_E_INUSE:
                case XR_E_INUSE_FS:
                case XR_E_FS_MAP:
                        do_error(
                _("inode block %d/%d multiply claimed, (state %d)\n"),
                                agno, cur_agbno, state);
                        break;
                default:
                        do_warn(
                _("inode block %d/%d bad state, (state %d)\n"),
                                agno, cur_agbno, state);
                        set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
                        break;
                }
        }
        pthread_mutex_unlock(&ag_locks[agno].lock);

        return ino_cnt;
}

/*
 * same as above only for ag inode chunks
 */
static int
verify_aginode_chunk(xfs_mount_t        *mp,
                        xfs_agnumber_t  agno,
                        xfs_agino_t     agino,
                        xfs_agino_t     *agino_start)
{
        xfs_ino_t       ino;
        int             res;

        res = verify_inode_chunk(mp, XFS_AGINO_TO_INO(mp, agno, agino), &ino);

        if (res)
                *agino_start = XFS_INO_TO_AGINO(mp, ino);
        else
                *agino_start = NULLAGINO;

        return res;
}

/*
 * this does the same as the two above, except that it returns a
 * pointer to the inode record in the good inode tree
 */
static ino_tree_node_t *
verify_aginode_chunk_irec(xfs_mount_t   *mp,
                        xfs_agnumber_t  agno,
                        xfs_agino_t     agino)
{
        xfs_agino_t     start_agino;
        ino_tree_node_t *irec = NULL;

        if (verify_aginode_chunk(mp, agno, agino, &start_agino))
                irec = find_inode_rec(mp, agno, start_agino);

        return irec;
}

/*
 * Set the state of an inode block during inode chunk processing. The block is
 * expected to be in the free or inode state. If free, it transitions to the
 * inode state. Warn if the block is in neither expected state as this indicates
 * multiply claimed blocks.
 */
static void
process_inode_agbno_state(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno,
        xfs_agblock_t           agbno)
{
        int                     state;

        pthread_mutex_lock(&ag_locks[agno].lock);
        state = get_bmap(agno, agbno);
        switch (state) {
        case XR_E_INO:  /* already marked */
                break;
        case XR_E_UNKNOWN:
        case XR_E_FREE:
        case XR_E_FREE1:
                set_bmap(agno, agbno, XR_E_INO);
                break;
        case XR_E_BAD_STATE:
                do_error(_("bad state in block map %d\n"), state);
                break;
        default:
                set_bmap(agno, agbno, XR_E_MULT);
                do_warn(
        _("inode block %" PRIu64 " multiply claimed, state was %d\n"),
                        XFS_AGB_TO_FSB(mp, agno, agbno), state);
                break;
        }
        pthread_mutex_unlock(&ag_locks[agno].lock);
}

/*
 * processes an inode allocation chunk/block, returns 1 on I/O errors,
 * 0 otherwise
 *
 * *bogus is set to 1 if the entire set of inodes is bad.
 */
static int
process_inode_chunk(
        xfs_mount_t             *mp,
        xfs_agnumber_t          agno,
        int                     num_inos,
        ino_tree_node_t         *first_irec,
        int                     ino_discovery,
        int                     check_dups,
        int                     extra_attr_check,
        int                     *bogus)
{
        xfs_ino_t               parent;
        ino_tree_node_t         *ino_rec;
        struct xfs_buf          **bplist;
        struct xfs_dinode       *dino;
        int                     icnt;
        int                     status;
        int                     bp_found;
        int                     is_used;
        int                     ino_dirty;
        int                     irec_offset;
        int                     ibuf_offset;
        xfs_agino_t             agino;
        xfs_agblock_t           agbno;
        xfs_ino_t               ino;
        int                     dirty = 0;
        int                     isa_dir = 0;
        int                     cluster_count;
        int                     bp_index;
        int                     cluster_offset;
        struct xfs_ino_geometry *igeo = M_IGEO(mp);
        bool                    can_punch_sparse = false;
        int                     error;

        ASSERT(first_irec != NULL);
        ASSERT(XFS_AGINO_TO_OFFSET(mp, first_irec->ino_startnum) == 0);

        *bogus = 0;
        ASSERT(igeo->ialloc_blks > 0);

        cluster_count = XFS_INODES_PER_CHUNK / M_IGEO(mp)->inodes_per_cluster;
        if (cluster_count == 0)
                cluster_count = 1;
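        /*
         * e.g. with 512-byte inodes and 16k inode clusters
         * (illustrative), inodes_per_cluster == 32 and a 64-inode
         * chunk spans cluster_count == 2 cluster buffers; the clamp
         * above covers clusters bigger than a whole chunk.
         */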

        if (xfs_has_sparseinodes(mp) &&
            M_IGEO(mp)->inodes_per_cluster >= XFS_INODES_PER_HOLEMASK_BIT)
                can_punch_sparse = true;

        /*
         * get all blocks required to read in this chunk (may wind up
         * having to process more chunks in a multi-chunk per block fs)
         */
        agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);

        /*
         * set up first irec
         */
        ino_rec = first_irec;
        irec_offset = 0;

        bplist = malloc(cluster_count * sizeof(struct xfs_buf *));
        if (bplist == NULL)
                do_error(_("failed to allocate %zd bytes of memory\n"),
                        cluster_count * sizeof(struct xfs_buf *));

        for (bp_index = 0; bp_index < cluster_count; bp_index++) {
                /*
                 * Skip the cluster buffer if the first inode is sparse. The
                 * remaining inodes in the cluster share the same state as
                 * sparse inodes occur at cluster granularity.
                 */
                if (is_inode_sparse(ino_rec, irec_offset)) {
                        pftrace("skip sparse inode, startnum 0x%x idx %d",
                                ino_rec->ino_startnum, irec_offset);
                        bplist[bp_index] = NULL;
                        goto next_readbuf;
                }

                pftrace("about to read off %llu in AG %d",
                        XFS_AGB_TO_DADDR(mp, agno, agbno), agno);

                error = -libxfs_buf_read(mp->m_dev,
                                XFS_AGB_TO_DADDR(mp, agno, agbno),
                                XFS_FSB_TO_BB(mp,
                                        M_IGEO(mp)->blocks_per_cluster),
                                LIBXFS_READBUF_SALVAGE, &bplist[bp_index],
                                &xfs_inode_buf_ops);
                if (error) {
                        do_warn(_("cannot read inode %" PRIu64 ", disk block %" PRId64 ", cnt %d\n"),
                                XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
                                XFS_AGB_TO_DADDR(mp, agno, agbno),
                                XFS_FSB_TO_BB(mp,
                                        M_IGEO(mp)->blocks_per_cluster));
                        while (bp_index > 0) {
                                bp_index--;
                                /* sparse clusters leave NULL entries */
                                if (bplist[bp_index])
                                        libxfs_buf_relse(bplist[bp_index]);
                        }
                        free(bplist);
                        return 1;
                }

                pftrace("readbuf %p (%llu, %d) in AG %d", bplist[bp_index],
                        (long long)xfs_buf_daddr(bplist[bp_index]),
                        bplist[bp_index]->b_length, agno);

                bplist[bp_index]->b_ops = &xfs_inode_buf_ops;

next_readbuf:
                irec_offset += mp->m_sb.sb_inopblock *
                                M_IGEO(mp)->blocks_per_cluster;
                agbno += M_IGEO(mp)->blocks_per_cluster;
        }
        agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);

        /*
         * initialize counters
         */
        irec_offset = 0;
        ibuf_offset = 0;
        cluster_offset = 0;
        icnt = 0;
        status = 0;
        bp_found = 0;
        bp_index = 0;

        /*
         * verify inode chunk if necessary
         */
        if (ino_discovery) {
                for (;;) {
                        agino = irec_offset + ino_rec->ino_startnum;

                        /* no buffers for sparse clusters */
                        if (bplist[bp_index]) {
                                /* make inode pointer */
                                dino = xfs_make_iptr(mp, bplist[bp_index],
                                                cluster_offset);

                                /*
                                 * we always treat the root and realtime
                                 * inodes as verified, even though we may
                                 * have to reset them later, to keep from
                                 * losing the chunk that they're in
                                 */
                                if (verify_dinode(mp, dino, agno, agino) == 0 ||
                                                (agno == 0 &&
                                                (mp->m_sb.sb_rootino == agino ||
                                                 mp->m_sb.sb_rsumino == agino ||
                                                 mp->m_sb.sb_rbmino == agino))) {
                                        status++;
                                        bp_found++;
                                }
                        }

                        irec_offset++;
                        icnt++;
                        cluster_offset++;

                        if (icnt == igeo->ialloc_inos &&
                                        irec_offset == XFS_INODES_PER_CHUNK) {
                                /*
                                 * done! - finished up irec and block
                                 * simultaneously
                                 */
                                break;
                        } else if (irec_offset == XFS_INODES_PER_CHUNK) {
                                /*
                                 * get new irec (multiple chunks per block fs)
                                 */
                                ino_rec = next_ino_rec(ino_rec);
                                ASSERT(ino_rec->ino_startnum == agino + 1);
                                irec_offset = 0;
                        }
                        if (cluster_offset == M_IGEO(mp)->inodes_per_cluster) {
                                if (can_punch_sparse &&
                                    bplist[bp_index] != NULL &&
                                    bp_found == 0) {
                                        /*
                                         * We didn't find any good inodes in
                                         * this cluster, blow it away before
                                         * moving on to the next one.
                                         */
                                        libxfs_buf_relse(bplist[bp_index]);
                                        bplist[bp_index] = NULL;
                                }
                                bp_index++;
                                cluster_offset = 0;
                                bp_found = 0;
                        }
                }

                if (can_punch_sparse &&
                    bp_index < cluster_count &&
                    bplist[bp_index] != NULL &&
                    bp_found == 0) {
                        /*
                         * We didn't find any good inodes in this cluster, blow
                         * it away.
                         */
                        libxfs_buf_relse(bplist[bp_index]);
                        bplist[bp_index] = NULL;
                }

                /*
                 * if chunk/block is bad, blow it off. the inode records
                 * will be deleted by the caller if appropriate.
                 */
                if (!status) {
                        *bogus = 1;
                        for (bp_index = 0; bp_index < cluster_count; bp_index++)
                                if (bplist[bp_index])
                                        libxfs_buf_relse(bplist[bp_index]);
                        free(bplist);
                        return 0;
                }

                /*
                 * reset irec and counters
                 */
                ino_rec = first_irec;

                irec_offset = 0;
                cluster_offset = 0;
                bp_index = 0;
                icnt = 0;
                status = 0;
        }

        /*
         * mark block as an inode block in the incore bitmap
         */
        if (!is_inode_sparse(ino_rec, irec_offset))
                process_inode_agbno_state(mp, agno, agbno);

        for (;;) {
                agino = irec_offset + ino_rec->ino_startnum;
                ino = XFS_AGINO_TO_INO(mp, agno, agino);

                if (is_inode_sparse(ino_rec, irec_offset))
                        goto process_next;

                /*
                 * Repair skips reading the cluster buffer if the first inode
                 * in the cluster is marked as sparse. If subsequent inodes in
                 * the cluster buffer are /not/ marked sparse, there won't be
                 * a buffer, so we need to avoid the null pointer dereference.
                 */
                if (bplist[bp_index] == NULL) {
                        do_warn(
        _("imap claims inode %" PRIu64 " is present, but inode cluster is sparse, "),
                                ino);
                        if (!no_modify)
                                do_warn(_("correcting imap\n"));
                        else
                                do_warn(_("would correct imap\n"));
                        set_inode_sparse(ino_rec, irec_offset);
                        set_inode_free(ino_rec, irec_offset);
                        goto process_next;
                }

                /* make inode pointer */
                dino = xfs_make_iptr(mp, bplist[bp_index], cluster_offset);

                is_used = 3;
                ino_dirty = 0;
                parent = 0;

                status = process_dinode(mp, dino, agno, agino,
                                is_inode_free(ino_rec, irec_offset),
                                &ino_dirty, &is_used, ino_discovery,
                                check_dups, extra_attr_check, &isa_dir,
                                &parent);

                ASSERT(is_used != 3);
                if (ino_dirty) {
                        dirty = 1;
                        libxfs_dinode_calc_crc(mp, dino);
                }

                /*
                 * XXX - if we want to try and keep
                 * track of whether we need to bang on
                 * the inode maps (instead of just
                 * blindly reconstructing them like
                 * we do now), this is where to start.
                 */
                if (is_used) {
                        if (is_inode_free(ino_rec, irec_offset)) {
                                do_warn(
        _("imap claims in-use inode %" PRIu64 " is free, "),
                                        ino);

                                if (!no_modify)
                                        do_warn(_("correcting imap\n"));
                                else
                                        do_warn(_("would correct imap\n"));
                        }
                        set_inode_used(ino_rec, irec_offset);

                        /*
                         * store the on-disk file type for comparing in
                         * phase 6.
                         */
                        set_inode_ftype(ino_rec, irec_offset,
                                libxfs_mode_to_ftype(be16_to_cpu(dino->di_mode)));

                        /*
                         * store on-disk nlink count for comparing in phase 7
                         */
                        set_inode_disk_nlinks(ino_rec, irec_offset,
                                dino->di_version > 1
                                        ? be32_to_cpu(dino->di_nlink)
                                        : be16_to_cpu(dino->di_onlink));

                } else {
                        set_inode_free(ino_rec, irec_offset);
                }

                /*
                 * track whether the inode is a directory so that if we
                 * lose the root inode, or it turns into a non-directory,
                 * we can double-check later whether or not we need to
                 * reinitialize it.
                 */
                if (isa_dir) {
                        set_inode_isadir(ino_rec, irec_offset);
                        if (ino_discovery) {
                                ASSERT(parent != 0);
                                set_inode_parent(ino_rec, irec_offset, parent);
                                ASSERT(parent ==
                                        get_inode_parent(ino_rec, irec_offset));
                        }
                } else {
                        clear_inode_isadir(ino_rec, irec_offset);
                }

                if (status) {
                        if (mp->m_sb.sb_rootino == ino) {
                                need_root_inode = 1;

                                if (!no_modify) {
                                        do_warn(
        _("cleared root inode %" PRIu64 "\n"),
                                                ino);
                                } else {
                                        do_warn(
        _("would clear root inode %" PRIu64 "\n"),
                                                ino);
                                }
                        } else if (mp->m_sb.sb_rbmino == ino) {
                                need_rbmino = 1;

                                if (!no_modify) {
                                        do_warn(
        _("cleared realtime bitmap inode %" PRIu64 "\n"),
                                                ino);
                                } else {
                                        do_warn(
        _("would clear realtime bitmap inode %" PRIu64 "\n"),
                                                ino);
                                }
                        } else if (mp->m_sb.sb_rsumino == ino) {
                                need_rsumino = 1;

                                if (!no_modify) {
                                        do_warn(
        _("cleared realtime summary inode %" PRIu64 "\n"),
                                                ino);
                                } else {
                                        do_warn(
        _("would clear realtime summary inode %" PRIu64 "\n"),
                                                ino);
                                }
                        } else if (!no_modify) {
                                do_warn(_("cleared inode %" PRIu64 "\n"),
                                        ino);
                        } else {
                                do_warn(_("would have cleared inode %" PRIu64 "\n"),
                                        ino);
                        }
                        clear_inode_was_rl(ino_rec, irec_offset);
                }

process_next:
                irec_offset++;
                ibuf_offset++;
                icnt++;
                cluster_offset++;

                if (icnt == igeo->ialloc_inos &&
                                irec_offset == XFS_INODES_PER_CHUNK) {
                        /*
                         * done! - finished up irec and block simultaneously
                         */
                        for (bp_index = 0; bp_index < cluster_count; bp_index++) {
                                if (!bplist[bp_index])
                                        continue;

                                pftrace("put/writebuf %p (%llu) in AG %d",
                                        bplist[bp_index], (long long)
                                        xfs_buf_daddr(bplist[bp_index]), agno);

                                if (dirty && !no_modify)
                                        libxfs_buf_mark_dirty(bplist[bp_index]);
                                libxfs_buf_relse(bplist[bp_index]);
                        }
                        free(bplist);
                        break;
                } else if (ibuf_offset == mp->m_sb.sb_inopblock) {
                        /*
                         * mark block as an inode block in the incore bitmap
                         * and reset inode buffer offset counter
                         */
                        ibuf_offset = 0;
                        agbno++;

                        if (!is_inode_sparse(ino_rec, irec_offset))
                                process_inode_agbno_state(mp, agno, agbno);
                } else if (irec_offset == XFS_INODES_PER_CHUNK) {
                        /*
                         * get new irec (multiple chunks per block fs)
                         */
                        ino_rec = next_ino_rec(ino_rec);
                        ASSERT(ino_rec->ino_startnum == agino + 1);
                        irec_offset = 0;
                }
                if (cluster_offset == M_IGEO(mp)->inodes_per_cluster) {
                        bp_index++;
                        cluster_offset = 0;
                }
        }
        return 0;
}

/*
 * check all inodes mentioned in the ag's incore inode maps.
 * the map may be incomplete. If so, we'll catch the missing
 * inodes (hopefully) when we traverse the directory tree.
 * ino_discovery is set to 1 if directory inodes should be
 * processed for internal consistency, parent setting and
 * discovery of unknown inodes. this only happens
 * in phase 3. check_dups is set to 1 if we're looking for
 * inodes that reference duplicate blocks so we can trash
 * the inode right then and there. this is set only in
 * phase 4 after we've run through and set the bitmap once.
 */
void
process_aginodes(
        xfs_mount_t             *mp,
        prefetch_args_t         *pf_args,
        xfs_agnumber_t          agno,
        int                     ino_discovery,
        int                     check_dups,
        int                     extra_attr_check)
{
        int                     num_inos, bogus;
        ino_tree_node_t         *ino_rec, *first_ino_rec, *prev_ino_rec;
        struct xfs_ino_geometry *igeo = M_IGEO(mp);
#ifdef XR_PF_TRACE
        int                     count;
#endif
        first_ino_rec = ino_rec = findfirst_inode_rec(agno);

        while (ino_rec != NULL) {
                /*
                 * paranoia - step through inode records until we step
                 * through a full allocation of inodes. this could
                 * be an issue in big-block filesystems where a block
                 * can hold more than one inode chunk. make sure to
                 * grab the record corresponding to the beginning of
                 * the next block before we call the processing routines.
                 */
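                /*
                 * e.g. on a 64k-block fs with 512-byte inodes
                 * (illustrative), ialloc_inos == 128, so one allocation
                 * spans two 64-inode records and we must walk past both.
                 */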
                num_inos = XFS_INODES_PER_CHUNK;
                while (num_inos < igeo->ialloc_inos && ino_rec != NULL) {
                        /*
                         * inode chunks will always be aligned and sized
                         * correctly
                         */
                        if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
                                num_inos += XFS_INODES_PER_CHUNK;
                }

                ASSERT(num_inos == igeo->ialloc_inos);

                if (pf_args) {
                        sem_post(&pf_args->ra_count);
#ifdef XR_PF_TRACE
                        sem_getvalue(&pf_args->ra_count, &count);
                        pftrace("processing inode chunk %p in AG %d (sem count = %d)",
                                first_ino_rec, agno, count);
#endif
                }

                if (process_inode_chunk(mp, agno, num_inos, first_ino_rec,
                                ino_discovery, check_dups, extra_attr_check,
                                &bogus)) {
                        /* XXX - i/o error, we've got a problem */
                        abort();
                }

                if (!bogus)
                        first_ino_rec = ino_rec = next_ino_rec(ino_rec);
                else {
                        /*
                         * inodes pointed to by this record are
                         * completely bogus, blow the records for
                         * this chunk out.
                         * the inode block(s) will get reclaimed
                         * in phase 4 when the block map is
                         * reconstructed after inodes claiming
                         * duplicate blocks are deleted.
                         */
                        num_inos = 0;
                        ino_rec = first_ino_rec;
                        while (num_inos < igeo->ialloc_inos &&
                                        ino_rec != NULL) {
                                prev_ino_rec = ino_rec;

                                if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
                                        num_inos += XFS_INODES_PER_CHUNK;

                                get_inode_rec(mp, agno, prev_ino_rec);
                                free_inode_rec(agno, prev_ino_rec);
                        }

                        first_ino_rec = ino_rec;
                }
                PROG_RPT_INC(prog_rpt_done[agno], num_inos);
        }
}

/*
 * verify the uncertain inode list for an ag.
 * Good inodes get moved into the good inode tree.
 * This routine is a no-op if there are no uncertain inode
 * records to be processed, and it destroys the entire
 * uncertain inode tree for the ag as a side-effect.
 */
void
check_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
{
        ino_tree_node_t         *irec;
        ino_tree_node_t         *nrec;
        xfs_agino_t             start;
        xfs_agino_t             i;
        xfs_agino_t             agino;
        int                     got_some;
        struct xfs_perag        *pag;

        nrec = NULL;
        got_some = 0;

        clear_uncertain_ino_cache(agno);

        if ((irec = findfirst_uncertain_inode_rec(agno)) == NULL)
                return;

        /*
         * the trick here is to find a contiguous range
         * of inodes, make sure that it doesn't overlap
         * with a known to exist chunk, and then make
         * sure it is a number of entire chunks.
         * we check on-disk once we have an idea of what's
         * going on just to double-check.
         *
         * process the uncertain inode record list and look
         * on disk to see if the referenced inodes are good
         */
        pag = libxfs_perag_get(mp, agno);
        do {
                /*
                 * check every confirmed (which in this case means
                 * inode that we really suspect to be an inode) inode
                 */
                for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
                        if (!is_inode_confirmed(irec, i))
                                continue;

                        agino = i + irec->ino_startnum;

                        if (!libxfs_verify_agino(pag, agino))
                                continue;

                        if (nrec != NULL && nrec->ino_startnum <= agino &&
                                        agino < nrec->ino_startnum +
                                        XFS_INODES_PER_CHUNK)
                                continue;

                        if ((nrec = find_inode_rec(mp, agno, agino)) == NULL)
                                if (libxfs_verify_agino(pag, agino))
                                        if (verify_aginode_chunk(mp, agno,
                                                        agino, &start))
                                                got_some = 1;
                }

                get_uncertain_inode_rec(mp, agno, irec);
                free_inode_rec(agno, irec);

                irec = findfirst_uncertain_inode_rec(agno);
        } while (irec != NULL);
        libxfs_perag_put(pag);

        if (got_some)
                do_warn(_("found inodes not in the inode allocation tree\n"));

        return;
}

/*
 * verify and process the uncertain inodes for an ag.
 * this is different from check_uncertain_aginodes() in that we
 * can't just move the good inodes into the good inode tree and
 * let process_aginodes() deal with them, because this gets called
 * after process_aginodes() has been run on the ag inode tree.
 * So we have to process the inodes as well as verify them, since
 * we don't want to rerun process_aginodes() on a tree that has
 * mostly been processed.
 *
 * Note that if this routine does process some inodes, it can
 * add uncertain inodes to any ag which would require that
 * the routine be called again to process those newly-added
 * uncertain inodes.
 *
 * returns 0 if no inodes were processed and 1 if inodes
 * were processed (and it is possible that new uncertain
 * inodes were discovered).
 *
 * as a side-effect, this routine tears down the uncertain
 * inode tree for the ag.
 */
int
process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
{
        ino_tree_node_t         *irec;
        ino_tree_node_t         *nrec;
        xfs_agino_t             agino;
        int                     i;
        int                     bogus;
        int                     cnt;
        int                     got_some;
        struct xfs_ino_geometry *igeo = M_IGEO(mp);
        struct xfs_perag        *pag;

#ifdef XR_INODE_TRACE
        fprintf(stderr, "in process_uncertain_aginodes, agno = %d\n", agno);
#endif

        got_some = 0;

        clear_uncertain_ino_cache(agno);

        if ((irec = findfirst_uncertain_inode_rec(agno)) == NULL)
                return 0;

        nrec = NULL;

        pag = libxfs_perag_get(mp, agno);
        do {
                /*
                 * check every confirmed inode
                 */
                for (cnt = i = 0; i < XFS_INODES_PER_CHUNK; i++) {
                        if (!is_inode_confirmed(irec, i))
                                continue;
                        cnt++;
                        agino = i + irec->ino_startnum;
#ifdef XR_INODE_TRACE
                        fprintf(stderr, "ag inode = %d (0x%x)\n", agino, agino);
#endif
                        /*
                         * skip over inodes already processed (in the
                         * good tree), bad inode numbers, and inode numbers
                         * pointing to bogus inodes
                         */
                        if (!libxfs_verify_agino(pag, agino))
                                continue;

                        if (nrec != NULL && nrec->ino_startnum <= agino &&
                                        agino < nrec->ino_startnum +
                                        XFS_INODES_PER_CHUNK)
                                continue;

                        if ((nrec = find_inode_rec(mp, agno, agino)) != NULL)
                                continue;

                        /*
                         * verify the chunk. if good, it will be
                         * added to the good inode tree.
                         */
                        if ((nrec = verify_aginode_chunk_irec(mp,
                                        agno, agino)) == NULL)
                                continue;

                        got_some = 1;

                        /*
                         * process the inode record we just added
                         * to the good inode tree. The inode
                         * processing may add more records to the
                         * uncertain inode lists. always process the
                         * extended attribute structure because we might
                         * decide that some inodes are still in use
                         */
                        if (process_inode_chunk(mp, agno, igeo->ialloc_inos,
                                        nrec, 1, 0, 1, &bogus)) {
                                /* XXX - i/o error, we've got a problem */
                                abort();
                        }
                }

                ASSERT(cnt != 0);
                /*
                 * now return the uncertain inode record to the free pool
                 * and pull another one off the list for processing
                 */
                get_uncertain_inode_rec(mp, agno, irec);
                free_inode_rec(agno, irec);

                irec = findfirst_uncertain_inode_rec(agno);
        } while (irec != NULL);
        libxfs_perag_put(pag);

        if (got_some)
                do_warn(_("found inodes not in the inode allocation tree\n"));

        return 1;
}