]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/da_util.c
libfrog: clean up platform_nproc
[thirdparty/xfsprogs-dev.git] / repair / da_util.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2015 Red Hat, Inc.
4 * All Rights Reserved.
5 */
6
7 /* Various utilities for repair of directory and attribute metadata */
8
9 #include "libxfs.h"
10 #include "globals.h"
11 #include "err_protos.h"
12 #include "bmap.h"
13 #include "da_util.h"
14
15 /*
16 * the cursor gets passed up and down the da btree processing
17 * routines. The interior block processing routines use the
18 * cursor to determine if the pointers to and from the preceding
19 * and succeeding sibling blocks are ok and whether the values in
20 * the current block are consistent with the entries in the parent
21 * nodes. When a block is traversed, a parent-verification routine
22 * is called to verify if the next logical entry in the next level up
23 * is consistent with the greatest hashval in the next block of the
24 * current level. The verification routine is itself recursive and
25 * calls itself if it has to traverse an interior block to get
26 * the next logical entry. The routine recurses upwards through
27 * the tree until it finds a block where it can simply step to
28 * the next entry. The hashval in that entry should be equal to
29 * the hashval being passed to it (the greatest hashval in the block
30 * that the entry points to). If that isn't true, then the tree
31 * is blown and we need to trash it, salvage and trash it, or fix it.
32 * Currently, we just trash it.
33 */
34
35 /*
36 * Multibuffer handling.
37 * V2 directory blocks can be noncontiguous, needing multiple buffers.
38 * attr blocks are single blocks; this code handles that as well.
39 */
40 struct xfs_buf *
41 da_read_buf(
42 xfs_mount_t *mp,
43 int nex,
44 bmap_ext_t *bmp,
45 const struct xfs_buf_ops *ops)
46 {
47 #define MAP_ARRAY_SZ 4
48 struct xfs_buf_map map_array[MAP_ARRAY_SZ];
49 struct xfs_buf_map *map;
50 struct xfs_buf *bp;
51 int i;
52
53 if (nex > MAP_ARRAY_SZ) {
54 map = calloc(nex, sizeof(*map));
55 if (map == NULL) {
56 do_error(_("couldn't malloc dir2 buffer list\n"));
57 exit(1);
58 }
59 } else {
60 /* common case avoids calloc/free */
61 map = map_array;
62 }
63 for (i = 0; i < nex; i++) {
64 map[i].bm_bn = XFS_FSB_TO_DADDR(mp, bmp[i].startblock);
65 map[i].bm_len = XFS_FSB_TO_BB(mp, bmp[i].blockcount);
66 }
67 bp = libxfs_readbuf_map(mp->m_dev, map, nex, 0, ops);
68 if (map != map_array)
69 free(map);
70 return bp;
71 }
72
73 #define FORKNAME(type) (type == XFS_DATA_FORK ? _("directory") : _("attribute"))
74
75 /*
76 * walk tree from root to the left-most leaf block reading in
77 * blocks and setting up cursor. passes back file block number of the
78 * left-most leaf block if successful (bno). returns 1 if successful,
79 * 0 if unsuccessful.
80 */
81 int
82 traverse_int_dablock(
83 xfs_mount_t *mp,
84 da_bt_cursor_t *da_cursor,
85 xfs_dablk_t *rbno,
86 int whichfork)
87 {
88 bmap_ext_t *bmp;
89 xfs_dablk_t bno;
90 struct xfs_buf *bp;
91 int i;
92 int nex;
93 xfs_da_intnode_t *node;
94 bmap_ext_t lbmp;
95 struct xfs_da_geometry *geo;
96 struct xfs_da_node_entry *btree;
97 struct xfs_da3_icnode_hdr nodehdr;
98
99 if (whichfork == XFS_DATA_FORK) {
100 geo = mp->m_dir_geo;
101 bno = geo->leafblk;
102 } else {
103 geo = mp->m_attr_geo;
104 bno = 0;
105 }
106
107 /*
108 * traverse down left-side of tree until we hit the
109 * left-most leaf block setting up the btree cursor along
110 * the way.
111 */
112 i = -1;
113 node = NULL;
114 da_cursor->active = 0;
115
116 do {
117 /*
118 * read in each block along the way and set up cursor
119 */
120 nex = blkmap_getn(da_cursor->blkmap, bno,
121 geo->fsbcount, &bmp, &lbmp);
122
123 if (nex == 0)
124 goto error_out;
125
126 bp = da_read_buf(mp, nex, bmp, &xfs_da3_node_buf_ops);
127 if (bmp != &lbmp)
128 free(bmp);
129
130 if (!bp) {
131 do_warn(
132 _("can't read %s block %u for inode %" PRIu64 "\n"),
133 FORKNAME(whichfork), bno, da_cursor->ino);
134 goto error_out;
135 }
136
137 node = bp->b_addr;
138 M_DIROPS(mp)->node_hdr_from_disk(&nodehdr, node);
139
140 if (whichfork == XFS_DATA_FORK &&
141 (nodehdr.magic == XFS_DIR2_LEAFN_MAGIC ||
142 nodehdr.magic == XFS_DIR3_LEAFN_MAGIC)) {
143 if (i != -1) {
144 do_warn(
145 _("found non-root LEAFN node in inode %" PRIu64 " bno = %u\n"),
146 da_cursor->ino, bno);
147 }
148 *rbno = 0;
149 libxfs_putbuf(bp);
150 return 1;
151 }
152
153 if (nodehdr.magic != XFS_DA_NODE_MAGIC &&
154 nodehdr.magic != XFS_DA3_NODE_MAGIC) {
155 do_warn(
156 _("bad %s magic number 0x%x in inode %" PRIu64 " bno = %u\n"),
157 FORKNAME(whichfork), nodehdr.magic,
158 da_cursor->ino, bno);
159 libxfs_putbuf(bp);
160 goto error_out;
161 }
162
163 /* corrupt node; rebuild the dir. */
164 if (bp->b_error == -EFSBADCRC || bp->b_error == -EFSCORRUPTED) {
165 libxfs_putbuf(bp);
166 do_warn(
167 _("corrupt %s tree block %u for inode %" PRIu64 "\n"),
168 FORKNAME(whichfork), bno, da_cursor->ino);
169 goto error_out;
170 }
171
172 btree = M_DIROPS(mp)->node_tree_p(node);
173 if (nodehdr.count > geo->node_ents) {
174 do_warn(
175 _("bad %s record count in inode %" PRIu64 ", count = %d, max = %d\n"),
176 FORKNAME(whichfork), da_cursor->ino,
177 nodehdr.count, geo->node_ents);
178 libxfs_putbuf(bp);
179 goto error_out;
180 }
181
182 /*
183 * maintain level counter
184 */
185 if (i == -1) {
186 i = da_cursor->active = nodehdr.level;
187 if (i < 1 || i >= XFS_DA_NODE_MAXDEPTH) {
188 do_warn(
189 _("bad header depth for directory inode %" PRIu64 "\n"),
190 da_cursor->ino);
191 libxfs_putbuf(bp);
192 i = -1;
193 goto error_out;
194 }
195 } else {
196 if (nodehdr.level == i - 1) {
197 i--;
198 } else {
199 do_warn(
200 _("bad %s btree for inode %" PRIu64 "\n"),
201 FORKNAME(whichfork), da_cursor->ino);
202 libxfs_putbuf(bp);
203 goto error_out;
204 }
205 }
206
207 da_cursor->level[i].hashval = be32_to_cpu(btree[0].hashval);
208 da_cursor->level[i].bp = bp;
209 da_cursor->level[i].bno = bno;
210 da_cursor->level[i].index = 0;
211
212 /*
213 * set up new bno for next level down
214 */
215 bno = be32_to_cpu(btree[0].before);
216 } while (node != NULL && i > 1);
217
218 /*
219 * now return block number and get out
220 */
221 *rbno = da_cursor->level[0].bno = bno;
222 return 1;
223
224 error_out:
225 while (i > 1 && i <= da_cursor->active) {
226 libxfs_putbuf(da_cursor->level[i].bp);
227 i++;
228 }
229
230 return 0;
231 }
232
233 /*
234 * blow out buffer for this level and all the rest above as well
235 * if error == 0, we are not expecting to encounter any unreleased
236 * buffers (e.g. if we do, it's a mistake). if error == 1, we're
237 * in an error-handling case so unreleased buffers may exist.
238 */
239 static void
240 release_da_cursor_int(
241 xfs_mount_t *mp,
242 da_bt_cursor_t *cursor,
243 int prev_level,
244 int error)
245 {
246 int level = prev_level + 1;
247
248 if (cursor->level[level].bp != NULL) {
249 if (!error) {
250 do_warn(_("release_da_cursor_int got unexpected "
251 "non-null bp, dabno = %u\n"),
252 cursor->level[level].bno);
253 }
254 ASSERT(error != 0);
255
256 libxfs_putbuf(cursor->level[level].bp);
257 cursor->level[level].bp = NULL;
258 }
259
260 if (level < cursor->active)
261 release_da_cursor_int(mp, cursor, level, error);
262
263 return;
264 }
265
266 void
267 release_da_cursor(
268 xfs_mount_t *mp,
269 da_bt_cursor_t *cursor,
270 int prev_level)
271 {
272 release_da_cursor_int(mp, cursor, prev_level, 0);
273 }
274
275 void
276 err_release_da_cursor(
277 xfs_mount_t *mp,
278 da_bt_cursor_t *cursor,
279 int prev_level)
280 {
281 release_da_cursor_int(mp, cursor, prev_level, 1);
282 }
283
284 /*
285 * make sure that all entries in all blocks along the right side of
286 * of the tree are used and hashval's are consistent. level is the
287 * level of the descendent block. returns 0 if good (even if it had
288 * to be fixed up), and 1 if bad. The right edge of the tree is
289 * technically a block boundary. This routine should be used then
290 * instead of verify_da_path().
291 */
292 int
293 verify_final_da_path(
294 xfs_mount_t *mp,
295 da_bt_cursor_t *cursor,
296 const int p_level,
297 int whichfork)
298 {
299 xfs_da_intnode_t *node;
300 xfs_dahash_t hashval;
301 int bad = 0;
302 int entry;
303 int this_level = p_level + 1;
304 struct xfs_da_node_entry *btree;
305 struct xfs_da3_icnode_hdr nodehdr;
306
307 #ifdef XR_DIR_TRACE
308 fprintf(stderr, "in verify_final_da_path, this_level = %d\n",
309 this_level);
310 #endif
311
312 /*
313 * the index should point to the next "unprocessed" entry
314 * in the block which should be the final (rightmost) entry
315 */
316 entry = cursor->level[this_level].index;
317 node = cursor->level[this_level].bp->b_addr;
318 btree = M_DIROPS(mp)->node_tree_p(node);
319 M_DIROPS(mp)->node_hdr_from_disk(&nodehdr, node);
320
321 /*
322 * check internal block consistency on this level -- ensure
323 * that all entries are used, encountered and expected hashvals
324 * match, etc.
325 */
326 if (entry != nodehdr.count - 1) {
327 do_warn(
328 _("%s block used/count inconsistency - %d/%hu\n"),
329 FORKNAME(whichfork), entry, nodehdr.count);
330 bad++;
331 }
332 /*
333 * hash values monotonically increasing ???
334 */
335 if (cursor->level[this_level].hashval >=
336 be32_to_cpu(btree[entry].hashval)) {
337 do_warn(
338 _("%s block hashvalue inconsistency, expected > %u / saw %u\n"),
339 FORKNAME(whichfork),
340 cursor->level[this_level].hashval,
341 be32_to_cpu(btree[entry].hashval));
342 bad++;
343 }
344 if (nodehdr.forw != 0) {
345 do_warn(
346 _("bad %s forward block pointer, expected 0, saw %u\n"),
347 FORKNAME(whichfork), nodehdr.forw);
348 bad++;
349 }
350 if (bad) {
351 do_warn(_("bad %s block in inode %" PRIu64 "\n"),
352 FORKNAME(whichfork), cursor->ino);
353 return 1;
354 }
355 /*
356 * keep track of greatest block # -- that gets
357 * us the length of the directory/attribute
358 */
359 if (cursor->level[this_level].bno > cursor->greatest_bno)
360 cursor->greatest_bno = cursor->level[this_level].bno;
361
362 /*
363 * ok, now check descendant block number against this level
364 */
365 if (cursor->level[p_level].bno != be32_to_cpu(btree[entry].before)) {
366 #ifdef XR_DIR_TRACE
367 fprintf(stderr, "bad %s btree pointer, child bno should "
368 "be %d, block bno is %d, hashval is %u\n",
369 FORKNAME(whichfork), be16_to_cpu(btree[entry].before),
370 cursor->level[p_level].bno,
371 cursor->level[p_level].hashval);
372 fprintf(stderr, "verify_final_da_path returns 1 (bad) #1a\n");
373 #endif
374 return 1;
375 }
376
377 if (cursor->level[p_level].hashval !=
378 be32_to_cpu(btree[entry].hashval)) {
379 if (!no_modify) {
380 do_warn(
381 _("correcting bad hashval in non-leaf %s block\n"
382 "\tin (level %d) in inode %" PRIu64 ".\n"),
383 FORKNAME(whichfork), this_level, cursor->ino);
384 btree[entry].hashval = cpu_to_be32(
385 cursor->level[p_level].hashval);
386 cursor->level[this_level].dirty++;
387 } else {
388 do_warn(
389 _("would correct bad hashval in non-leaf %s block\n"
390 "\tin (level %d) in inode %" PRIu64 ".\n"),
391 FORKNAME(whichfork), this_level, cursor->ino);
392 }
393 }
394
395 /*
396 * Note: squirrel hashval away _before_ releasing the
397 * buffer, preventing a use-after-free problem.
398 */
399 hashval = be32_to_cpu(btree[entry].hashval);
400
401 /*
402 * release/write buffer
403 */
404 ASSERT(cursor->level[this_level].dirty == 0 ||
405 (cursor->level[this_level].dirty && !no_modify));
406
407 if (cursor->level[this_level].dirty && !no_modify)
408 libxfs_writebuf(cursor->level[this_level].bp, 0);
409 else
410 libxfs_putbuf(cursor->level[this_level].bp);
411
412 cursor->level[this_level].bp = NULL;
413
414 /*
415 * bail out if this is the root block (top of tree)
416 */
417 if (this_level >= cursor->active) {
418 #ifdef XR_DIR_TRACE
419 fprintf(stderr, "verify_final_da_path returns 0 (ok)\n");
420 #endif
421 return 0;
422 }
423 /*
424 * set hashvalue to correctly reflect the now-validated
425 * last entry in this block and continue upwards validation
426 */
427 cursor->level[this_level].hashval = hashval;
428
429 return verify_final_da_path(mp, cursor, this_level, whichfork);
430 }
431
432 /*
433 * Verifies the path from a descendant block up to the root.
434 * Should be called when the descendant level traversal hits
435 * a block boundary before crossing the boundary (reading in a new
436 * block).
437 *
438 * the directory/attr btrees work differently to the other fs btrees.
439 * each interior block contains records that are <hashval, bno>
440 * pairs. The bno is a file bno, not a filesystem bno. The last
441 * hashvalue in the block <bno> will be <hashval>. BUT unlike
442 * the freespace btrees, the *last* value in each block gets
443 * propagated up the tree instead of the first value in each block.
444 * that is, the interior records point to child blocks and the *greatest*
445 * hash value contained by the child block is the one the block above
446 * uses as the key for the child block.
447 *
448 * level is the level of the descendent block. returns 0 if good,
449 * and 1 if bad. The descendant block may be a leaf block.
450 *
451 * the invariant here is that the values in the cursor for the
452 * levels beneath this level (this_level) and the cursor index
453 * for this level *must* be valid.
454 *
455 * that is, the hashval/bno info is accurate for all
456 * DESCENDANTS and match what the node[index] information
457 * for the current index in the cursor for this level.
458 *
459 * the index values in the cursor for the descendant level
460 * are allowed to be off by one as they will reflect the
461 * next entry at those levels to be processed.
462 *
463 * the hashvalue for the current level can't be set until
464 * we hit the last entry in the block so, it's garbage
465 * until set by this routine.
466 *
467 * bno and bp for the current block/level are always valid
468 * since they have to be set so we can get a buffer for the
469 * block.
470 */
471 int
472 verify_da_path(
473 xfs_mount_t *mp,
474 da_bt_cursor_t *cursor,
475 const int p_level,
476 int whichfork)
477 {
478 xfs_da_intnode_t *node;
479 xfs_da_intnode_t *newnode;
480 xfs_dablk_t dabno;
481 struct xfs_buf *bp;
482 int bad;
483 int entry;
484 int this_level = p_level + 1;
485 bmap_ext_t *bmp;
486 int nex;
487 bmap_ext_t lbmp;
488 struct xfs_da_geometry *geo;
489 struct xfs_da_node_entry *btree;
490 struct xfs_da3_icnode_hdr nodehdr;
491
492 if (whichfork == XFS_DATA_FORK)
493 geo = mp->m_dir_geo;
494 else
495 geo = mp->m_attr_geo;
496
497 /* No buffer at this level, tree is corrupt. */
498 if (cursor->level[this_level].bp == NULL)
499 return 1;
500
501 /*
502 * index is currently set to point to the entry that
503 * should be processed now in this level.
504 */
505 entry = cursor->level[this_level].index;
506 node = cursor->level[this_level].bp->b_addr;
507 btree = M_DIROPS(mp)->node_tree_p(node);
508 M_DIROPS(mp)->node_hdr_from_disk(&nodehdr, node);
509
510 /* No entries in this node? Tree is corrupt. */
511 if (nodehdr.count == 0)
512 return 1;
513
514 /*
515 * if this block is out of entries, validate this
516 * block and move on to the next block.
517 * and update cursor value for said level
518 */
519 if (entry >= nodehdr.count) {
520 /*
521 * update the hash value for this level before
522 * validating it. bno value should be ok since
523 * it was set when the block was first read in.
524 */
525 cursor->level[this_level].hashval =
526 be32_to_cpu(btree[entry - 1].hashval);
527
528 /*
529 * keep track of greatest block # -- that gets
530 * us the length of the directory
531 */
532 if (cursor->level[this_level].bno > cursor->greatest_bno)
533 cursor->greatest_bno = cursor->level[this_level].bno;
534
535 /*
536 * validate the path for the current used-up block
537 * before we trash it
538 */
539 if (verify_da_path(mp, cursor, this_level, whichfork))
540 return 1;
541 /*
542 * ok, now get the next buffer and check sibling pointers
543 */
544 dabno = nodehdr.forw;
545 ASSERT(dabno != 0);
546 nex = blkmap_getn(cursor->blkmap, dabno, geo->fsbcount,
547 &bmp, &lbmp);
548 if (nex == 0) {
549 do_warn(
550 _("can't get map info for %s block %u of inode %" PRIu64 "\n"),
551 FORKNAME(whichfork), dabno, cursor->ino);
552 return 1;
553 }
554
555 bp = da_read_buf(mp, nex, bmp, &xfs_da3_node_buf_ops);
556 if (bmp != &lbmp)
557 free(bmp);
558
559 if (!bp) {
560 do_warn(
561 _("can't read %s block %u for inode %" PRIu64 "\n"),
562 FORKNAME(whichfork), dabno, cursor->ino);
563 return 1;
564 }
565
566 newnode = bp->b_addr;
567 btree = M_DIROPS(mp)->node_tree_p(newnode);
568 M_DIROPS(mp)->node_hdr_from_disk(&nodehdr, newnode);
569
570 /*
571 * verify magic number and back pointer, sanity-check
572 * entry count, verify level
573 */
574 bad = 0;
575 if (nodehdr.magic != XFS_DA_NODE_MAGIC &&
576 nodehdr.magic != XFS_DA3_NODE_MAGIC) {
577 do_warn(
578 _("bad magic number %x in %s block %u for inode %" PRIu64 "\n"),
579 nodehdr.magic, FORKNAME(whichfork),
580 dabno, cursor->ino);
581 bad++;
582 }
583 if (nodehdr.back != cursor->level[this_level].bno) {
584 do_warn(
585 _("bad back pointer in %s block %u for inode %" PRIu64 "\n"),
586 FORKNAME(whichfork), dabno, cursor->ino);
587 bad++;
588 }
589 if (nodehdr.count > geo->node_ents) {
590 do_warn(
591 _("entry count %d too large in %s block %u for inode %" PRIu64 "\n"),
592 nodehdr.count, FORKNAME(whichfork),
593 dabno, cursor->ino);
594 bad++;
595 }
596 if (nodehdr.level != this_level) {
597 do_warn(
598 _("bad level %d in %s block %u for inode %" PRIu64 "\n"),
599 nodehdr.level, FORKNAME(whichfork),
600 dabno, cursor->ino);
601 bad++;
602 }
603 if (bad) {
604 #ifdef XR_DIR_TRACE
605 fprintf(stderr, "verify_da_path returns 1 (bad) #4\n");
606 #endif
607 libxfs_putbuf(bp);
608 return 1;
609 }
610
611 /*
612 * update cursor, write out the *current* level if
613 * required. don't write out the descendant level
614 */
615 ASSERT(cursor->level[this_level].dirty == 0 ||
616 (cursor->level[this_level].dirty && !no_modify));
617
618 /*
619 * If block looks ok but CRC didn't match, make sure to
620 * recompute it.
621 */
622 if (!no_modify &&
623 cursor->level[this_level].bp->b_error == -EFSBADCRC)
624 cursor->level[this_level].dirty = 1;
625
626 if (cursor->level[this_level].dirty && !no_modify)
627 libxfs_writebuf(cursor->level[this_level].bp, 0);
628 else
629 libxfs_putbuf(cursor->level[this_level].bp);
630
631 /* switch cursor to point at the new buffer we just read */
632 cursor->level[this_level].bp = bp;
633 cursor->level[this_level].dirty = 0;
634 cursor->level[this_level].bno = dabno;
635 cursor->level[this_level].hashval =
636 be32_to_cpu(btree[0].hashval);
637
638 entry = cursor->level[this_level].index = 0;
639 }
640 /*
641 * ditto for block numbers
642 */
643 if (cursor->level[p_level].bno != be32_to_cpu(btree[entry].before)) {
644 #ifdef XR_DIR_TRACE
645 fprintf(stderr, "bad %s btree pointer, child bno "
646 "should be %d, block bno is %d, hashval is %u\n",
647 FORKNAME(whichfork), be32_to_cpu(btree[entry].before),
648 cursor->level[p_level].bno,
649 cursor->level[p_level].hashval);
650 fprintf(stderr, "verify_da_path returns 1 (bad) #1a\n");
651 #endif
652 return 1;
653 }
654 /*
655 * ok, now validate last hashvalue in the descendant
656 * block against the hashval in the current entry
657 */
658 if (cursor->level[p_level].hashval !=
659 be32_to_cpu(btree[entry].hashval)) {
660 if (!no_modify) {
661 do_warn(
662 _("correcting bad hashval in interior %s block\n"
663 "\tin (level %d) in inode %" PRIu64 ".\n"),
664 FORKNAME(whichfork), this_level, cursor->ino);
665 btree[entry].hashval = cpu_to_be32(
666 cursor->level[p_level].hashval);
667 cursor->level[this_level].dirty++;
668 } else {
669 do_warn(
670 _("would correct bad hashval in interior %s block\n"
671 "\tin (level %d) in inode %" PRIu64 ".\n"),
672 FORKNAME(whichfork), this_level, cursor->ino);
673 }
674 }
675 /*
676 * increment index for this level to point to next entry
677 * (which should point to the next descendant block)
678 */
679 cursor->level[this_level].index++;
680 #ifdef XR_DIR_TRACE
681 fprintf(stderr, "verify_da_path returns 0 (ok)\n");
682 #endif
683 return 0;
684 }