]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - repair/da_util.c
libxfs: refactor manage_zones()
[thirdparty/xfsprogs-dev.git] / repair / da_util.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2015 Red Hat, Inc.
4 * All Rights Reserved.
5 */
6
7 /* Various utilities for repair of directory and attribute metadata */
8
9 #include "libxfs.h"
10 #include "globals.h"
11 #include "err_protos.h"
12 #include "bmap.h"
13 #include "da_util.h"
14
15 /*
16 * Takes a name and length (name need not be null-terminated) and whether
17 * we are checking a dir (as opposed to an attr).
18 * Returns 1 if the name contains a NUL or if a directory entry contains a '/'.
19 * Returns 0 if the name checks out.
20 */
21 int
22 namecheck(
23 char *name,
24 int length,
25 bool isadir)
26 {
27 char *c;
28 int i;
29
30 ASSERT(length < MAXNAMELEN);
31
32 for (c = name, i = 0; i < length; i++, c++) {
33 if (isadir && *c == '/')
34 return 1;
35 if (*c == '\0')
36 return 1;
37 }
38
39 return 0;
40 }
41
42 /*
43 * the cursor gets passed up and down the da btree processing
44 * routines. The interior block processing routines use the
45 * cursor to determine if the pointers to and from the preceding
46 * and succeeding sibling blocks are ok and whether the values in
47 * the current block are consistent with the entries in the parent
48 * nodes. When a block is traversed, a parent-verification routine
49 * is called to verify if the next logical entry in the next level up
50 * is consistent with the greatest hashval in the next block of the
51 * current level. The verification routine is itself recursive and
52 * calls itself if it has to traverse an interior block to get
53 * the next logical entry. The routine recurses upwards through
54 * the tree until it finds a block where it can simply step to
55 * the next entry. The hashval in that entry should be equal to
56 * the hashval being passed to it (the greatest hashval in the block
57 * that the entry points to). If that isn't true, then the tree
58 * is blown and we need to trash it, salvage and trash it, or fix it.
59 * Currently, we just trash it.
60 */
61
62 /*
63 * Multibuffer handling.
64 * V2 directory blocks can be noncontiguous, needing multiple buffers.
65 * attr blocks are single blocks; this code handles that as well.
66 */
67 struct xfs_buf *
68 da_read_buf(
69 xfs_mount_t *mp,
70 int nex,
71 bmap_ext_t *bmp,
72 const struct xfs_buf_ops *ops)
73 {
74 #define MAP_ARRAY_SZ 4
75 struct xfs_buf_map map_array[MAP_ARRAY_SZ];
76 struct xfs_buf_map *map;
77 struct xfs_buf *bp;
78 int i;
79
80 if (nex > MAP_ARRAY_SZ) {
81 map = calloc(nex, sizeof(*map));
82 if (map == NULL) {
83 do_error(_("couldn't malloc dir2 buffer list\n"));
84 exit(1);
85 }
86 } else {
87 /* common case avoids calloc/free */
88 map = map_array;
89 }
90 for (i = 0; i < nex; i++) {
91 map[i].bm_bn = XFS_FSB_TO_DADDR(mp, bmp[i].startblock);
92 map[i].bm_len = XFS_FSB_TO_BB(mp, bmp[i].blockcount);
93 }
94 bp = libxfs_readbuf_map(mp->m_dev, map, nex, 0, ops);
95 if (map != map_array)
96 free(map);
97 return bp;
98 }
99
/*
 * Translated, human-readable name of the fork being checked, for use in
 * messages: "directory" for XFS_DATA_FORK, "attribute" for anything else.
 * The argument is parenthesized so that any expression can be passed safely.
 */
#define FORKNAME(type)	((type) == XFS_DATA_FORK ? _("directory") : _("attribute"))
101
102 /*
103 * walk tree from root to the left-most leaf block reading in
104 * blocks and setting up cursor. passes back file block number of the
105 * left-most leaf block if successful (bno). returns 1 if successful,
106 * 0 if unsuccessful.
107 */
108 int
109 traverse_int_dablock(
110 xfs_mount_t *mp,
111 da_bt_cursor_t *da_cursor,
112 xfs_dablk_t *rbno,
113 int whichfork)
114 {
115 bmap_ext_t *bmp;
116 xfs_dablk_t bno;
117 struct xfs_buf *bp;
118 int i;
119 int nex;
120 xfs_da_intnode_t *node;
121 bmap_ext_t lbmp;
122 struct xfs_da_geometry *geo;
123 struct xfs_da_node_entry *btree;
124 struct xfs_da3_icnode_hdr nodehdr;
125
126 if (whichfork == XFS_DATA_FORK) {
127 geo = mp->m_dir_geo;
128 bno = geo->leafblk;
129 } else {
130 geo = mp->m_attr_geo;
131 bno = 0;
132 }
133
134 /*
135 * traverse down left-side of tree until we hit the
136 * left-most leaf block setting up the btree cursor along
137 * the way.
138 */
139 i = -1;
140 node = NULL;
141 da_cursor->active = 0;
142
143 do {
144 /*
145 * read in each block along the way and set up cursor
146 */
147 nex = blkmap_getn(da_cursor->blkmap, bno,
148 geo->fsbcount, &bmp, &lbmp);
149
150 if (nex == 0)
151 goto error_out;
152
153 bp = da_read_buf(mp, nex, bmp, &xfs_da3_node_buf_ops);
154 if (bmp != &lbmp)
155 free(bmp);
156
157 if (!bp) {
158 do_warn(
159 _("can't read %s block %u for inode %" PRIu64 "\n"),
160 FORKNAME(whichfork), bno, da_cursor->ino);
161 goto error_out;
162 }
163
164 node = bp->b_addr;
165 M_DIROPS(mp)->node_hdr_from_disk(&nodehdr, node);
166
167 if (whichfork == XFS_DATA_FORK &&
168 (nodehdr.magic == XFS_DIR2_LEAFN_MAGIC ||
169 nodehdr.magic == XFS_DIR3_LEAFN_MAGIC)) {
170 if (i != -1) {
171 do_warn(
172 _("found non-root LEAFN node in inode %" PRIu64 " bno = %u\n"),
173 da_cursor->ino, bno);
174 }
175 *rbno = 0;
176 libxfs_putbuf(bp);
177 return 1;
178 }
179
180 if (nodehdr.magic != XFS_DA_NODE_MAGIC &&
181 nodehdr.magic != XFS_DA3_NODE_MAGIC) {
182 do_warn(
183 _("bad %s magic number 0x%x in inode %" PRIu64 " bno = %u\n"),
184 FORKNAME(whichfork), nodehdr.magic,
185 da_cursor->ino, bno);
186 libxfs_putbuf(bp);
187 goto error_out;
188 }
189
190 /* corrupt node; rebuild the dir. */
191 if (bp->b_error == -EFSBADCRC || bp->b_error == -EFSCORRUPTED) {
192 libxfs_putbuf(bp);
193 do_warn(
194 _("corrupt %s tree block %u for inode %" PRIu64 "\n"),
195 FORKNAME(whichfork), bno, da_cursor->ino);
196 goto error_out;
197 }
198
199 btree = M_DIROPS(mp)->node_tree_p(node);
200 if (nodehdr.count > geo->node_ents) {
201 do_warn(
202 _("bad %s record count in inode %" PRIu64 ", count = %d, max = %d\n"),
203 FORKNAME(whichfork), da_cursor->ino,
204 nodehdr.count, geo->node_ents);
205 libxfs_putbuf(bp);
206 goto error_out;
207 }
208
209 /*
210 * maintain level counter
211 */
212 if (i == -1) {
213 i = da_cursor->active = nodehdr.level;
214 if (i < 1 || i >= XFS_DA_NODE_MAXDEPTH) {
215 do_warn(
216 _("bad header depth for directory inode %" PRIu64 "\n"),
217 da_cursor->ino);
218 libxfs_putbuf(bp);
219 i = -1;
220 goto error_out;
221 }
222 } else {
223 if (nodehdr.level == i - 1) {
224 i--;
225 } else {
226 do_warn(
227 _("bad %s btree for inode %" PRIu64 "\n"),
228 FORKNAME(whichfork), da_cursor->ino);
229 libxfs_putbuf(bp);
230 goto error_out;
231 }
232 }
233
234 da_cursor->level[i].hashval = be32_to_cpu(btree[0].hashval);
235 da_cursor->level[i].bp = bp;
236 da_cursor->level[i].bno = bno;
237 da_cursor->level[i].index = 0;
238
239 /*
240 * set up new bno for next level down
241 */
242 bno = be32_to_cpu(btree[0].before);
243 } while (node != NULL && i > 1);
244
245 /*
246 * now return block number and get out
247 */
248 *rbno = da_cursor->level[0].bno = bno;
249 return 1;
250
251 error_out:
252 while (i > 1 && i <= da_cursor->active) {
253 libxfs_putbuf(da_cursor->level[i].bp);
254 i++;
255 }
256
257 return 0;
258 }
259
260 /*
261 * blow out buffer for this level and all the rest above as well
262 * if error == 0, we are not expecting to encounter any unreleased
263 * buffers (e.g. if we do, it's a mistake). if error == 1, we're
264 * in an error-handling case so unreleased buffers may exist.
265 */
266 static void
267 release_da_cursor_int(
268 xfs_mount_t *mp,
269 da_bt_cursor_t *cursor,
270 int prev_level,
271 int error)
272 {
273 int level = prev_level + 1;
274
275 if (cursor->level[level].bp != NULL) {
276 if (!error) {
277 do_warn(_("release_da_cursor_int got unexpected "
278 "non-null bp, dabno = %u\n"),
279 cursor->level[level].bno);
280 }
281 ASSERT(error != 0);
282
283 libxfs_putbuf(cursor->level[level].bp);
284 cursor->level[level].bp = NULL;
285 }
286
287 if (level < cursor->active)
288 release_da_cursor_int(mp, cursor, level, error);
289
290 return;
291 }
292
293 void
294 release_da_cursor(
295 xfs_mount_t *mp,
296 da_bt_cursor_t *cursor,
297 int prev_level)
298 {
299 release_da_cursor_int(mp, cursor, prev_level, 0);
300 }
301
302 void
303 err_release_da_cursor(
304 xfs_mount_t *mp,
305 da_bt_cursor_t *cursor,
306 int prev_level)
307 {
308 release_da_cursor_int(mp, cursor, prev_level, 1);
309 }
310
311 /*
312 * make sure that all entries in all blocks along the right side of
313 * of the tree are used and hashval's are consistent. level is the
314 * level of the descendent block. returns 0 if good (even if it had
315 * to be fixed up), and 1 if bad. The right edge of the tree is
316 * technically a block boundary. This routine should be used then
317 * instead of verify_da_path().
318 */
319 int
320 verify_final_da_path(
321 xfs_mount_t *mp,
322 da_bt_cursor_t *cursor,
323 const int p_level,
324 int whichfork)
325 {
326 xfs_da_intnode_t *node;
327 xfs_dahash_t hashval;
328 int bad = 0;
329 int entry;
330 int this_level = p_level + 1;
331 struct xfs_da_node_entry *btree;
332 struct xfs_da3_icnode_hdr nodehdr;
333
334 #ifdef XR_DIR_TRACE
335 fprintf(stderr, "in verify_final_da_path, this_level = %d\n",
336 this_level);
337 #endif
338
339 /*
340 * the index should point to the next "unprocessed" entry
341 * in the block which should be the final (rightmost) entry
342 */
343 entry = cursor->level[this_level].index;
344 node = cursor->level[this_level].bp->b_addr;
345 btree = M_DIROPS(mp)->node_tree_p(node);
346 M_DIROPS(mp)->node_hdr_from_disk(&nodehdr, node);
347
348 /*
349 * check internal block consistency on this level -- ensure
350 * that all entries are used, encountered and expected hashvals
351 * match, etc.
352 */
353 if (entry != nodehdr.count - 1) {
354 do_warn(
355 _("%s block used/count inconsistency - %d/%hu\n"),
356 FORKNAME(whichfork), entry, nodehdr.count);
357 bad++;
358 }
359 /*
360 * hash values monotonically increasing ???
361 */
362 if (cursor->level[this_level].hashval >=
363 be32_to_cpu(btree[entry].hashval)) {
364 do_warn(
365 _("%s block hashvalue inconsistency, expected > %u / saw %u\n"),
366 FORKNAME(whichfork),
367 cursor->level[this_level].hashval,
368 be32_to_cpu(btree[entry].hashval));
369 bad++;
370 }
371 if (nodehdr.forw != 0) {
372 do_warn(
373 _("bad %s forward block pointer, expected 0, saw %u\n"),
374 FORKNAME(whichfork), nodehdr.forw);
375 bad++;
376 }
377 if (bad) {
378 do_warn(_("bad %s block in inode %" PRIu64 "\n"),
379 FORKNAME(whichfork), cursor->ino);
380 return 1;
381 }
382 /*
383 * keep track of greatest block # -- that gets
384 * us the length of the directory/attribute
385 */
386 if (cursor->level[this_level].bno > cursor->greatest_bno)
387 cursor->greatest_bno = cursor->level[this_level].bno;
388
389 /*
390 * ok, now check descendant block number against this level
391 */
392 if (cursor->level[p_level].bno != be32_to_cpu(btree[entry].before)) {
393 #ifdef XR_DIR_TRACE
394 fprintf(stderr, "bad %s btree pointer, child bno should "
395 "be %d, block bno is %d, hashval is %u\n",
396 FORKNAME(whichfork), be16_to_cpu(btree[entry].before),
397 cursor->level[p_level].bno,
398 cursor->level[p_level].hashval);
399 fprintf(stderr, "verify_final_da_path returns 1 (bad) #1a\n");
400 #endif
401 return 1;
402 }
403
404 if (cursor->level[p_level].hashval !=
405 be32_to_cpu(btree[entry].hashval)) {
406 if (!no_modify) {
407 do_warn(
408 _("correcting bad hashval in non-leaf %s block\n"
409 "\tin (level %d) in inode %" PRIu64 ".\n"),
410 FORKNAME(whichfork), this_level, cursor->ino);
411 btree[entry].hashval = cpu_to_be32(
412 cursor->level[p_level].hashval);
413 cursor->level[this_level].dirty++;
414 } else {
415 do_warn(
416 _("would correct bad hashval in non-leaf %s block\n"
417 "\tin (level %d) in inode %" PRIu64 ".\n"),
418 FORKNAME(whichfork), this_level, cursor->ino);
419 }
420 }
421
422 /*
423 * Note: squirrel hashval away _before_ releasing the
424 * buffer, preventing a use-after-free problem.
425 */
426 hashval = be32_to_cpu(btree[entry].hashval);
427
428 /*
429 * release/write buffer
430 */
431 ASSERT(cursor->level[this_level].dirty == 0 ||
432 (cursor->level[this_level].dirty && !no_modify));
433
434 if (cursor->level[this_level].dirty && !no_modify)
435 libxfs_writebuf(cursor->level[this_level].bp, 0);
436 else
437 libxfs_putbuf(cursor->level[this_level].bp);
438
439 cursor->level[this_level].bp = NULL;
440
441 /*
442 * bail out if this is the root block (top of tree)
443 */
444 if (this_level >= cursor->active) {
445 #ifdef XR_DIR_TRACE
446 fprintf(stderr, "verify_final_da_path returns 0 (ok)\n");
447 #endif
448 return 0;
449 }
450 /*
451 * set hashvalue to correctly reflect the now-validated
452 * last entry in this block and continue upwards validation
453 */
454 cursor->level[this_level].hashval = hashval;
455
456 return verify_final_da_path(mp, cursor, this_level, whichfork);
457 }
458
459 /*
460 * Verifies the path from a descendant block up to the root.
461 * Should be called when the descendant level traversal hits
462 * a block boundary before crossing the boundary (reading in a new
463 * block).
464 *
465 * the directory/attr btrees work differently to the other fs btrees.
466 * each interior block contains records that are <hashval, bno>
467 * pairs. The bno is a file bno, not a filesystem bno. The last
468 * hashvalue in the block <bno> will be <hashval>. BUT unlike
469 * the freespace btrees, the *last* value in each block gets
470 * propagated up the tree instead of the first value in each block.
471 * that is, the interior records point to child blocks and the *greatest*
472 * hash value contained by the child block is the one the block above
473 * uses as the key for the child block.
474 *
475 * level is the level of the descendent block. returns 0 if good,
476 * and 1 if bad. The descendant block may be a leaf block.
477 *
478 * the invariant here is that the values in the cursor for the
479 * levels beneath this level (this_level) and the cursor index
480 * for this level *must* be valid.
481 *
482 * that is, the hashval/bno info is accurate for all
483 * DESCENDANTS and match what the node[index] information
484 * for the current index in the cursor for this level.
485 *
486 * the index values in the cursor for the descendant level
487 * are allowed to be off by one as they will reflect the
488 * next entry at those levels to be processed.
489 *
490 * the hashvalue for the current level can't be set until
491 * we hit the last entry in the block so, it's garbage
492 * until set by this routine.
493 *
494 * bno and bp for the current block/level are always valid
495 * since they have to be set so we can get a buffer for the
496 * block.
497 */
498 int
499 verify_da_path(
500 xfs_mount_t *mp,
501 da_bt_cursor_t *cursor,
502 const int p_level,
503 int whichfork)
504 {
505 xfs_da_intnode_t *node;
506 xfs_da_intnode_t *newnode;
507 xfs_dablk_t dabno;
508 struct xfs_buf *bp;
509 int bad;
510 int entry;
511 int this_level = p_level + 1;
512 bmap_ext_t *bmp;
513 int nex;
514 bmap_ext_t lbmp;
515 struct xfs_da_geometry *geo;
516 struct xfs_da_node_entry *btree;
517 struct xfs_da3_icnode_hdr nodehdr;
518
519 if (whichfork == XFS_DATA_FORK)
520 geo = mp->m_dir_geo;
521 else
522 geo = mp->m_attr_geo;
523
524 /* No buffer at this level, tree is corrupt. */
525 if (cursor->level[this_level].bp == NULL)
526 return 1;
527
528 /*
529 * index is currently set to point to the entry that
530 * should be processed now in this level.
531 */
532 entry = cursor->level[this_level].index;
533 node = cursor->level[this_level].bp->b_addr;
534 btree = M_DIROPS(mp)->node_tree_p(node);
535 M_DIROPS(mp)->node_hdr_from_disk(&nodehdr, node);
536
537 /* No entries in this node? Tree is corrupt. */
538 if (nodehdr.count == 0)
539 return 1;
540
541 /*
542 * if this block is out of entries, validate this
543 * block and move on to the next block.
544 * and update cursor value for said level
545 */
546 if (entry >= nodehdr.count) {
547 /*
548 * update the hash value for this level before
549 * validating it. bno value should be ok since
550 * it was set when the block was first read in.
551 */
552 cursor->level[this_level].hashval =
553 be32_to_cpu(btree[entry - 1].hashval);
554
555 /*
556 * keep track of greatest block # -- that gets
557 * us the length of the directory
558 */
559 if (cursor->level[this_level].bno > cursor->greatest_bno)
560 cursor->greatest_bno = cursor->level[this_level].bno;
561
562 /*
563 * validate the path for the current used-up block
564 * before we trash it
565 */
566 if (verify_da_path(mp, cursor, this_level, whichfork))
567 return 1;
568 /*
569 * ok, now get the next buffer and check sibling pointers
570 */
571 dabno = nodehdr.forw;
572 ASSERT(dabno != 0);
573 nex = blkmap_getn(cursor->blkmap, dabno, geo->fsbcount,
574 &bmp, &lbmp);
575 if (nex == 0) {
576 do_warn(
577 _("can't get map info for %s block %u of inode %" PRIu64 "\n"),
578 FORKNAME(whichfork), dabno, cursor->ino);
579 return 1;
580 }
581
582 bp = da_read_buf(mp, nex, bmp, &xfs_da3_node_buf_ops);
583 if (bmp != &lbmp)
584 free(bmp);
585
586 if (!bp) {
587 do_warn(
588 _("can't read %s block %u for inode %" PRIu64 "\n"),
589 FORKNAME(whichfork), dabno, cursor->ino);
590 return 1;
591 }
592
593 newnode = bp->b_addr;
594 btree = M_DIROPS(mp)->node_tree_p(newnode);
595 M_DIROPS(mp)->node_hdr_from_disk(&nodehdr, newnode);
596
597 /*
598 * verify magic number and back pointer, sanity-check
599 * entry count, verify level
600 */
601 bad = 0;
602 if (nodehdr.magic != XFS_DA_NODE_MAGIC &&
603 nodehdr.magic != XFS_DA3_NODE_MAGIC) {
604 do_warn(
605 _("bad magic number %x in %s block %u for inode %" PRIu64 "\n"),
606 nodehdr.magic, FORKNAME(whichfork),
607 dabno, cursor->ino);
608 bad++;
609 }
610 if (nodehdr.back != cursor->level[this_level].bno) {
611 do_warn(
612 _("bad back pointer in %s block %u for inode %" PRIu64 "\n"),
613 FORKNAME(whichfork), dabno, cursor->ino);
614 bad++;
615 }
616 if (nodehdr.count > geo->node_ents) {
617 do_warn(
618 _("entry count %d too large in %s block %u for inode %" PRIu64 "\n"),
619 nodehdr.count, FORKNAME(whichfork),
620 dabno, cursor->ino);
621 bad++;
622 }
623 if (nodehdr.level != this_level) {
624 do_warn(
625 _("bad level %d in %s block %u for inode %" PRIu64 "\n"),
626 nodehdr.level, FORKNAME(whichfork),
627 dabno, cursor->ino);
628 bad++;
629 }
630 if (bad) {
631 #ifdef XR_DIR_TRACE
632 fprintf(stderr, "verify_da_path returns 1 (bad) #4\n");
633 #endif
634 libxfs_putbuf(bp);
635 return 1;
636 }
637
638 /*
639 * update cursor, write out the *current* level if
640 * required. don't write out the descendant level
641 */
642 ASSERT(cursor->level[this_level].dirty == 0 ||
643 (cursor->level[this_level].dirty && !no_modify));
644
645 /*
646 * If block looks ok but CRC didn't match, make sure to
647 * recompute it.
648 */
649 if (!no_modify &&
650 cursor->level[this_level].bp->b_error == -EFSBADCRC)
651 cursor->level[this_level].dirty = 1;
652
653 if (cursor->level[this_level].dirty && !no_modify)
654 libxfs_writebuf(cursor->level[this_level].bp, 0);
655 else
656 libxfs_putbuf(cursor->level[this_level].bp);
657
658 /* switch cursor to point at the new buffer we just read */
659 cursor->level[this_level].bp = bp;
660 cursor->level[this_level].dirty = 0;
661 cursor->level[this_level].bno = dabno;
662 cursor->level[this_level].hashval =
663 be32_to_cpu(btree[0].hashval);
664
665 entry = cursor->level[this_level].index = 0;
666 }
667 /*
668 * ditto for block numbers
669 */
670 if (cursor->level[p_level].bno != be32_to_cpu(btree[entry].before)) {
671 #ifdef XR_DIR_TRACE
672 fprintf(stderr, "bad %s btree pointer, child bno "
673 "should be %d, block bno is %d, hashval is %u\n",
674 FORKNAME(whichfork), be32_to_cpu(btree[entry].before),
675 cursor->level[p_level].bno,
676 cursor->level[p_level].hashval);
677 fprintf(stderr, "verify_da_path returns 1 (bad) #1a\n");
678 #endif
679 return 1;
680 }
681 /*
682 * ok, now validate last hashvalue in the descendant
683 * block against the hashval in the current entry
684 */
685 if (cursor->level[p_level].hashval !=
686 be32_to_cpu(btree[entry].hashval)) {
687 if (!no_modify) {
688 do_warn(
689 _("correcting bad hashval in interior %s block\n"
690 "\tin (level %d) in inode %" PRIu64 ".\n"),
691 FORKNAME(whichfork), this_level, cursor->ino);
692 btree[entry].hashval = cpu_to_be32(
693 cursor->level[p_level].hashval);
694 cursor->level[this_level].dirty++;
695 } else {
696 do_warn(
697 _("would correct bad hashval in interior %s block\n"
698 "\tin (level %d) in inode %" PRIu64 ".\n"),
699 FORKNAME(whichfork), this_level, cursor->ino);
700 }
701 }
702 /*
703 * increment index for this level to point to next entry
704 * (which should point to the next descendant block)
705 */
706 cursor->level[this_level].index++;
707 #ifdef XR_DIR_TRACE
708 fprintf(stderr, "verify_da_path returns 0 (ok)\n");
709 #endif
710 return 0;
711 }