1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
9 #include <sys/statvfs.h>
13 #include "workqueue.h"
14 #include "xfs_scrub.h"
19 #include "fscounters.h"
21 #include "read_verify.h"
26 * Phase 6: Verify data file integrity.
28 * Identify potential data block extents with GETFSMAP, then feed those
29 * extents to the read-verify pool to get the verify commands batched,
30 * issued, and (if there are problems) reported back to us. If there
31 * are errors, we'll record the bad regions and (if available) use rmap
32 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
33 * whole directory tree looking for files that overlap the bad regions
34 * and report the paths of the now corrupt files.
37 /* Find the disk for a given device identifier. */
40 struct scrub_ctx
*ctx
,
43 if (dev
== ctx
->fsinfo
.fs_datadev
)
45 else if (dev
== ctx
->fsinfo
.fs_logdev
)
47 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
52 /* Find the device major/minor for a given disk. */
55 struct scrub_ctx
*ctx
,
58 if (disk
== ctx
->datadev
)
59 return ctx
->fsinfo
.fs_datadev
;
60 else if (disk
== ctx
->logdev
)
61 return ctx
->fsinfo
.fs_logdev
;
62 else if (disk
== ctx
->rtdev
)
63 return ctx
->fsinfo
.fs_rtdev
;
72 static const struct owner_decode special_owners
[] = {
73 {XFS_FMR_OWN_FREE
, "free space"},
74 {XFS_FMR_OWN_UNKNOWN
, "unknown owner"},
75 {XFS_FMR_OWN_FS
, "static FS metadata"},
76 {XFS_FMR_OWN_LOG
, "journalling log"},
77 {XFS_FMR_OWN_AG
, "per-AG metadata"},
78 {XFS_FMR_OWN_INOBT
, "inode btree blocks"},
79 {XFS_FMR_OWN_INODES
, "inodes"},
80 {XFS_FMR_OWN_REFC
, "refcount btree"},
81 {XFS_FMR_OWN_COW
, "CoW staging"},
82 {XFS_FMR_OWN_DEFECTIVE
, "bad blocks"},
86 /* Decode a special owner. */
88 xfs_decode_special_owner(
91 const struct owner_decode
*od
= special_owners
;
94 if (od
->owner
== owner
)
102 /* Routines to translate bad physical extents into file paths and offsets. */
104 struct xfs_verify_error_info
{
105 struct bitmap
*d_bad
; /* bytes */
106 struct bitmap
*r_bad
; /* bytes */
109 /* Report if this extent overlaps a bad region. */
111 xfs_report_verify_inode_bmap(
112 struct scrub_ctx
*ctx
,
117 struct xfs_bmap
*bmap
,
120 struct xfs_verify_error_info
*vei
= arg
;
123 /* Only report errors for real extents. */
124 if (bmap
->bm_flags
& (BMV_OF_PREALLOC
| BMV_OF_DELALLOC
))
127 if (fsx
->fsx_xflags
& FS_XFLAG_REALTIME
)
132 if (!bitmap_test(bmp
, bmap
->bm_physical
, bmap
->bm_length
))
135 str_error(ctx
, descr
,
136 _("offset %llu failed read verification."), bmap
->bm_offset
);
140 /* Iterate the extent mappings of a file to report errors. */
142 xfs_report_verify_fd(
143 struct scrub_ctx
*ctx
,
148 struct xfs_bmap key
= {0};
152 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_DATA_FORK
, &key
,
153 xfs_report_verify_inode_bmap
, arg
);
158 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_ATTR_FORK
, &key
,
159 xfs_report_verify_inode_bmap
, arg
);
165 /* Report read verify errors in unlinked (but still open) files. */
167 xfs_report_verify_inode(
168 struct scrub_ctx
*ctx
,
169 struct xfs_handle
*handle
,
170 struct xfs_bstat
*bstat
,
173 char descr
[DESCR_BUFSZ
];
178 snprintf(descr
, DESCR_BUFSZ
, _("inode %"PRIu64
" (unlinked)"),
179 (uint64_t)bstat
->bs_ino
);
181 /* Ignore linked files and things we can't open. */
182 if (bstat
->bs_nlink
!= 0)
184 if (!S_ISREG(bstat
->bs_mode
) && !S_ISDIR(bstat
->bs_mode
))
187 /* Try to open the inode. */
188 fd
= xfs_open_handle(handle
);
195 _("Disappeared during read error reporting."));
199 /* Go find the badness. */
200 moveon
= xfs_report_verify_fd(ctx
, descr
, fd
, arg
);
203 str_errno(ctx
, descr
);
205 return moveon
? 0 : XFS_ITERATE_INODES_ABORT
;
208 /* Scan a directory for matches in the read verify error list. */
210 xfs_report_verify_dir(
211 struct scrub_ctx
*ctx
,
216 return xfs_report_verify_fd(ctx
, path
, dir_fd
, arg
);
220 * Scan the inode associated with a directory entry for matches with
221 * the read verify error list.
224 xfs_report_verify_dirent(
225 struct scrub_ctx
*ctx
,
228 struct dirent
*dirent
,
236 /* Ignore things we can't open. */
237 if (!S_ISREG(sb
->st_mode
) && !S_ISDIR(sb
->st_mode
))
240 /* Ignore . and .. */
241 if (!strcmp(".", dirent
->d_name
) || !strcmp("..", dirent
->d_name
))
245 * If we were given a dirent, open the associated file under
246 * dir_fd for badblocks scanning. If dirent is NULL, then it's
247 * the directory itself we want to scan.
249 fd
= openat(dir_fd
, dirent
->d_name
,
250 O_RDONLY
| O_NOATIME
| O_NOFOLLOW
| O_NOCTTY
);
254 /* Go find the badness. */
255 moveon
= xfs_report_verify_fd(ctx
, path
, fd
, arg
);
262 str_errno(ctx
, path
);
266 /* Given bad extent lists for the data & rtdev, find bad files. */
268 xfs_report_verify_errors(
269 struct scrub_ctx
*ctx
,
270 struct bitmap
*d_bad
,
271 struct bitmap
*r_bad
)
273 struct xfs_verify_error_info vei
;
279 /* Scan the directory tree to get file paths. */
280 moveon
= scan_fs_tree(ctx
, xfs_report_verify_dir
,
281 xfs_report_verify_dirent
, &vei
);
285 /* Scan for unlinked files. */
286 return xfs_scan_all_inodes(ctx
, xfs_report_verify_inode
, &vei
);
289 /* Verify disk blocks with GETFSMAP */
291 struct xfs_verify_extent
{
292 struct read_verify_pool
*readverify
;
293 struct ptvar
*rvstate
;
294 struct bitmap
*d_bad
; /* bytes */
295 struct bitmap
*r_bad
; /* bytes */
298 /* Report an IO error resulting from read-verify based off getfsmap. */
300 xfs_check_rmap_error_report(
301 struct scrub_ctx
*ctx
,
308 uint64_t err_physical
= *(uint64_t *)arg
;
311 if (err_physical
> map
->fmr_physical
)
312 err_off
= err_physical
- map
->fmr_physical
;
316 snprintf(buf
, 32, _("disk offset %"PRIu64
),
317 (uint64_t)BTOBB(map
->fmr_physical
+ err_off
));
319 if (map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) {
320 type
= xfs_decode_special_owner(map
->fmr_owner
);
322 _("%s failed read verification."),
327 * XXX: If we had a getparent() call we could report IO errors
328 * efficiently. Until then, we'll have to scan the dir tree
329 * to find the bad file's pathname.
336 * Remember a read error for later, and see if rmap will tell us about the
337 * owner ahead of time.
340 xfs_check_rmap_ioerr(
341 struct scrub_ctx
*ctx
,
348 struct fsmap keys
[2];
349 char descr
[DESCR_BUFSZ
];
350 struct xfs_verify_extent
*ve
= arg
;
355 dev
= xfs_disk_to_dev(ctx
, disk
);
358 * If we don't have parent pointers, save the bad extent for later rescanning.
361 if (dev
== ctx
->fsinfo
.fs_datadev
)
363 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
368 moveon
= bitmap_set(tree
, start
, length
);
370 str_errno(ctx
, ctx
->mntpoint
);
373 snprintf(descr
, DESCR_BUFSZ
, _("dev %d:%d ioerr @ %"PRIu64
":%"PRIu64
" "),
374 major(dev
), minor(dev
), start
, length
);
376 /* Go figure out which blocks are bad from the fsmap. */
377 memset(keys
, 0, sizeof(struct fsmap
) * 2);
378 keys
->fmr_device
= dev
;
379 keys
->fmr_physical
= start
;
380 (keys
+ 1)->fmr_device
= dev
;
381 (keys
+ 1)->fmr_physical
= start
+ length
- 1;
382 (keys
+ 1)->fmr_owner
= ULLONG_MAX
;
383 (keys
+ 1)->fmr_offset
= ULLONG_MAX
;
384 (keys
+ 1)->fmr_flags
= UINT_MAX
;
385 xfs_iterate_fsmap(ctx
, descr
, keys
, xfs_check_rmap_error_report
,
389 /* Schedule a read-verify of a (data block) extent. */
392 struct scrub_ctx
*ctx
,
397 struct xfs_verify_extent
*ve
= arg
;
400 dbg_printf("rmap dev %d:%d phys %"PRIu64
" owner %"PRId64
401 " offset %"PRIu64
" len %"PRIu64
" flags 0x%x\n",
402 major(map
->fmr_device
), minor(map
->fmr_device
),
403 (uint64_t)map
->fmr_physical
, (int64_t)map
->fmr_owner
,
404 (uint64_t)map
->fmr_offset
, (uint64_t)map
->fmr_length
,
407 /* "Unknown" extents should be verified; they could be data. */
408 if ((map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) &&
409 map
->fmr_owner
== XFS_FMR_OWN_UNKNOWN
)
410 map
->fmr_flags
&= ~FMR_OF_SPECIAL_OWNER
;
413 * We only care about read-verifying data extents that have been
414 * written to disk. This means we can skip "special" owners
415 * (metadata), xattr blocks, unwritten extents, and extent maps.
416 * These should all get checked elsewhere in the scrubber.
418 if (map
->fmr_flags
& (FMR_OF_PREALLOC
| FMR_OF_ATTR_FORK
|
419 FMR_OF_EXTENT_MAP
| FMR_OF_SPECIAL_OWNER
))
422 /* XXX: Filter out directory data blocks. */
424 /* Schedule the read verify command for (eventual) running. */
425 disk
= xfs_dev_to_disk(ctx
, map
->fmr_device
);
427 read_verify_schedule_io(ve
->readverify
, ptvar_get(ve
->rvstate
), disk
,
428 map
->fmr_physical
, map
->fmr_length
, ve
);
431 /* Is this the last extent? Fire off the read. */
432 if (map
->fmr_flags
& FMR_OF_LAST
)
433 read_verify_force_io(ve
->readverify
, ptvar_get(ve
->rvstate
));
439 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
440 * do data checksums, we trust that the underlying storage will pass back
441 * an IO error if it can't retrieve whatever we previously stored there.
442 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
443 * scan the extent maps of the entire fs tree (and the unlinked inodes)
444 * to figure out which files are now broken.
448 struct scrub_ctx
*ctx
)
450 struct xfs_verify_extent ve
;
453 ve
.rvstate
= ptvar_init(scrub_nproc(ctx
), sizeof(struct read_verify
));
455 str_errno(ctx
, ctx
->mntpoint
);
459 moveon
= bitmap_init(&ve
.d_bad
);
461 str_errno(ctx
, ctx
->mntpoint
);
465 moveon
= bitmap_init(&ve
.r_bad
);
467 str_errno(ctx
, ctx
->mntpoint
);
471 ve
.readverify
= read_verify_pool_init(ctx
, ctx
->geo
.blocksize
,
472 xfs_check_rmap_ioerr
, disk_heads(ctx
->datadev
));
473 if (!ve
.readverify
) {
475 str_info(ctx
, ctx
->mntpoint
,
476 _("Could not create media verifier."));
479 moveon
= xfs_scan_all_spacemaps(ctx
, xfs_check_rmap
, &ve
);
482 read_verify_pool_flush(ve
.readverify
);
483 ctx
->bytes_checked
+= read_verify_bytes(ve
.readverify
);
484 read_verify_pool_destroy(ve
.readverify
);
486 /* Scan the whole dir tree to see what matches the bad extents. */
487 if (!bitmap_empty(ve
.d_bad
) || !bitmap_empty(ve
.r_bad
))
488 moveon
= xfs_report_verify_errors(ctx
, ve
.d_bad
, ve
.r_bad
);
490 bitmap_free(&ve
.r_bad
);
491 bitmap_free(&ve
.d_bad
);
492 ptvar_free(ve
.rvstate
);
496 read_verify_pool_destroy(ve
.readverify
);
498 bitmap_free(&ve
.r_bad
);
500 bitmap_free(&ve
.d_bad
);
502 ptvar_free(ve
.rvstate
);
506 /* Estimate how much work we're going to do. */
508 xfs_estimate_verify_work(
509 struct scrub_ctx
*ctx
,
511 unsigned int *nr_threads
,
514 unsigned long long d_blocks
;
515 unsigned long long d_bfree
;
516 unsigned long long r_blocks
;
517 unsigned long long r_bfree
;
518 unsigned long long f_files
;
519 unsigned long long f_free
;
522 moveon
= xfs_scan_estimate_blocks(ctx
, &d_blocks
, &d_bfree
,
523 &r_blocks
, &r_bfree
, &f_files
, &f_free
);
527 *items
= ((d_blocks
- d_bfree
) + (r_blocks
- r_bfree
)) << ctx
->blocklog
;
528 *nr_threads
= disk_heads(ctx
->datadev
);