1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
9 #include <sys/statvfs.h>
12 #include "workqueue.h"
13 #include "xfs_scrub.h"
18 #include "fscounters.h"
20 #include "read_verify.h"
25 * Phase 6: Verify data file integrity.
27 * Identify potential data block extents with GETFSMAP, then feed those
28 * extents to the read-verify pool to get the verify commands batched,
29 * issued, and (if there are problems) reported back to us. If there
30 * are errors, we'll record the bad regions and (if available) use rmap
31 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
32 * whole directory tree looking for files that overlap the bad regions
33 * and report the paths of the now corrupt files.
36 /* Find the fd for a given device identifier. */
/*
 * Compares dev against the data, log and realtime device ids stored in
 * ctx->fsinfo, in that order.
 * NOTE(review): the name line and the statement taken by each branch are
 * missing from this listing.  The later call
 * xfs_dev_to_disk(ctx, map->fmr_device) suggests this is xfs_dev_to_disk
 * and that each branch yields the matching disk -- confirm against the
 * complete source, including what happens when nothing matches.
 */
39 struct scrub_ctx
*ctx
,
42 if (dev
== ctx
->fsinfo
.fs_datadev
)
44 else if (dev
== ctx
->fsinfo
.fs_logdev
)
46 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
51 /* Find the device major/minor for a given file descriptor. */
/*
 * Maps a disk handle back to a device id: disk is compared against
 * ctx->datadev, ctx->logdev and ctx->rtdev in turn, and the matching
 * ctx->fsinfo.fs_datadev / fs_logdev / fs_rtdev is returned.
 * NOTE(review): the name line is not present here; the later call
 * xfs_disk_to_dev(ctx, disk) presumably refers to this function.  The
 * behaviour when disk matches none of the three is not visible --
 * confirm against the complete source.
 */
54 struct scrub_ctx
*ctx
,
57 if (disk
== ctx
->datadev
)
58 return ctx
->fsinfo
.fs_datadev
;
59 else if (disk
== ctx
->logdev
)
60 return ctx
->fsinfo
.fs_logdev
;
61 else if (disk
== ctx
->rtdev
)
62 return ctx
->fsinfo
.fs_rtdev
;
/*
 * Table pairing each XFS_FMR_OWN_* special-owner code with a
 * human-readable label; xfs_decode_special_owner() starts its lookup at
 * the head of this array.
 * NOTE(review): the end of the initializer (and any terminating entry
 * the lookup may rely on) is not visible in this listing.
 */
71 static const struct owner_decode special_owners
[] = {
72 {XFS_FMR_OWN_FREE
, "free space"},
73 {XFS_FMR_OWN_UNKNOWN
, "unknown owner"},
74 {XFS_FMR_OWN_FS
, "static FS metadata"},
75 {XFS_FMR_OWN_LOG
, "journalling log"},
76 {XFS_FMR_OWN_AG
, "per-AG metadata"},
77 {XFS_FMR_OWN_INOBT
, "inode btree blocks"},
78 {XFS_FMR_OWN_INODES
, "inodes"},
79 {XFS_FMR_OWN_REFC
, "refcount btree"},
80 {XFS_FMR_OWN_COW
, "CoW staging"},
81 {XFS_FMR_OWN_DEFECTIVE
, "bad blocks"},
85 /* Decode a special owner. */
/*
 * Begins at the head of special_owners and compares an entry's owner
 * field with the owner argument.
 * NOTE(review): the loop construct, the value handed back on a match and
 * the not-found result are missing from this listing -- confirm against
 * the complete source before relying on the return contract.
 */
87 xfs_decode_special_owner(
90 const struct owner_decode
*od
= special_owners
;
93 if (od
->owner
== owner
)
101 /* Routines to translate bad physical extents into file paths and offsets. */
/*
 * Carries the two bad-region bitmaps (both tracked in bytes) that the
 * reporting callbacks receive through their opaque arg pointer.
 * NOTE(review): d_bad and r_bad presumably stand for the data device and
 * the realtime device; the closing brace of the struct is not visible in
 * this listing.
 */
103 struct xfs_verify_error_info
{
104 struct bitmap
*d_bad
; /* bytes */
105 struct bitmap
*r_bad
; /* bytes */
108 /* Report if this extent overlaps a bad region. */
/*
 * Per-extent callback (it is passed to xfs_iterate_filemaps() elsewhere
 * in this file).  arg is the struct xfs_verify_error_info holding the
 * bad-region bitmaps.
 *  - Extents flagged BMV_OF_PREALLOC or BMV_OF_DELALLOC are tested for
 *    first, since only real extents are reported.
 *  - FS_XFLAG_REALTIME on the file selects which bitmap is consulted
 *    (presumably r_bad for realtime files and d_bad otherwise; the
 *    assignments to bmp are not visible here).
 *  - bitmap_test() is asked about the extent's physical range, and
 *    str_error() logs a read-verification failure against descr with
 *    the extent's file offset (presumably only when the test hits).
 * NOTE(review): every return statement is missing from this listing.
 */
110 xfs_report_verify_inode_bmap(
111 struct scrub_ctx
*ctx
,
116 struct xfs_bmap
*bmap
,
119 struct xfs_verify_error_info
*vei
= arg
;
122 /* Only report errors for real extents. */
123 if (bmap
->bm_flags
& (BMV_OF_PREALLOC
| BMV_OF_DELALLOC
))
126 if (fsx
->fsx_xflags
& FS_XFLAG_REALTIME
)
131 if (!bitmap_test(bmp
, bmap
->bm_physical
, bmap
->bm_length
))
134 str_error(ctx
, descr
,
135 _("offset %llu failed read verification."), bmap
->bm_offset
);
139 /* Iterate the extent mappings of a file to report errors. */
/*
 * Runs xfs_iterate_filemaps() over the data fork of fd and then over its
 * attr fork, handing every mapping to xfs_report_verify_inode_bmap
 * together with the caller's arg.  The same zero-initialised key is
 * passed to both calls.
 * NOTE(review): the checks on moveon between and after the two calls,
 * and the return statement, are not visible in this listing.
 */
141 xfs_report_verify_fd(
142 struct scrub_ctx
*ctx
,
147 struct xfs_bmap key
= {0};
151 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_DATA_FORK
, &key
,
152 xfs_report_verify_inode_bmap
, arg
);
157 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_ATTR_FORK
, &key
,
158 xfs_report_verify_inode_bmap
, arg
);
164 /* Report read verify errors in unlinked (but still open) files. */
/*
 * Inode-scan callback (passed to xfs_scan_all_inodes() elsewhere in this
 * file).  Builds an "inode N (unlinked)" description from bstat->bs_ino,
 * tests for inodes that still have links or that are neither regular
 * files nor directories, opens the inode through its handle and runs
 * xfs_report_verify_fd() on the resulting fd.  Returns 0 when the fd
 * scan wants to continue and XFS_ITERATE_INODES_ABORT otherwise.
 * NOTE(review): the open-failure path and the close of fd are only
 * partly visible (a "Disappeared during read error reporting." message
 * and a str_errno() call) -- confirm against the complete source.
 */
166 xfs_report_verify_inode(
167 struct scrub_ctx
*ctx
,
168 struct xfs_handle
*handle
,
169 struct xfs_bstat
*bstat
,
172 char descr
[DESCR_BUFSZ
];
177 snprintf(descr
, DESCR_BUFSZ
, _("inode %"PRIu64
" (unlinked)"),
178 (uint64_t)bstat
->bs_ino
);
180 /* Ignore linked files and things we can't open. */
181 if (bstat
->bs_nlink
!= 0)
183 if (!S_ISREG(bstat
->bs_mode
) && !S_ISDIR(bstat
->bs_mode
))
186 /* Try to open the inode. */
187 fd
= xfs_open_handle(handle
);
194 _("Disappeared during read error reporting."));
198 /* Go find the badness. */
199 moveon
= xfs_report_verify_fd(ctx
, descr
, fd
, arg
);
202 str_errno(ctx
, descr
);
204 return moveon
? 0 : XFS_ITERATE_INODES_ABORT
;
207 /* Scan a directory for matches in the read verify error list. */
/*
 * Directory callback handed to scan_fs_tree(): forwards the directory's
 * own fd (dir_fd) and path to xfs_report_verify_fd() so the directory's
 * extents are checked as well, and returns that call's result.
 */
209 xfs_report_verify_dir(
210 struct scrub_ctx
*ctx
,
215 return xfs_report_verify_fd(ctx
, path
, dir_fd
, arg
);
219 * Scan the inode associated with a directory entry for matches with
220 * the read verify error list.
/*
 * Directory-entry callback handed to scan_fs_tree().  It tests for
 * entries that are neither regular files nor directories and for the
 * "." and ".." entries, opens the entry relative to dir_fd with
 * O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY, and checks it with
 * xfs_report_verify_fd().  str_errno() is called on path afterwards
 * (presumably when closing fd fails; the close is not visible here).
 * NOTE(review): the in-body comment says dirent may be NULL, yet
 * dirent->d_name is dereferenced unconditionally in the strcmp() calls
 * and in openat().  Either that comment is stale or a NULL check is
 * missing -- confirm against the callers.
 */
223 xfs_report_verify_dirent(
224 struct scrub_ctx
*ctx
,
227 struct dirent
*dirent
,
235 /* Ignore things we can't open. */
236 if (!S_ISREG(sb
->st_mode
) && !S_ISDIR(sb
->st_mode
))
239 /* Ignore . and .. */
240 if (!strcmp(".", dirent
->d_name
) || !strcmp("..", dirent
->d_name
))
244 * If we were given a dirent, open the associated file under
245 * dir_fd for badblocks scanning. If dirent is NULL, then it's
246 * the directory itself we want to scan.
248 fd
= openat(dir_fd
, dirent
->d_name
,
249 O_RDONLY
| O_NOATIME
| O_NOFOLLOW
| O_NOCTTY
);
253 /* Go find the badness. */
254 moveon
= xfs_report_verify_fd(ctx
, path
, fd
, arg
);
261 str_errno(ctx
, path
);
265 /* Given bad extent lists for the data & rtdev, find bad files. */
/*
 * Takes the bad-region bitmaps for the data device (d_bad) and the
 * realtime device (r_bad) and uses a struct xfs_verify_error_info as the
 * callback argument for two passes: scan_fs_tree() to reach linked files
 * by path, then xfs_scan_all_inodes() to reach unlinked files.  The
 * result of the inode scan is what gets returned.
 * NOTE(review): the lines that fill in vei and the moveon check after
 * the tree scan are missing from this listing.
 */
267 xfs_report_verify_errors(
268 struct scrub_ctx
*ctx
,
269 struct bitmap
*d_bad
,
270 struct bitmap
*r_bad
)
272 struct xfs_verify_error_info vei
;
278 /* Scan the directory tree to get file paths. */
279 moveon
= scan_fs_tree(ctx
, xfs_report_verify_dir
,
280 xfs_report_verify_dirent
, &vei
);
284 /* Scan for unlinked files. */
285 return xfs_scan_all_inodes(ctx
, xfs_report_verify_inode
, &vei
);
288 /* Verify disk blocks with GETFSMAP */
/*
 * State shared by the space-map scan and the read-verify IO error
 * callback: the read-verify pool that extents are queued on, plus two
 * bitmaps (tracked in bytes) that collect bad regions.
 * NOTE(review): d_bad and r_bad presumably stand for the data device and
 * the realtime device; the closing brace of the struct is not visible in
 * this listing.
 */
290 struct xfs_verify_extent
{
291 struct read_verify_pool
*readverify
;
292 struct bitmap
*d_bad
; /* bytes */
293 struct bitmap
*r_bad
; /* bytes */
296 /* Report an IO error resulting from read-verify based off getfsmap. */
/*
 * fsmap callback (handed to xfs_iterate_fsmap() by xfs_check_rmap_ioerr).
 * arg points at a uint64_t holding the physical address of the failed
 * read.  When that address lies past the start of the mapping, err_off
 * becomes the distance into the mapping.  The location is formatted into
 * buf as "disk offset N" with N = BTOBB(fmr_physical + err_off); for
 * mappings flagged FMR_OF_SPECIAL_OWNER the owner is decoded with
 * xfs_decode_special_owner() for a "%s failed read verification."
 * message.
 * NOTE(review): buf is formatted with a hard-coded size of 32 while its
 * declaration is not visible here -- confirm the two agree.  The value
 * err_off takes otherwise, the non-special-owner path and the return
 * value are also missing from this listing.
 */
298 xfs_check_rmap_error_report(
299 struct scrub_ctx
*ctx
,
306 uint64_t err_physical
= *(uint64_t *)arg
;
309 if (err_physical
> map
->fmr_physical
)
310 err_off
= err_physical
- map
->fmr_physical
;
314 snprintf(buf
, 32, _("disk offset %"PRIu64
),
315 (uint64_t)BTOBB(map
->fmr_physical
+ err_off
));
317 if (map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) {
318 type
= xfs_decode_special_owner(map
->fmr_owner
);
320 _("%s failed read verification."),
325 * XXX: If we had a getparent() call we could report IO errors
326 * efficiently. Until then, we'll have to scan the dir tree
327 * to find the bad file's pathname.
334 * Remember a read error for later, and see if rmap will tell us about the
335 * owner ahead of time.
/*
 * Read-verify IO error callback (registered through
 * read_verify_pool_init() elsewhere in this file).  arg is the struct
 * xfs_verify_extent.  Steps visible here:
 *  - the failing disk is translated to a device id with
 *    xfs_disk_to_dev();
 *  - the device id (data vs. realtime) decides which bitmap, tree,
 *    receives start/length through bitmap_set(), with str_errno()
 *    reporting against the mount point (presumably when that fails);
 *  - a "dev MAJ:MIN ioerr @ start:length" description is built;
 *  - an fsmap query covering start .. start + length - 1 on that device
 *    is issued through xfs_iterate_fsmap(), with the high key's owner,
 *    offset and flags set to their maximum values and
 *    xfs_check_rmap_error_report as the callback.
 * NOTE(review): the assignments to tree, the handling of a device that
 * is neither data nor realtime, and the final argument of
 * xfs_iterate_fsmap() are missing from this listing.
 */
338 xfs_check_rmap_ioerr(
339 struct scrub_ctx
*ctx
,
346 struct fsmap keys
[2];
347 char descr
[DESCR_BUFSZ
];
348 struct xfs_verify_extent
*ve
= arg
;
353 dev
= xfs_disk_to_dev(ctx
, disk
);
356 * If we don't have parent pointers, save the bad extent for
359 if (dev
== ctx
->fsinfo
.fs_datadev
)
361 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
366 moveon
= bitmap_set(tree
, start
, length
);
368 str_errno(ctx
, ctx
->mntpoint
);
371 snprintf(descr
, DESCR_BUFSZ
, _("dev %d:%d ioerr @ %"PRIu64
":%"PRIu64
" "),
372 major(dev
), minor(dev
), start
, length
);
374 /* Go figure out which blocks are bad from the fsmap. */
375 memset(keys
, 0, sizeof(struct fsmap
) * 2);
376 keys
->fmr_device
= dev
;
377 keys
->fmr_physical
= start
;
378 (keys
+ 1)->fmr_device
= dev
;
379 (keys
+ 1)->fmr_physical
= start
+ length
- 1;
380 (keys
+ 1)->fmr_owner
= ULLONG_MAX
;
381 (keys
+ 1)->fmr_offset
= ULLONG_MAX
;
382 (keys
+ 1)->fmr_flags
= UINT_MAX
;
383 xfs_iterate_fsmap(ctx
, descr
, keys
, xfs_check_rmap_error_report
,
387 /* Schedule a read-verify of a (data block) extent. */
/*
 * Space-map callback; presumably xfs_check_rmap, the function handed to
 * xfs_scan_all_spacemaps() later in this file (the name line is missing
 * here).  arg is the struct xfs_verify_extent.  After a debug dump of
 * the mapping:
 *  - a special-owner mapping owned by XFS_FMR_OWN_UNKNOWN has its
 *    FMR_OF_SPECIAL_OWNER flag cleared so it is treated like data;
 *  - mappings flagged FMR_OF_PREALLOC, FMR_OF_ATTR_FORK,
 *    FMR_OF_EXTENT_MAP or FMR_OF_SPECIAL_OWNER are tested for, since
 *    only written data extents are of interest;
 *  - the mapping is queued on the read-verify pool for the disk that
 *    backs map->fmr_device;
 *  - a mapping carrying FMR_OF_LAST forces the queued IO out.
 * NOTE(review): map->fmr_flags is modified in place.  The statement
 * taken when the filter test matches, and the return value, are not
 * visible in this listing.
 */
390 struct scrub_ctx
*ctx
,
395 struct xfs_verify_extent
*ve
= arg
;
398 dbg_printf("rmap dev %d:%d phys %"PRIu64
" owner %"PRId64
399 " offset %"PRIu64
" len %"PRIu64
" flags 0x%x\n",
400 major(map
->fmr_device
), minor(map
->fmr_device
),
401 (uint64_t)map
->fmr_physical
, (int64_t)map
->fmr_owner
,
402 (uint64_t)map
->fmr_offset
, (uint64_t)map
->fmr_length
,
405 /* "Unknown" extents should be verified; they could be data. */
406 if ((map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) &&
407 map
->fmr_owner
== XFS_FMR_OWN_UNKNOWN
)
408 map
->fmr_flags
&= ~FMR_OF_SPECIAL_OWNER
;
411 * We only care about read-verifying data extents that have been
412 * written to disk. This means we can skip "special" owners
413 * (metadata), xattr blocks, unwritten extents, and extent maps.
414 * These should all get checked elsewhere in the scrubber.
416 if (map
->fmr_flags
& (FMR_OF_PREALLOC
| FMR_OF_ATTR_FORK
|
417 FMR_OF_EXTENT_MAP
| FMR_OF_SPECIAL_OWNER
))
420 /* XXX: Filter out directory data blocks. */
422 /* Schedule the read verify command for (eventual) running. */
423 disk
= xfs_dev_to_disk(ctx
, map
->fmr_device
);
425 read_verify_schedule_io(ve
->readverify
, disk
, map
->fmr_physical
,
426 map
->fmr_length
, ve
);
429 /* Is this the last extent? Fire off the read. */
430 if (map
->fmr_flags
& FMR_OF_LAST
)
431 read_verify_force_io(ve
->readverify
);
437 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
438 * do data checksums, we trust that the underlying storage will pass back
439 * an IO error if it can't retrieve whatever we previously stored there.
440 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
441 * scan the extent maps of the entire fs tree (and the unlinked inodes)
442 * to figure out which files are now broken.
/*
 * Entry point of the media verification pass (its name line is not
 * present in this listing).  Visible sequence:
 *  - initialise the d_bad and r_bad bitmaps, with str_errno() reporting
 *    against the mount point (presumably when initialisation fails);
 *  - create the read-verify pool from ctx->geo.blocksize, the
 *    xfs_check_rmap_ioerr error callback and disk_heads(ctx->datadev),
 *    logging "Could not create media verifier." if that fails;
 *  - feed every space mapping to xfs_check_rmap through
 *    xfs_scan_all_spacemaps();
 *  - flush the pool, add its byte count to ctx->bytes_checked and
 *    destroy it;
 *  - if either bitmap is non-empty, call xfs_report_verify_errors() to
 *    map the bad regions back to files;
 *  - free both bitmaps.
 * NOTE(review): the trailing read_verify_pool_destroy() / bitmap_free()
 * calls look like goto-style error unwinding, but the labels, gotos,
 * the last argument of read_verify_pool_init() and all return statements
 * are missing here -- confirm against the complete source.
 */
446 struct scrub_ctx
*ctx
)
448 struct xfs_verify_extent ve
;
451 moveon
= bitmap_init(&ve
.d_bad
);
453 str_errno(ctx
, ctx
->mntpoint
);
457 moveon
= bitmap_init(&ve
.r_bad
);
459 str_errno(ctx
, ctx
->mntpoint
);
463 ve
.readverify
= read_verify_pool_init(ctx
, ctx
->geo
.blocksize
,
464 xfs_check_rmap_ioerr
, disk_heads(ctx
->datadev
),
466 if (!ve
.readverify
) {
468 str_info(ctx
, ctx
->mntpoint
,
469 _("Could not create media verifier."));
472 moveon
= xfs_scan_all_spacemaps(ctx
, xfs_check_rmap
, &ve
);
475 read_verify_pool_flush(ve
.readverify
);
476 ctx
->bytes_checked
+= read_verify_bytes(ve
.readverify
);
477 read_verify_pool_destroy(ve
.readverify
);
479 /* Scan the whole dir tree to see what matches the bad extents. */
480 if (!bitmap_empty(ve
.d_bad
) || !bitmap_empty(ve
.r_bad
))
481 moveon
= xfs_report_verify_errors(ctx
, ve
.d_bad
, ve
.r_bad
);
483 bitmap_free(&ve
.r_bad
);
484 bitmap_free(&ve
.d_bad
);
488 read_verify_pool_destroy(ve
.readverify
);
490 bitmap_free(&ve
.r_bad
);
492 bitmap_free(&ve
.d_bad
);
497 /* Estimate how much work we're going to do. */
/*
 * Fetches block and inode counts through xfs_scan_estimate_blocks() and
 * stores the work estimate in *items: the used blocks on the data and
 * realtime devices, (d_blocks - d_bfree) + (r_blocks - r_bfree), shifted
 * left by ctx->blocklog (presumably converting blocks to bytes).  The
 * thread count written to *nr_threads comes from
 * disk_heads(ctx->datadev).
 * NOTE(review): f_files and f_free are fetched but unused in the visible
 * lines.  The declaration of items, the check on moveon and the return
 * statement are missing from this listing.
 */
499 xfs_estimate_verify_work(
500 struct scrub_ctx
*ctx
,
502 unsigned int *nr_threads
,
505 unsigned long long d_blocks
;
506 unsigned long long d_bfree
;
507 unsigned long long r_blocks
;
508 unsigned long long r_bfree
;
509 unsigned long long f_files
;
510 unsigned long long f_free
;
513 moveon
= xfs_scan_estimate_blocks(ctx
, &d_blocks
, &d_bfree
,
514 &r_blocks
, &r_bfree
, &f_files
, &f_free
);
518 *items
= ((d_blocks
- d_bfree
) + (r_blocks
- r_bfree
)) << ctx
->blocklog
;
519 *nr_threads
= disk_heads(ctx
->datadev
);