1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
9 #include <sys/statvfs.h>
11 #include "libfrog/paths.h"
12 #include "libfrog/workqueue.h"
13 #include "xfs_scrub.h"
15 #include "libfrog/bitmap.h"
18 #include "fscounters.h"
20 #include "read_verify.h"
25 * Phase 6: Verify data file integrity.
27 * Identify potential data block extents with GETFSMAP, then feed those
28 * extents to the read-verify pool to get the verify commands batched,
29 * issued, and (if there are problems) reported back to us. If there
30 * are errors, we'll record the bad regions and (if available) use rmap
31 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
32 * whole directory tree looking for files that overlap the bad regions
33 * and report the paths of the now corrupt files.
36 /* Verify disk blocks with GETFSMAP */
/*
 * State carried through the phase 6 media scan: one read-verify pool per
 * device and the bitmaps that collect bad ranges for later reporting.
 */
38 struct media_verify_state
{
/* Pool created against ctx->datadev. */
39 struct read_verify_pool
*rvp_data
;
/* Pool created against ctx->logdev. */
40 struct read_verify_pool
*rvp_log
;
/* Pool created against ctx->rtdev. */
41 struct read_verify_pool
*rvp_realtime
;
/* Bad ranges recorded for the data device. */
42 struct bitmap
*d_bad
; /* bytes */
/* Bad ranges recorded for the realtime device. */
43 struct bitmap
*r_bad
; /* bytes */
46 /* Find the read-verify pool for a given device identifier. */
47 static struct read_verify_pool
*
49 struct scrub_ctx
*ctx
,
50 struct media_verify_state
*vs
,
53 if (dev
== ctx
->fsinfo
.fs_datadev
)
55 else if (dev
== ctx
->fsinfo
.fs_logdev
)
57 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
58 return vs
->rvp_realtime
;
62 /* Find the device major/minor for a given disk. */
65 struct scrub_ctx
*ctx
,
68 if (disk
== ctx
->datadev
)
69 return ctx
->fsinfo
.fs_datadev
;
70 else if (disk
== ctx
->logdev
)
71 return ctx
->fsinfo
.fs_logdev
;
72 else if (disk
== ctx
->rtdev
)
73 return ctx
->fsinfo
.fs_rtdev
;
/*
 * Human-readable descriptions for the XFS_FMR_OWN_* special owner codes.
 * xfs_decode_special_owner() looks entries up by their owner field so that
 * read errors in extents flagged FMR_OF_SPECIAL_OWNER can be reported by name.
 */
82 static const struct owner_decode special_owners
[] = {
83 {XFS_FMR_OWN_FREE
, "free space"},
84 {XFS_FMR_OWN_UNKNOWN
, "unknown owner"},
85 {XFS_FMR_OWN_FS
, "static FS metadata"},
86 {XFS_FMR_OWN_LOG
, "journalling log"},
87 {XFS_FMR_OWN_AG
, "per-AG metadata"},
88 {XFS_FMR_OWN_INOBT
, "inode btree blocks"},
89 {XFS_FMR_OWN_INODES
, "inodes"},
90 {XFS_FMR_OWN_REFC
, "refcount btree"},
91 {XFS_FMR_OWN_COW
, "CoW staging"},
92 {XFS_FMR_OWN_DEFECTIVE
, "bad blocks"},
96 /* Decode a special owner. */
98 xfs_decode_special_owner(
101 const struct owner_decode
*od
= special_owners
;
104 if (od
->owner
== owner
)
112 /* Routines to translate bad physical extents into file paths and offsets. */
114 /* Report if this extent overlaps a bad region. */
116 xfs_report_verify_inode_bmap(
117 struct scrub_ctx
*ctx
,
122 struct xfs_bmap
*bmap
,
125 struct media_verify_state
*vs
= arg
;
128 /* Only report errors for real extents. */
129 if (bmap
->bm_flags
& (BMV_OF_PREALLOC
| BMV_OF_DELALLOC
))
132 if (fsx
->fsx_xflags
& FS_XFLAG_REALTIME
)
137 if (!bitmap_test(bmp
, bmap
->bm_physical
, bmap
->bm_length
))
140 str_error(ctx
, descr
,
141 _("offset %llu failed read verification."), bmap
->bm_offset
);
145 /* Iterate the extent mappings of a file to report errors. */
147 xfs_report_verify_fd(
148 struct scrub_ctx
*ctx
,
153 struct xfs_bmap key
= {0};
157 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_DATA_FORK
, &key
,
158 xfs_report_verify_inode_bmap
, arg
);
163 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_ATTR_FORK
, &key
,
164 xfs_report_verify_inode_bmap
, arg
);
170 /* Report read verify errors in unlinked (but still open) files. */
172 xfs_report_verify_inode(
173 struct scrub_ctx
*ctx
,
174 struct xfs_handle
*handle
,
175 struct xfs_bulkstat
*bstat
,
178 char descr
[DESCR_BUFSZ
];
183 snprintf(descr
, DESCR_BUFSZ
, _("inode %"PRIu64
" (unlinked)"),
184 (uint64_t)bstat
->bs_ino
);
186 /* Ignore linked files and things we can't open. */
187 if (bstat
->bs_nlink
!= 0)
189 if (!S_ISREG(bstat
->bs_mode
) && !S_ISDIR(bstat
->bs_mode
))
192 /* Try to open the inode. */
193 fd
= xfs_open_handle(handle
);
200 _("Disappeared during read error reporting."));
204 /* Go find the badness. */
205 moveon
= xfs_report_verify_fd(ctx
, descr
, fd
, arg
);
208 str_errno(ctx
, descr
);
210 return moveon
? 0 : XFS_ITERATE_INODES_ABORT
;
213 /* Scan a directory for matches in the read verify error list. */
215 xfs_report_verify_dir(
216 struct scrub_ctx
*ctx
,
221 return xfs_report_verify_fd(ctx
, path
, dir_fd
, arg
);
225 * Scan the inode associated with a directory entry for matches with
226 * the read verify error list.
229 xfs_report_verify_dirent(
230 struct scrub_ctx
*ctx
,
233 struct dirent
*dirent
,
241 /* Ignore things we can't open. */
242 if (!S_ISREG(sb
->st_mode
) && !S_ISDIR(sb
->st_mode
))
245 /* Ignore . and .. */
246 if (!strcmp(".", dirent
->d_name
) || !strcmp("..", dirent
->d_name
))
250 * If we were given a dirent, open the associated file under
251 * dir_fd for badblocks scanning. If dirent is NULL, then it's
252 * the directory itself we want to scan.
254 fd
= openat(dir_fd
, dirent
->d_name
,
255 O_RDONLY
| O_NOATIME
| O_NOFOLLOW
| O_NOCTTY
);
259 /* Go find the badness. */
260 moveon
= xfs_report_verify_fd(ctx
, path
, fd
, arg
);
267 str_errno(ctx
, path
);
271 /* Given bad extent lists for the data & rtdev, find bad files. */
273 xfs_report_verify_errors(
274 struct scrub_ctx
*ctx
,
275 struct media_verify_state
*vs
)
279 /* Scan the directory tree to get file paths. */
280 moveon
= scan_fs_tree(ctx
, xfs_report_verify_dir
,
281 xfs_report_verify_dirent
, vs
);
285 /* Scan for unlinked files. */
286 return xfs_scan_all_inodes(ctx
, xfs_report_verify_inode
, vs
);
289 /* Report an IO error resulting from read-verify based off getfsmap. */
291 xfs_check_rmap_error_report(
292 struct scrub_ctx
*ctx
,
299 uint64_t err_physical
= *(uint64_t *)arg
;
302 if (err_physical
> map
->fmr_physical
)
303 err_off
= err_physical
- map
->fmr_physical
;
307 snprintf(buf
, 32, _("disk offset %"PRIu64
),
308 (uint64_t)BTOBB(map
->fmr_physical
+ err_off
));
310 if (map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) {
311 type
= xfs_decode_special_owner(map
->fmr_owner
);
313 _("%s failed read verification."),
318 * XXX: If we had a getparent() call we could report IO errors
319 * efficiently. Until then, we'll have to scan the dir tree
320 * to find the bad file's pathname.
327 * Remember a read error for later, and see if rmap will tell us about the
328 * owner ahead of time.
331 xfs_check_rmap_ioerr(
332 struct scrub_ctx
*ctx
,
339 struct fsmap keys
[2];
340 char descr
[DESCR_BUFSZ
];
341 struct media_verify_state
*vs
= arg
;
346 dev
= xfs_disk_to_dev(ctx
, disk
);
349 * If we don't have parent pointers, save the bad extent for
352 if (dev
== ctx
->fsinfo
.fs_datadev
)
354 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
359 ret
= bitmap_set(tree
, start
, length
);
361 str_liberror(ctx
, ret
, _("setting bad block bitmap"));
364 snprintf(descr
, DESCR_BUFSZ
, _("dev %d:%d ioerr @ %"PRIu64
":%"PRIu64
" "),
365 major(dev
), minor(dev
), start
, length
);
367 /* Go figure out which blocks are bad from the fsmap. */
368 memset(keys
, 0, sizeof(struct fsmap
) * 2);
369 keys
->fmr_device
= dev
;
370 keys
->fmr_physical
= start
;
371 (keys
+ 1)->fmr_device
= dev
;
372 (keys
+ 1)->fmr_physical
= start
+ length
- 1;
373 (keys
+ 1)->fmr_owner
= ULLONG_MAX
;
374 (keys
+ 1)->fmr_offset
= ULLONG_MAX
;
375 (keys
+ 1)->fmr_flags
= UINT_MAX
;
376 xfs_iterate_fsmap(ctx
, descr
, keys
, xfs_check_rmap_error_report
,
380 /* Schedule a read-verify of a (data block) extent. */
383 struct scrub_ctx
*ctx
,
388 struct media_verify_state
*vs
= arg
;
389 struct read_verify_pool
*rvp
;
391 rvp
= xfs_dev_to_pool(ctx
, vs
, map
->fmr_device
);
393 dbg_printf("rmap dev %d:%d phys %"PRIu64
" owner %"PRId64
394 " offset %"PRIu64
" len %"PRIu64
" flags 0x%x\n",
395 major(map
->fmr_device
), minor(map
->fmr_device
),
396 (uint64_t)map
->fmr_physical
, (int64_t)map
->fmr_owner
,
397 (uint64_t)map
->fmr_offset
, (uint64_t)map
->fmr_length
,
400 /* "Unknown" extents should be verified; they could be data. */
401 if ((map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) &&
402 map
->fmr_owner
== XFS_FMR_OWN_UNKNOWN
)
403 map
->fmr_flags
&= ~FMR_OF_SPECIAL_OWNER
;
406 * We only care about read-verifying data extents that have been
407 * written to disk. This means we can skip "special" owners
408 * (metadata), xattr blocks, unwritten extents, and extent maps.
409 * These should all get checked elsewhere in the scrubber.
411 if (map
->fmr_flags
& (FMR_OF_PREALLOC
| FMR_OF_ATTR_FORK
|
412 FMR_OF_EXTENT_MAP
| FMR_OF_SPECIAL_OWNER
))
415 /* XXX: Filter out directory data blocks. */
417 /* Schedule the read verify command for (eventual) running. */
418 read_verify_schedule_io(rvp
, map
->fmr_physical
, map
->fmr_length
, vs
);
421 /* Is this the last extent? Fire off the read. */
422 if (map
->fmr_flags
& FMR_OF_LAST
)
423 read_verify_force_io(rvp
);
428 /* Wait for read/verify actions to finish, then return # bytes checked. */
431 struct read_verify_pool
*rvp
)
438 read_verify_pool_flush(rvp
);
439 ret
= read_verify_bytes(rvp
);
440 read_verify_pool_destroy(rvp
);
445 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
446 * do data checksums, we trust that the underlying storage will pass back
447 * an IO error if it can't retrieve whatever we previously stored there.
448 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
449 * scan the extent maps of the entire fs tree (and the unlinked inodes) to
450 * figure out which files are now broken.
454 struct scrub_ctx
*ctx
)
456 struct media_verify_state vs
= { NULL
};
460 ret
= bitmap_alloc(&vs
.d_bad
);
462 str_liberror(ctx
, ret
, _("creating datadev badblock bitmap"));
466 ret
= bitmap_alloc(&vs
.r_bad
);
468 str_liberror(ctx
, ret
, _("creating realtime badblock bitmap"));
472 vs
.rvp_data
= read_verify_pool_init(ctx
, ctx
->datadev
,
473 ctx
->mnt
.fsgeom
.blocksize
, xfs_check_rmap_ioerr
,
476 str_info(ctx
, ctx
->mntpoint
,
477 _("Could not create data device media verifier."));
481 vs
.rvp_log
= read_verify_pool_init(ctx
, ctx
->logdev
,
482 ctx
->mnt
.fsgeom
.blocksize
, xfs_check_rmap_ioerr
,
485 str_info(ctx
, ctx
->mntpoint
,
486 _("Could not create log device media verifier."));
491 vs
.rvp_realtime
= read_verify_pool_init(ctx
, ctx
->rtdev
,
492 ctx
->mnt
.fsgeom
.blocksize
, xfs_check_rmap_ioerr
,
494 if (!vs
.rvp_realtime
) {
495 str_info(ctx
, ctx
->mntpoint
,
496 _("Could not create realtime device media verifier."));
500 moveon
= xfs_scan_all_spacemaps(ctx
, xfs_check_rmap
, &vs
);
503 ctx
->bytes_checked
+= clean_pool(vs
.rvp_data
);
504 ctx
->bytes_checked
+= clean_pool(vs
.rvp_log
);
505 ctx
->bytes_checked
+= clean_pool(vs
.rvp_realtime
);
507 /* Scan the whole dir tree to see what matches the bad extents. */
508 if (!bitmap_empty(vs
.d_bad
) || !bitmap_empty(vs
.r_bad
))
509 moveon
= xfs_report_verify_errors(ctx
, &vs
);
511 bitmap_free(&vs
.r_bad
);
512 bitmap_free(&vs
.d_bad
);
516 if (vs
.rvp_realtime
) {
517 read_verify_pool_abort(vs
.rvp_realtime
);
518 read_verify_pool_destroy(vs
.rvp_realtime
);
522 read_verify_pool_abort(vs
.rvp_log
);
523 read_verify_pool_destroy(vs
.rvp_log
);
526 read_verify_pool_abort(vs
.rvp_data
);
527 read_verify_pool_destroy(vs
.rvp_data
);
529 bitmap_free(&vs
.r_bad
);
531 bitmap_free(&vs
.d_bad
);
536 /* Estimate how much work we're going to do. */
538 xfs_estimate_verify_work(
539 struct scrub_ctx
*ctx
,
541 unsigned int *nr_threads
,
544 unsigned long long d_blocks
;
545 unsigned long long d_bfree
;
546 unsigned long long r_blocks
;
547 unsigned long long r_bfree
;
548 unsigned long long f_files
;
549 unsigned long long f_free
;
552 moveon
= xfs_scan_estimate_blocks(ctx
, &d_blocks
, &d_bfree
,
553 &r_blocks
, &r_bfree
, &f_files
, &f_free
);
557 *items
= cvt_off_fsb_to_b(&ctx
->mnt
,
558 (d_blocks
- d_bfree
) + (r_blocks
- r_bfree
));
559 *nr_threads
= disk_heads(ctx
->datadev
);