2 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
23 #include <sys/statvfs.h>
27 #include "workqueue.h"
28 #include "xfs_scrub.h"
33 #include "fscounters.h"
35 #include "read_verify.h"
40 * Phase 6: Verify data file integrity.
42 * Identify potential data block extents with GETFSMAP, then feed those
43 * extents to the read-verify pool to get the verify commands batched,
44 * issued, and (if there are problems) reported back to us. If there
45 * are errors, we'll record the bad regions and (if available) use rmap
46 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
47 * whole directory tree looking for files that overlap the bad regions
48 * and report the paths of the now corrupt files.
51 /* Find the disk for a given device identifier. */
54 struct scrub_ctx
*ctx
,
57 if (dev
== ctx
->fsinfo
.fs_datadev
)
59 else if (dev
== ctx
->fsinfo
.fs_logdev
)
61 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
66 /* Find the device major/minor for a given disk. */
69 struct scrub_ctx
*ctx
,
72 if (disk
== ctx
->datadev
)
73 return ctx
->fsinfo
.fs_datadev
;
74 else if (disk
== ctx
->logdev
)
75 return ctx
->fsinfo
.fs_logdev
;
76 else if (disk
== ctx
->rtdev
)
77 return ctx
->fsinfo
.fs_rtdev
;
86 static const struct owner_decode special_owners
[] = {
87 {XFS_FMR_OWN_FREE
, "free space"},
88 {XFS_FMR_OWN_UNKNOWN
, "unknown owner"},
89 {XFS_FMR_OWN_FS
, "static FS metadata"},
90 {XFS_FMR_OWN_LOG
, "journalling log"},
91 {XFS_FMR_OWN_AG
, "per-AG metadata"},
92 {XFS_FMR_OWN_INOBT
, "inode btree blocks"},
93 {XFS_FMR_OWN_INODES
, "inodes"},
94 {XFS_FMR_OWN_REFC
, "refcount btree"},
95 {XFS_FMR_OWN_COW
, "CoW staging"},
96 {XFS_FMR_OWN_DEFECTIVE
, "bad blocks"},
100 /* Decode a special owner. */
102 xfs_decode_special_owner(
105 const struct owner_decode
*od
= special_owners
;
108 if (od
->owner
== owner
)
116 /* Routines to translate bad physical extents into file paths and offsets. */
118 struct xfs_verify_error_info
{
119 struct bitmap
*d_bad
; /* bytes */
120 struct bitmap
*r_bad
; /* bytes */
123 /* Report if this extent overlaps a bad region. */
125 xfs_report_verify_inode_bmap(
126 struct scrub_ctx
*ctx
,
131 struct xfs_bmap
*bmap
,
134 struct xfs_verify_error_info
*vei
= arg
;
137 /* Only report errors for real extents. */
138 if (bmap
->bm_flags
& (BMV_OF_PREALLOC
| BMV_OF_DELALLOC
))
141 if (fsx
->fsx_xflags
& FS_XFLAG_REALTIME
)
146 if (!bitmap_test(bmp
, bmap
->bm_physical
, bmap
->bm_length
))
149 str_error(ctx
, descr
,
150 _("offset %llu failed read verification."), bmap
->bm_offset
);
154 /* Iterate the extent mappings of a file to report errors. */
156 xfs_report_verify_fd(
157 struct scrub_ctx
*ctx
,
162 struct xfs_bmap key
= {0};
166 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_DATA_FORK
, &key
,
167 xfs_report_verify_inode_bmap
, arg
);
172 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_ATTR_FORK
, &key
,
173 xfs_report_verify_inode_bmap
, arg
);
179 /* Report read verify errors in unlinked (but still open) files. */
181 xfs_report_verify_inode(
182 struct scrub_ctx
*ctx
,
183 struct xfs_handle
*handle
,
184 struct xfs_bstat
*bstat
,
187 char descr
[DESCR_BUFSZ
];
192 snprintf(descr
, DESCR_BUFSZ
, _("inode %"PRIu64
" (unlinked)"),
193 (uint64_t)bstat
->bs_ino
);
195 /* Ignore linked files and things we can't open. */
196 if (bstat
->bs_nlink
!= 0)
198 if (!S_ISREG(bstat
->bs_mode
) && !S_ISDIR(bstat
->bs_mode
))
201 /* Try to open the inode. */
202 fd
= xfs_open_handle(handle
);
209 _("Disappeared during read error reporting."));
213 /* Go find the badness. */
214 moveon
= xfs_report_verify_fd(ctx
, descr
, fd
, arg
);
217 str_errno(ctx
, descr
);
219 return moveon
? 0 : XFS_ITERATE_INODES_ABORT
;
222 /* Scan a directory for matches in the read verify error list. */
224 xfs_report_verify_dir(
225 struct scrub_ctx
*ctx
,
230 return xfs_report_verify_fd(ctx
, path
, dir_fd
, arg
);
234 * Scan the inode associated with a directory entry for matches with
235 * the read verify error list.
238 xfs_report_verify_dirent(
239 struct scrub_ctx
*ctx
,
242 struct dirent
*dirent
,
250 /* Ignore things we can't open. */
251 if (!S_ISREG(sb
->st_mode
) && !S_ISDIR(sb
->st_mode
))
254 /* Ignore . and .. */
255 if (!strcmp(".", dirent
->d_name
) || !strcmp("..", dirent
->d_name
))
259 * If we were given a dirent, open the associated file under
260 * dir_fd for badblocks scanning. If dirent is NULL, then it's
261 * the directory itself we want to scan.
263 fd
= openat(dir_fd
, dirent
->d_name
,
264 O_RDONLY
| O_NOATIME
| O_NOFOLLOW
| O_NOCTTY
);
268 /* Go find the badness. */
269 moveon
= xfs_report_verify_fd(ctx
, path
, fd
, arg
);
276 str_errno(ctx
, path
);
280 /* Given bad extent lists for the data & rtdev, find bad files. */
282 xfs_report_verify_errors(
283 struct scrub_ctx
*ctx
,
284 struct bitmap
*d_bad
,
285 struct bitmap
*r_bad
)
287 struct xfs_verify_error_info vei
;
293 /* Scan the directory tree to get file paths. */
294 moveon
= scan_fs_tree(ctx
, xfs_report_verify_dir
,
295 xfs_report_verify_dirent
, &vei
);
299 /* Scan for unlinked files. */
300 return xfs_scan_all_inodes(ctx
, xfs_report_verify_inode
, &vei
);
303 /* Verify disk blocks with GETFSMAP */
305 struct xfs_verify_extent
{
306 struct read_verify_pool
*readverify
;
307 struct ptvar
*rvstate
;
308 struct bitmap
*d_bad
; /* bytes */
309 struct bitmap
*r_bad
; /* bytes */
312 /* Report an IO error resulting from read-verify based off getfsmap. */
314 xfs_check_rmap_error_report(
315 struct scrub_ctx
*ctx
,
322 uint64_t err_physical
= *(uint64_t *)arg
;
325 if (err_physical
> map
->fmr_physical
)
326 err_off
= err_physical
- map
->fmr_physical
;
330 snprintf(buf
, 32, _("disk offset %"PRIu64
),
331 (uint64_t)BTOBB(map
->fmr_physical
+ err_off
));
333 if (map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) {
334 type
= xfs_decode_special_owner(map
->fmr_owner
);
336 _("%s failed read verification."),
341 * XXX: If we had a getparent() call we could report IO errors
342 * efficiently. Until then, we'll have to scan the dir tree
343 * to find the bad file's pathname.
350 * Remember a read error for later, and see if rmap will tell us about the
351 * owner ahead of time.
354 xfs_check_rmap_ioerr(
355 struct scrub_ctx
*ctx
,
362 struct fsmap keys
[2];
363 char descr
[DESCR_BUFSZ
];
364 struct xfs_verify_extent
*ve
= arg
;
369 dev
= xfs_disk_to_dev(ctx
, disk
);
372 * If we don't have parent pointers, save the bad extent for
375 if (dev
== ctx
->fsinfo
.fs_datadev
)
377 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
382 moveon
= bitmap_set(tree
, start
, length
);
384 str_errno(ctx
, ctx
->mntpoint
);
387 snprintf(descr
, DESCR_BUFSZ
, _("dev %d:%d ioerr @ %"PRIu64
":%"PRIu64
" "),
388 major(dev
), minor(dev
), start
, length
);
390 /* Go figure out which blocks are bad from the fsmap. */
391 memset(keys
, 0, sizeof(struct fsmap
) * 2);
392 keys
->fmr_device
= dev
;
393 keys
->fmr_physical
= start
;
394 (keys
+ 1)->fmr_device
= dev
;
395 (keys
+ 1)->fmr_physical
= start
+ length
- 1;
396 (keys
+ 1)->fmr_owner
= ULLONG_MAX
;
397 (keys
+ 1)->fmr_offset
= ULLONG_MAX
;
398 (keys
+ 1)->fmr_flags
= UINT_MAX
;
399 xfs_iterate_fsmap(ctx
, descr
, keys
, xfs_check_rmap_error_report
,
403 /* Schedule a read-verify of a (data block) extent. */
406 struct scrub_ctx
*ctx
,
411 struct xfs_verify_extent
*ve
= arg
;
414 dbg_printf("rmap dev %d:%d phys %"PRIu64
" owner %"PRId64
415 " offset %"PRIu64
" len %"PRIu64
" flags 0x%x\n",
416 major(map
->fmr_device
), minor(map
->fmr_device
),
417 (uint64_t)map
->fmr_physical
, (int64_t)map
->fmr_owner
,
418 (uint64_t)map
->fmr_offset
, (uint64_t)map
->fmr_length
,
421 /* "Unknown" extents should be verified; they could be data. */
422 if ((map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) &&
423 map
->fmr_owner
== XFS_FMR_OWN_UNKNOWN
)
424 map
->fmr_flags
&= ~FMR_OF_SPECIAL_OWNER
;
427 * We only care about read-verifying data extents that have been
428 * written to disk. This means we can skip "special" owners
429 * (metadata), xattr blocks, unwritten extents, and extent maps.
430 * These should all get checked elsewhere in the scrubber.
432 if (map
->fmr_flags
& (FMR_OF_PREALLOC
| FMR_OF_ATTR_FORK
|
433 FMR_OF_EXTENT_MAP
| FMR_OF_SPECIAL_OWNER
))
436 /* XXX: Filter out directory data blocks. */
438 /* Schedule the read verify command for (eventual) running. */
439 disk
= xfs_dev_to_disk(ctx
, map
->fmr_device
);
441 read_verify_schedule_io(ve
->readverify
, ptvar_get(ve
->rvstate
), disk
,
442 map
->fmr_physical
, map
->fmr_length
, ve
);
445 /* Is this the last extent? Fire off the read. */
446 if (map
->fmr_flags
& FMR_OF_LAST
)
447 read_verify_force_io(ve
->readverify
, ptvar_get(ve
->rvstate
));
453 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
454 * do data checksums, we trust that the underlying storage will pass back
455 * an IO error if it can't retrieve whatever we previously stored there.
456 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
457 * scan the extent maps of the entire fs tree (and the unlinked inodes)
458 * to figure out which files are now broken.
462 struct scrub_ctx
*ctx
)
464 struct xfs_verify_extent ve
;
467 ve
.rvstate
= ptvar_init(scrub_nproc(ctx
), sizeof(struct read_verify
));
469 str_errno(ctx
, ctx
->mntpoint
);
473 moveon
= bitmap_init(&ve
.d_bad
);
475 str_errno(ctx
, ctx
->mntpoint
);
479 moveon
= bitmap_init(&ve
.r_bad
);
481 str_errno(ctx
, ctx
->mntpoint
);
485 ve
.readverify
= read_verify_pool_init(ctx
, ctx
->geo
.blocksize
,
486 xfs_check_rmap_ioerr
, disk_heads(ctx
->datadev
));
487 if (!ve
.readverify
) {
489 str_info(ctx
, ctx
->mntpoint
,
490 _("Could not create media verifier."));
493 moveon
= xfs_scan_all_spacemaps(ctx
, xfs_check_rmap
, &ve
);
496 read_verify_pool_flush(ve
.readverify
);
497 ctx
->bytes_checked
+= read_verify_bytes(ve
.readverify
);
498 read_verify_pool_destroy(ve
.readverify
);
500 /* Scan the whole dir tree to see what matches the bad extents. */
501 if (!bitmap_empty(ve
.d_bad
) || !bitmap_empty(ve
.r_bad
))
502 moveon
= xfs_report_verify_errors(ctx
, ve
.d_bad
, ve
.r_bad
);
504 bitmap_free(&ve
.r_bad
);
505 bitmap_free(&ve
.d_bad
);
506 ptvar_free(ve
.rvstate
);
510 read_verify_pool_destroy(ve
.readverify
);
512 bitmap_free(&ve
.r_bad
);
514 bitmap_free(&ve
.d_bad
);
516 ptvar_free(ve
.rvstate
);
520 /* Estimate how much work we're going to do. */
522 xfs_estimate_verify_work(
523 struct scrub_ctx
*ctx
,
525 unsigned int *nr_threads
,
528 unsigned long long d_blocks
;
529 unsigned long long d_bfree
;
530 unsigned long long r_blocks
;
531 unsigned long long r_bfree
;
532 unsigned long long f_files
;
533 unsigned long long f_free
;
536 moveon
= xfs_scan_estimate_blocks(ctx
, &d_blocks
, &d_bfree
,
537 &r_blocks
, &r_bfree
, &f_files
, &f_free
);
541 *items
= ((d_blocks
- d_bfree
) + (r_blocks
- r_bfree
)) << ctx
->blocklog
;
542 *nr_threads
= disk_heads(ctx
->datadev
);