2 * Copyright (C) 2018 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
24 #include <sys/statvfs.h>
30 #include "workqueue.h"
31 #include "xfs_scrub.h"
36 #include "fscounters.h"
38 #include "read_verify.h"
43 * Phase 6: Verify data file integrity.
45 * Identify potential data block extents with GETFSMAP, then feed those
46 * extents to the read-verify pool to get the verify commands batched,
47 * issued, and (if there are problems) reported back to us. If there
48 * are errors, we'll record the bad regions and (if available) use rmap
49 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
50 * whole directory tree looking for files that overlap the bad regions
51 * and report the paths of the now corrupt files.
54 /* Find the fd for a given device identifier. */
57 struct scrub_ctx
*ctx
,
60 if (dev
== ctx
->fsinfo
.fs_datadev
)
62 else if (dev
== ctx
->fsinfo
.fs_logdev
)
64 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
69 /* Find the device major/minor for a given file descriptor. */
72 struct scrub_ctx
*ctx
,
75 if (disk
== ctx
->datadev
)
76 return ctx
->fsinfo
.fs_datadev
;
77 else if (disk
== ctx
->logdev
)
78 return ctx
->fsinfo
.fs_logdev
;
79 else if (disk
== ctx
->rtdev
)
80 return ctx
->fsinfo
.fs_rtdev
;
89 static const struct owner_decode special_owners
[] = {
90 {XFS_FMR_OWN_FREE
, "free space"},
91 {XFS_FMR_OWN_UNKNOWN
, "unknown owner"},
92 {XFS_FMR_OWN_FS
, "static FS metadata"},
93 {XFS_FMR_OWN_LOG
, "journalling log"},
94 {XFS_FMR_OWN_AG
, "per-AG metadata"},
95 {XFS_FMR_OWN_INOBT
, "inode btree blocks"},
96 {XFS_FMR_OWN_INODES
, "inodes"},
97 {XFS_FMR_OWN_REFC
, "refcount btree"},
98 {XFS_FMR_OWN_COW
, "CoW staging"},
99 {XFS_FMR_OWN_DEFECTIVE
, "bad blocks"},
103 /* Decode a special owner. */
105 xfs_decode_special_owner(
108 const struct owner_decode
*od
= special_owners
;
111 if (od
->owner
== owner
)
119 /* Routines to translate bad physical extents into file paths and offsets. */
121 struct xfs_verify_error_info
{
122 struct bitmap
*d_bad
; /* bytes */
123 struct bitmap
*r_bad
; /* bytes */
126 /* Report if this extent overlaps a bad region. */
128 xfs_report_verify_inode_bmap(
129 struct scrub_ctx
*ctx
,
134 struct xfs_bmap
*bmap
,
137 struct xfs_verify_error_info
*vei
= arg
;
140 /* Only report errors for real extents. */
141 if (bmap
->bm_flags
& (BMV_OF_PREALLOC
| BMV_OF_DELALLOC
))
144 if (fsx
->fsx_xflags
& FS_XFLAG_REALTIME
)
149 if (!bitmap_test(bmp
, bmap
->bm_physical
, bmap
->bm_length
))
152 str_error(ctx
, descr
,
153 _("offset %llu failed read verification."), bmap
->bm_offset
);
157 /* Iterate the extent mappings of a file to report errors. */
159 xfs_report_verify_fd(
160 struct scrub_ctx
*ctx
,
165 struct xfs_bmap key
= {0};
169 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_DATA_FORK
, &key
,
170 xfs_report_verify_inode_bmap
, arg
);
175 moveon
= xfs_iterate_filemaps(ctx
, descr
, fd
, XFS_ATTR_FORK
, &key
,
176 xfs_report_verify_inode_bmap
, arg
);
182 /* Report read verify errors in unlinked (but still open) files. */
184 xfs_report_verify_inode(
185 struct scrub_ctx
*ctx
,
186 struct xfs_handle
*handle
,
187 struct xfs_bstat
*bstat
,
190 char descr
[DESCR_BUFSZ
];
191 char buf
[DESCR_BUFSZ
];
196 snprintf(descr
, DESCR_BUFSZ
, _("inode %"PRIu64
" (unlinked)"),
197 (uint64_t)bstat
->bs_ino
);
199 /* Ignore linked files and things we can't open. */
200 if (bstat
->bs_nlink
!= 0)
202 if (!S_ISREG(bstat
->bs_mode
) && !S_ISDIR(bstat
->bs_mode
))
205 /* Try to open the inode. */
206 fd
= xfs_open_handle(handle
);
212 str_warn(ctx
, descr
, "%s", strerror_r(error
, buf
, DESCR_BUFSZ
));
216 /* Go find the badness. */
217 moveon
= xfs_report_verify_fd(ctx
, descr
, fd
, arg
);
220 return moveon
? 0 : XFS_ITERATE_INODES_ABORT
;
223 /* Scan a directory for matches in the read verify error list. */
225 xfs_report_verify_dir(
226 struct scrub_ctx
*ctx
,
231 return xfs_report_verify_fd(ctx
, path
, dir_fd
, arg
);
235 * Scan the inode associated with a directory entry for matches with
236 * the read verify error list.
239 xfs_report_verify_dirent(
240 struct scrub_ctx
*ctx
,
243 struct dirent
*dirent
,
250 /* Ignore things we can't open. */
251 if (!S_ISREG(sb
->st_mode
) && !S_ISDIR(sb
->st_mode
))
254 /* Ignore . and .. */
255 if (!strcmp(".", dirent
->d_name
) || !strcmp("..", dirent
->d_name
))
259 * If we were given a dirent, open the associated file under
260 * dir_fd for badblocks scanning. If dirent is NULL, then it's
261 * the directory itself we want to scan.
263 fd
= openat(dir_fd
, dirent
->d_name
,
264 O_RDONLY
| O_NOATIME
| O_NOFOLLOW
| O_NOCTTY
);
268 /* Go find the badness. */
269 moveon
= xfs_report_verify_fd(ctx
, path
, fd
, arg
);
279 /* Given bad extent lists for the data & rtdev, find bad files. */
281 xfs_report_verify_errors(
282 struct scrub_ctx
*ctx
,
283 struct bitmap
*d_bad
,
284 struct bitmap
*r_bad
)
286 struct xfs_verify_error_info vei
;
292 /* Scan the directory tree to get file paths. */
293 moveon
= scan_fs_tree(ctx
, xfs_report_verify_dir
,
294 xfs_report_verify_dirent
, &vei
);
298 /* Scan for unlinked files. */
299 return xfs_scan_all_inodes(ctx
, xfs_report_verify_inode
, &vei
);
302 /* Verify disk blocks with GETFSMAP */
304 struct xfs_verify_extent
{
305 struct read_verify_pool
*readverify
;
306 struct ptvar
*rvstate
;
307 struct bitmap
*d_bad
; /* bytes */
308 struct bitmap
*r_bad
; /* bytes */
311 /* Report an IO error resulting from read-verify based off getfsmap. */
313 xfs_check_rmap_error_report(
314 struct scrub_ctx
*ctx
,
321 uint64_t err_physical
= *(uint64_t *)arg
;
324 if (err_physical
> map
->fmr_physical
)
325 err_off
= err_physical
- map
->fmr_physical
;
329 snprintf(buf
, 32, _("disk offset %"PRIu64
),
330 (uint64_t)BTOBB(map
->fmr_physical
+ err_off
));
332 if (map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) {
333 type
= xfs_decode_special_owner(map
->fmr_owner
);
335 _("%s failed read verification."),
340 * XXX: If we had a getparent() call we could report IO errors
341 * efficiently. Until then, we'll have to scan the dir tree
342 * to find the bad file's pathname.
349 * Remember a read error for later, and see if rmap will tell us about the
350 * owner ahead of time.
353 xfs_check_rmap_ioerr(
354 struct scrub_ctx
*ctx
,
361 struct fsmap keys
[2];
362 char descr
[DESCR_BUFSZ
];
363 struct xfs_verify_extent
*ve
= arg
;
368 dev
= xfs_disk_to_dev(ctx
, disk
);
371 * If we don't have parent pointers, save the bad extent for later scanning.
374 if (dev
== ctx
->fsinfo
.fs_datadev
)
376 else if (dev
== ctx
->fsinfo
.fs_rtdev
)
381 moveon
= bitmap_set(tree
, start
, length
);
383 str_errno(ctx
, ctx
->mntpoint
);
386 snprintf(descr
, DESCR_BUFSZ
, _("dev %d:%d ioerr @ %"PRIu64
":%"PRIu64
" "),
387 major(dev
), minor(dev
), start
, length
);
389 /* Go figure out which blocks are bad from the fsmap. */
390 memset(keys
, 0, sizeof(struct fsmap
) * 2);
391 keys
->fmr_device
= dev
;
392 keys
->fmr_physical
= start
;
393 (keys
+ 1)->fmr_device
= dev
;
394 (keys
+ 1)->fmr_physical
= start
+ length
- 1;
395 (keys
+ 1)->fmr_owner
= ULLONG_MAX
;
396 (keys
+ 1)->fmr_offset
= ULLONG_MAX
;
397 (keys
+ 1)->fmr_flags
= UINT_MAX
;
398 xfs_iterate_fsmap(ctx
, descr
, keys
, xfs_check_rmap_error_report
,
402 /* Schedule a read-verify of a (data block) extent. */
405 struct scrub_ctx
*ctx
,
410 struct xfs_verify_extent
*ve
= arg
;
413 dbg_printf("rmap dev %d:%d phys %"PRIu64
" owner %"PRId64
414 " offset %"PRIu64
" len %"PRIu64
" flags 0x%x\n",
415 major(map
->fmr_device
), minor(map
->fmr_device
),
416 (uint64_t)map
->fmr_physical
, (int64_t)map
->fmr_owner
,
417 (uint64_t)map
->fmr_offset
, (uint64_t)map
->fmr_length
,
420 /* "Unknown" extents should be verified; they could be data. */
421 if ((map
->fmr_flags
& FMR_OF_SPECIAL_OWNER
) &&
422 map
->fmr_owner
== XFS_FMR_OWN_UNKNOWN
)
423 map
->fmr_flags
&= ~FMR_OF_SPECIAL_OWNER
;
426 * We only care about read-verifying data extents that have been
427 * written to disk. This means we can skip "special" owners
428 * (metadata), xattr blocks, unwritten extents, and extent maps.
429 * These should all get checked elsewhere in the scrubber.
431 if (map
->fmr_flags
& (FMR_OF_PREALLOC
| FMR_OF_ATTR_FORK
|
432 FMR_OF_EXTENT_MAP
| FMR_OF_SPECIAL_OWNER
))
435 /* XXX: Filter out directory data blocks. */
437 /* Schedule the read verify command for (eventual) running. */
438 disk
= xfs_dev_to_disk(ctx
, map
->fmr_device
);
440 read_verify_schedule_io(ve
->readverify
, ptvar_get(ve
->rvstate
), disk
,
441 map
->fmr_physical
, map
->fmr_length
, ve
);
444 /* Is this the last extent? Fire off the read. */
445 if (map
->fmr_flags
& FMR_OF_LAST
)
446 read_verify_force_io(ve
->readverify
, ptvar_get(ve
->rvstate
));
452 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
453 * do data checksums, we trust that the underlying storage will pass back
454 * an IO error if it can't retrieve whatever we previously stored there.
455 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
456 * scan the extent maps of the entire fs tree (and the unlinked inodes)
457 * to figure out which files are now broken.
461 struct scrub_ctx
*ctx
)
463 struct xfs_verify_extent ve
;
466 ve
.rvstate
= ptvar_init(scrub_nproc(ctx
), sizeof(struct read_verify
));
468 str_errno(ctx
, ctx
->mntpoint
);
472 moveon
= bitmap_init(&ve
.d_bad
);
474 str_errno(ctx
, ctx
->mntpoint
);
478 moveon
= bitmap_init(&ve
.r_bad
);
480 str_errno(ctx
, ctx
->mntpoint
);
484 ve
.readverify
= read_verify_pool_init(ctx
, ctx
->geo
.blocksize
,
485 xfs_check_rmap_ioerr
, disk_heads(ctx
->datadev
));
486 if (!ve
.readverify
) {
488 str_error(ctx
, ctx
->mntpoint
,
489 _("Could not create media verifier."));
492 moveon
= xfs_scan_all_spacemaps(ctx
, xfs_check_rmap
, &ve
);
495 read_verify_pool_flush(ve
.readverify
);
496 ctx
->bytes_checked
+= read_verify_bytes(ve
.readverify
);
497 read_verify_pool_destroy(ve
.readverify
);
499 /* Scan the whole dir tree to see what matches the bad extents. */
500 if (!bitmap_empty(ve
.d_bad
) || !bitmap_empty(ve
.r_bad
))
501 moveon
= xfs_report_verify_errors(ctx
, ve
.d_bad
, ve
.r_bad
);
503 bitmap_free(&ve
.r_bad
);
504 bitmap_free(&ve
.d_bad
);
505 ptvar_free(ve
.rvstate
);
509 read_verify_pool_destroy(ve
.readverify
);
511 bitmap_free(&ve
.r_bad
);
513 bitmap_free(&ve
.d_bad
);
515 ptvar_free(ve
.rvstate
);
519 /* Estimate how much work we're going to do. */
521 xfs_estimate_verify_work(
522 struct scrub_ctx
*ctx
,
524 unsigned int *nr_threads
,
527 unsigned long long d_blocks
;
528 unsigned long long d_bfree
;
529 unsigned long long r_blocks
;
530 unsigned long long r_bfree
;
531 unsigned long long f_files
;
532 unsigned long long f_free
;
535 moveon
= xfs_scan_estimate_blocks(ctx
, &d_blocks
, &d_bfree
,
536 &r_blocks
, &r_bfree
, &f_files
, &f_free
);
540 *items
= ((d_blocks
- d_bfree
) + (r_blocks
- r_bfree
)) << ctx
->blocklog
;
541 *nr_threads
= disk_heads(ctx
->datadev
);