]>
| Commit | Line | Data |
|---|---|---|
| 8d318d62 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| b364a9c0 | 2 | /* |
| 52520522 | 3 | * Copyright (C) 2018-2024 Oracle. All Rights Reserved. |
| 8d318d62 | 4 | * Author: Darrick J. Wong <djwong@kernel.org> |
| b364a9c0 | 5 | */ |
| a440f877 | 6 | #include "xfs.h" |
| b364a9c0 | 7 | #include <stdint.h> |
| b364a9c0 DW |
8 | #include <dirent.h> |
| 9 | #include <sys/statvfs.h> | |
| 9b72515a | 10 | #include <linux/fsmap.h> |
| b364a9c0 | 11 | #include "handle.h" |
| 42b4c8e8 | 12 | #include "libfrog/paths.h" |
| 56598728 | 13 | #include "libfrog/workqueue.h" |
| b364a9c0 DW |
14 | #include "xfs_scrub.h" |
| 15 | #include "common.h" | |
| a58400ed | 16 | #include "libfrog/bitmap.h" |
| b364a9c0 DW |
17 | #include "disk.h" |
| 18 | #include "filemap.h" | |
| ed60d210 | 19 | #include "fscounters.h" |
| b364a9c0 DW |
20 | #include "inodes.h" |
| 21 | #include "read_verify.h" | |
| 22 | #include "spacemap.h" | |
| 23 | #include "vfs.h" | |
| 4d0ce76d | 24 | #include "common.h" |
| 9b5d1349 | 25 | #include "libfrog/bulkstat.h" |
| b364a9c0 DW |
26 | |
| 27 | /* | |
| 28 | * Phase 6: Verify data file integrity. | |
| 29 | * | |
| 30 | * Identify potential data block extents with GETFSMAP, then feed those | |
| 31 | * extents to the read-verify pool to get the verify commands batched, | |
| 32 | * issued, and (if there are problems) reported back to us. If there | |
| 33 | * are errors, we'll record the bad regions and (if available) use rmap | |
| 34 | * to tell us if metadata are now corrupt. Otherwise, we'll scan the | |
| 35 | * whole directory tree looking for files that overlap the bad regions | |
| 36 | * and report the paths of the now corrupt files. | |
| 37 | */ | |
| 38 | ||
| f1bb1696 DW |
39 | /* Verify disk blocks with GETFSMAP */ |
| 40 | ||
/* State shared by all of the read-verify pools during the media scan. */
struct media_verify_state {
	/* One read-verify pool per device that phase 6 scans. */
	struct read_verify_pool	*rvp_data;
	struct read_verify_pool	*rvp_log;
	struct read_verify_pool	*rvp_realtime;

	/* Bad ranges remembered for the data and realtime devices. */
	struct bitmap		*d_bad;		/* bytes */
	struct bitmap		*r_bad;		/* bytes */

	/* Set when a zero-length I/O error was reported for the device. */
	bool			d_trunc:1;
	bool			r_trunc:1;
	bool			l_trunc:1;
};
| 51 | ||
| b364a9c0 | 52 | /* Find the fd for a given device identifier. */ |
| f1bb1696 | 53 | static struct read_verify_pool * |
| af9eb208 | 54 | dev_to_pool( |
| f1bb1696 | 55 | struct scrub_ctx *ctx, |
| 557f98d7 | 56 | struct media_verify_state *vs, |
| f1bb1696 | 57 | dev_t dev) |
| b364a9c0 | 58 | { |
| 37591ef3 CH |
59 | if (ctx->mnt.fsgeom.rtstart) { |
| 60 | if (dev == XFS_DEV_DATA) | |
| 61 | return vs->rvp_data; | |
| 62 | if (dev == XFS_DEV_LOG) | |
| 63 | return vs->rvp_log; | |
| 64 | if (dev == XFS_DEV_RT) | |
| 65 | return vs->rvp_realtime; | |
| 66 | } else { | |
| 67 | if (dev == ctx->fsinfo.fs_datadev) | |
| 68 | return vs->rvp_data; | |
| 69 | if (dev == ctx->fsinfo.fs_logdev) | |
| 70 | return vs->rvp_log; | |
| 71 | if (dev == ctx->fsinfo.fs_rtdev) | |
| 72 | return vs->rvp_realtime; | |
| 73 | } | |
| b364a9c0 DW |
74 | abort(); |
| 75 | } | |
| 76 | ||
| 77 | /* Find the device major/minor for a given file descriptor. */ | |
| 78 | static dev_t | |
| af9eb208 | 79 | disk_to_dev( |
| b364a9c0 DW |
80 | struct scrub_ctx *ctx, |
| 81 | struct disk *disk) | |
| 82 | { | |
| 37591ef3 CH |
83 | if (ctx->mnt.fsgeom.rtstart) { |
| 84 | if (disk == ctx->datadev) | |
| 85 | return XFS_DEV_DATA; | |
| 86 | if (disk == ctx->logdev) | |
| 87 | return XFS_DEV_LOG; | |
| 88 | if (disk == ctx->rtdev) | |
| 89 | return XFS_DEV_RT; | |
| 90 | } else { | |
| 91 | if (disk == ctx->datadev) | |
| 92 | return ctx->fsinfo.fs_datadev; | |
| 93 | if (disk == ctx->logdev) | |
| 94 | return ctx->fsinfo.fs_logdev; | |
| 95 | if (disk == ctx->rtdev) | |
| 96 | return ctx->fsinfo.fs_rtdev; | |
| 97 | } | |
| b364a9c0 DW |
98 | abort(); |
| 99 | } | |
| 100 | ||
| c9b349bd DW |
101 | /* Find the incore bad blocks bitmap for a given disk. */ |
| 102 | static struct bitmap * | |
| 103 | bitmap_for_disk( | |
| 104 | struct scrub_ctx *ctx, | |
| 105 | struct disk *disk, | |
| 106 | struct media_verify_state *vs) | |
| 107 | { | |
| 37591ef3 | 108 | if (disk == ctx->datadev) |
| c9b349bd | 109 | return vs->d_bad; |
| 37591ef3 | 110 | if (disk == ctx->rtdev) |
| c9b349bd DW |
111 | return vs->r_bad; |
| 112 | return NULL; | |
| 113 | } | |
| 114 | ||
/* Context passed to report_ioerr while walking one disk's bad ranges. */
struct disk_ioerr_report {
	struct scrub_ctx	*ctx;	/* scrub context used for reporting */
	struct disk		*disk;	/* disk whose bad ranges are walked */
};
| 119 | ||
| b364a9c0 DW |
120 | struct owner_decode { |
| 121 | uint64_t owner; | |
| 122 | const char *descr; | |
| 123 | }; | |
| 124 | ||
| 125 | static const struct owner_decode special_owners[] = { | |
| 126 | {XFS_FMR_OWN_FREE, "free space"}, | |
| 127 | {XFS_FMR_OWN_UNKNOWN, "unknown owner"}, | |
| 128 | {XFS_FMR_OWN_FS, "static FS metadata"}, | |
| 129 | {XFS_FMR_OWN_LOG, "journalling log"}, | |
| 130 | {XFS_FMR_OWN_AG, "per-AG metadata"}, | |
| 131 | {XFS_FMR_OWN_INOBT, "inode btree blocks"}, | |
| 132 | {XFS_FMR_OWN_INODES, "inodes"}, | |
| 133 | {XFS_FMR_OWN_REFC, "refcount btree"}, | |
| 134 | {XFS_FMR_OWN_COW, "CoW staging"}, | |
| 135 | {XFS_FMR_OWN_DEFECTIVE, "bad blocks"}, | |
| 136 | {0, NULL}, | |
| 137 | }; | |
| 138 | ||
| 139 | /* Decode a special owner. */ | |
| 140 | static const char * | |
| af9eb208 | 141 | decode_special_owner( |
| b364a9c0 DW |
142 | uint64_t owner) |
| 143 | { | |
| 144 | const struct owner_decode *od = special_owners; | |
| 145 | ||
| 146 | while (od->descr) { | |
| 147 | if (od->owner == owner) | |
| 148 | return od->descr; | |
| 149 | od++; | |
| 150 | } | |
| 151 | ||
| 152 | return NULL; | |
| 153 | } | |
| 154 | ||
| 155 | /* Routines to translate bad physical extents into file paths and offsets. */ | |
| 156 | ||
/* Context for reporting the parts of one file that overlap bad media. */
struct badfile_report {
	struct scrub_ctx		*ctx;	/* scrub context for reporting */
	const char			*descr;	/* description of the file */
	struct media_verify_state	*vs;	/* holds the bad range bitmaps */
	struct file_bmap		*bmap;	/* mapping currently examined */
};
| 163 | ||
| 164 | /* Report on bad extents found during a media scan. */ | |
| 165 | static int | |
| 166 | report_badfile( | |
| 167 | uint64_t start, | |
| 168 | uint64_t length, | |
| 169 | void *arg) | |
| 170 | { | |
| 171 | struct badfile_report *br = arg; | |
| 172 | unsigned long long bad_offset; | |
| 173 | unsigned long long bad_length; | |
| 174 | ||
| 175 | /* Clamp the bad region to the file mapping. */ | |
| 176 | if (start < br->bmap->bm_physical) { | |
| 177 | length -= br->bmap->bm_physical - start; | |
| 178 | start = br->bmap->bm_physical; | |
| 179 | } | |
| 180 | length = min(length, br->bmap->bm_length); | |
| 181 | ||
| 182 | /* Figure out how far into the bmap is the bad mapping and report it. */ | |
| 183 | bad_offset = start - br->bmap->bm_physical; | |
| 184 | bad_length = min(start + length, | |
| 185 | br->bmap->bm_physical + br->bmap->bm_length) - start; | |
| 186 | ||
| 49e05cb0 | 187 | str_unfixable_error(br->ctx, br->descr, |
| ed953d26 DW |
188 | _("media error at data offset %llu length %llu."), |
| 189 | br->bmap->bm_offset + bad_offset, bad_length); | |
| 190 | return 0; | |
| 191 | } | |
| 192 | ||
| b364a9c0 | 193 | /* Report if this extent overlaps a bad region. */ |
| 73ce9669 | 194 | static int |
| 663e02a0 | 195 | report_data_loss( |
| b364a9c0 | 196 | struct scrub_ctx *ctx, |
| b364a9c0 DW |
197 | int fd, |
| 198 | int whichfork, | |
| 199 | struct fsxattr *fsx, | |
| 73ce9669 | 200 | struct file_bmap *bmap, |
| b364a9c0 DW |
201 | void *arg) |
| 202 | { | |
| 73ce9669 DW |
203 | struct badfile_report *br = arg; |
| 204 | struct media_verify_state *vs = br->vs; | |
| b364a9c0 | 205 | struct bitmap *bmp; |
| 73ce9669 DW |
206 | |
| 207 | br->bmap = bmap; | |
| b364a9c0 DW |
208 | |
| 209 | /* Only report errors for real extents. */ | |
| 210 | if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) | |
| 73ce9669 | 211 | return 0; |
| b364a9c0 DW |
212 | |
| 213 | if (fsx->fsx_xflags & FS_XFLAG_REALTIME) | |
| ed5f9cc7 | 214 | bmp = vs->r_bad; |
| b364a9c0 | 215 | else |
| ed5f9cc7 | 216 | bmp = vs->d_bad; |
| b364a9c0 | 217 | |
| 93d69bc7 | 218 | return -bitmap_iterate_range(bmp, bmap->bm_physical, bmap->bm_length, |
| 73ce9669 | 219 | report_badfile, br); |
| b364a9c0 DW |
220 | } |
| 221 | ||
| 663e02a0 | 222 | /* Report if the extended attribute data overlaps a bad region. */ |
| 73ce9669 | 223 | static int |
| 663e02a0 DW |
224 | report_attr_loss( |
| 225 | struct scrub_ctx *ctx, | |
| 663e02a0 DW |
226 | int fd, |
| 227 | int whichfork, | |
| 228 | struct fsxattr *fsx, | |
| 73ce9669 | 229 | struct file_bmap *bmap, |
| 663e02a0 DW |
230 | void *arg) |
| 231 | { | |
| 73ce9669 DW |
232 | struct badfile_report *br = arg; |
| 233 | struct media_verify_state *vs = br->vs; | |
| 663e02a0 DW |
234 | struct bitmap *bmp = vs->d_bad; |
| 235 | ||
| 236 | /* Complain about attr fork extents that don't look right. */ | |
| 237 | if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) { | |
| 73ce9669 | 238 | str_info(ctx, br->descr, |
| 663e02a0 | 239 | _("found unexpected unwritten/delalloc attr fork extent.")); |
| 73ce9669 | 240 | return 0; |
| 663e02a0 DW |
241 | } |
| 242 | ||
| 243 | if (fsx->fsx_xflags & FS_XFLAG_REALTIME) { | |
| 73ce9669 | 244 | str_info(ctx, br->descr, |
| 663e02a0 | 245 | _("found unexpected realtime attr fork extent.")); |
| 73ce9669 | 246 | return 0; |
| 663e02a0 DW |
247 | } |
| 248 | ||
| 249 | if (bitmap_test(bmp, bmap->bm_physical, bmap->bm_length)) | |
| 73ce9669 | 250 | str_corrupt(ctx, br->descr, |
| 663e02a0 DW |
251 | _("media error in extended attribute data.")); |
| 252 | ||
| 73ce9669 | 253 | return 0; |
| 663e02a0 DW |
254 | } |
| 255 | ||
| b364a9c0 | 256 | /* Iterate the extent mappings of a file to report errors. */ |
| af9eb208 DW |
257 | static int |
| 258 | report_fd_loss( | |
| b364a9c0 DW |
259 | struct scrub_ctx *ctx, |
| 260 | const char *descr, | |
| 261 | int fd, | |
| 262 | void *arg) | |
| 263 | { | |
| 73ce9669 DW |
264 | struct badfile_report br = { |
| 265 | .ctx = ctx, | |
| 266 | .vs = arg, | |
| 267 | .descr = descr, | |
| 268 | }; | |
| 269 | struct file_bmap key = {0}; | |
| 270 | int ret; | |
| b364a9c0 DW |
271 | |
| 272 | /* data fork */ | |
| 73ce9669 DW |
273 | ret = scrub_iterate_filemaps(ctx, fd, XFS_DATA_FORK, &key, |
| 274 | report_data_loss, &br); | |
| 275 | if (ret) { | |
| 276 | str_liberror(ctx, ret, descr); | |
| af9eb208 | 277 | return ret; |
| 73ce9669 | 278 | } |
| b364a9c0 DW |
279 | |
| 280 | /* attr fork */ | |
| 73ce9669 DW |
281 | ret = scrub_iterate_filemaps(ctx, fd, XFS_ATTR_FORK, &key, |
| 282 | report_attr_loss, &br); | |
| 283 | if (ret) { | |
| 284 | str_liberror(ctx, ret, descr); | |
| af9eb208 | 285 | return ret; |
| 73ce9669 | 286 | } |
| af9eb208 DW |
287 | |
| 288 | return 0; | |
| b364a9c0 DW |
289 | } |
| 290 | ||
| 291 | /* Report read verify errors in unlinked (but still open) files. */ | |
| 292 | static int | |
| af9eb208 | 293 | report_inode_loss( |
| b364a9c0 DW |
294 | struct scrub_ctx *ctx, |
| 295 | struct xfs_handle *handle, | |
| 4cca629d | 296 | struct xfs_bulkstat *bstat, |
| b364a9c0 DW |
297 | void *arg) |
| 298 | { | |
| 299 | char descr[DESCR_BUFSZ]; | |
| b364a9c0 | 300 | int fd; |
| af9eb208 | 301 | int error, err2; |
| b364a9c0 | 302 | |
| b364a9c0 DW |
303 | /* Ignore linked files and things we can't open. */ |
| 304 | if (bstat->bs_nlink != 0) | |
| 305 | return 0; | |
| 306 | if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode)) | |
| 307 | return 0; | |
| 308 | ||
| 15589f0a DW |
309 | scrub_render_ino_descr(ctx, descr, DESCR_BUFSZ, |
| 310 | bstat->bs_ino, bstat->bs_gen, _("(unlinked)")); | |
| 311 | ||
| b364a9c0 | 312 | /* Try to open the inode. */ |
| 59f79e0a | 313 | fd = scrub_open_handle(handle); |
| b364a9c0 | 314 | if (fd < 0) { |
| 4d0ce76d DW |
315 | /* Handle is stale, try again. */ |
| 316 | if (errno == ESTALE) | |
| 317 | return ESTALE; | |
| b364a9c0 | 318 | |
| 4d0ce76d DW |
319 | str_error(ctx, descr, |
| 320 | _("Could not open to report read errors: %s."), | |
| 321 | strerror(errno)); | |
| 322 | return 0; | |
| b364a9c0 DW |
323 | } |
| 324 | ||
| 325 | /* Go find the badness. */ | |
| af9eb208 DW |
326 | error = report_fd_loss(ctx, descr, fd, arg); |
| 327 | ||
| 328 | err2 = close(fd); | |
| 329 | if (err2) | |
| 6c05cc5d | 330 | str_errno(ctx, descr); |
| b364a9c0 | 331 | |
| af9eb208 | 332 | return error; |
| b364a9c0 DW |
333 | } |
| 334 | ||
/*
 * Directory callback for the tree walk: check the directory's own extent
 * mappings against the read verify error list.
 */
static int
report_dir_loss(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	int			error;

	error = report_fd_loss(ctx, path, dir_fd, arg);
	return error;
}
| 345 | ||
| 346 | /* | |
| 347 | * Scan the inode associated with a directory entry for matches with | |
| 348 | * the read verify error list. | |
| 349 | */ | |
| f544ec31 | 350 | static int |
| af9eb208 | 351 | report_dirent_loss( |
| b364a9c0 DW |
352 | struct scrub_ctx *ctx, |
| 353 | const char *path, | |
| 354 | int dir_fd, | |
| 355 | struct dirent *dirent, | |
| 356 | struct stat *sb, | |
| 357 | void *arg) | |
| 358 | { | |
| b364a9c0 | 359 | int fd; |
| af9eb208 | 360 | int error, err2; |
| b364a9c0 DW |
361 | |
| 362 | /* Ignore things we can't open. */ | |
| 363 | if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode)) | |
| f544ec31 | 364 | return 0; |
| b364a9c0 DW |
365 | |
| 366 | /* Ignore . and .. */ | |
| 367 | if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name)) | |
| f544ec31 | 368 | return 0; |
| b364a9c0 DW |
369 | |
| 370 | /* | |
| 371 | * If we were given a dirent, open the associated file under | |
| 372 | * dir_fd for badblocks scanning. If dirent is NULL, then it's | |
| 373 | * the directory itself we want to scan. | |
| 374 | */ | |
| 375 | fd = openat(dir_fd, dirent->d_name, | |
| 376 | O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY); | |
| f544ec31 | 377 | if (fd < 0) { |
| 4d0ce76d DW |
378 | char descr[PATH_MAX + 1]; |
| 379 | ||
| f544ec31 DW |
380 | if (errno == ENOENT) |
| 381 | return 0; | |
| 4d0ce76d DW |
382 | |
| 383 | snprintf(descr, PATH_MAX, "%s/%s", path, dirent->d_name); | |
| 384 | descr[PATH_MAX] = 0; | |
| 385 | ||
| 386 | str_error(ctx, descr, | |
| 387 | _("Could not open to report read errors: %s."), | |
| 388 | strerror(errno)); | |
| 389 | return 0; | |
| f544ec31 | 390 | } |
| b364a9c0 DW |
391 | |
| 392 | /* Go find the badness. */ | |
| af9eb208 | 393 | error = report_fd_loss(ctx, path, fd, arg); |
| b364a9c0 | 394 | |
| af9eb208 DW |
395 | err2 = close(fd); |
| 396 | if (err2) | |
| 6c05cc5d | 397 | str_errno(ctx, path); |
| af9eb208 DW |
398 | if (!error && err2) |
| 399 | error = err2; | |
| 400 | ||
| 401 | return error; | |
| b364a9c0 DW |
402 | } |
| 403 | ||
| 9b5d1349 DW |
/* The bad range handed from report_ioerr to report_ioerr_fsmap. */
struct ioerr_filerange {
	uint64_t		physical;	/* start of the bad range */
	uint64_t		length;		/* length of the bad range */
};
| 408 | ||
| 409 | /* | |
| 410 | * If reverse mapping and parent pointers are enabled, we can map media errors | |
| 411 | * directly back to a filename and a file position without needing to walk the | |
| 412 | * directory tree. | |
| 413 | */ | |
| 414 | static inline bool | |
| 415 | can_use_pptrs( | |
| 416 | const struct scrub_ctx *ctx) | |
| 417 | { | |
| 418 | return (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_PARENT) && | |
| 419 | (ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT); | |
| 420 | } | |
| 421 | ||
| c9b349bd | 422 | /* Use a fsmap to report metadata lost to a media error. */ |
| 7a2eef2b | 423 | static int |
| c9b349bd | 424 | report_ioerr_fsmap( |
| b364a9c0 | 425 | struct scrub_ctx *ctx, |
| b364a9c0 DW |
426 | struct fsmap *map, |
| 427 | void *arg) | |
| 428 | { | |
| 429 | const char *type; | |
| 9b5d1349 | 430 | struct xfs_bulkstat bs = { }; |
| f1f5fd3a | 431 | char buf[DESCR_BUFSZ]; |
| 9b5d1349 | 432 | struct ioerr_filerange *fr = arg; |
| b364a9c0 | 433 | uint64_t err_off; |
| 9b5d1349 | 434 | int ret; |
| b364a9c0 | 435 | |
| 909c6a54 DW |
436 | /* Don't care about unwritten extents. */ |
| 437 | if (map->fmr_flags & FMR_OF_PREALLOC) | |
| 7a2eef2b | 438 | return 0; |
| 909c6a54 | 439 | |
| 9b5d1349 DW |
440 | if (fr->physical > map->fmr_physical) |
| 441 | err_off = fr->physical - map->fmr_physical; | |
| b364a9c0 DW |
442 | else |
| 443 | err_off = 0; | |
| 444 | ||
| f1f5fd3a | 445 | /* Report special owners */ |
| b364a9c0 | 446 | if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) { |
| f1f5fd3a DW |
447 | snprintf(buf, DESCR_BUFSZ, _("disk offset %"PRIu64), |
| 448 | (uint64_t)map->fmr_physical + err_off); | |
| af9eb208 | 449 | type = decode_special_owner(map->fmr_owner); |
| 96ac83c8 DW |
450 | /* |
| 451 | * On filesystems that don't store reverse mappings, the | |
| 452 | * GETFSMAP call returns OWNER_UNKNOWN for allocated space. | |
| 453 | * We'll have to let the directory tree walker find the file | |
| 454 | * that lost data. | |
| 455 | */ | |
| 456 | if (!(ctx->mnt.fsgeom.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT) && | |
| 457 | map->fmr_owner == XFS_FMR_OWN_UNKNOWN) { | |
| 458 | str_info(ctx, buf, _("media error detected.")); | |
| 459 | } else { | |
| 460 | str_corrupt(ctx, buf, _("media error in %s."), type); | |
| 461 | } | |
| b364a9c0 DW |
462 | } |
| 463 | ||
| 9b5d1349 DW |
464 | if (can_use_pptrs(ctx)) { |
| 465 | ret = -xfrog_bulkstat_single(&ctx->mnt, map->fmr_owner, 0, &bs); | |
| 466 | if (ret) | |
| 467 | str_liberror(ctx, ret, | |
| 468 | _("bulkstat for media error report")); | |
| 469 | } | |
| 470 | ||
| 02d0069e DW |
471 | /* Report extent maps */ |
| 472 | if (map->fmr_flags & FMR_OF_EXTENT_MAP) { | |
| 473 | bool attr = (map->fmr_flags & FMR_OF_ATTR_FORK); | |
| 474 | ||
| 475 | scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ, | |
| 9b5d1349 | 476 | map->fmr_owner, bs.bs_gen, " %s", |
| 02d0069e DW |
477 | attr ? _("extended attribute") : |
| 478 | _("file data")); | |
| abc2e70d | 479 | str_corrupt(ctx, buf, _("media error in extent map")); |
| 02d0069e DW |
480 | } |
| 481 | ||
| b364a9c0 | 482 | /* |
| 9b5d1349 DW |
483 | * If directory parent pointers are available, use that to find the |
| 484 | * pathname to a file, and report that path as having lost its | |
| 485 | * extended attributes, or the precise offset of the lost file data. | |
| b364a9c0 | 486 | */ |
| 9b5d1349 DW |
487 | if (!can_use_pptrs(ctx)) |
| 488 | return 0; | |
| b364a9c0 | 489 | |
| 9b5d1349 DW |
490 | scrub_render_ino_descr(ctx, buf, DESCR_BUFSZ, map->fmr_owner, |
| 491 | bs.bs_gen, NULL); | |
| 492 | ||
| 493 | if (map->fmr_flags & FMR_OF_ATTR_FORK) { | |
| 494 | str_corrupt(ctx, buf, _("media error in extended attributes")); | |
| 495 | return 0; | |
| 496 | } | |
| 497 | ||
| 498 | str_unfixable_error(ctx, buf, | |
| 499 | _("media error at data offset %llu length %llu."), | |
| 500 | err_off, fr->length); | |
| 7a2eef2b | 501 | return 0; |
| b364a9c0 DW |
502 | } |
| 503 | ||
| 504 | /* | |
| c9b349bd DW |
505 | * For a range of bad blocks, visit each space mapping that overlaps the bad |
| 506 | * range so that we can report lost metadata. | |
| b364a9c0 | 507 | */ |
| c9b349bd DW |
508 | static int |
| 509 | report_ioerr( | |
| b364a9c0 DW |
510 | uint64_t start, |
| 511 | uint64_t length, | |
| b364a9c0 DW |
512 | void *arg) |
| 513 | { | |
| 04c33913 | 514 | struct fsmap keys[2] = { }; |
| 9b5d1349 DW |
515 | struct ioerr_filerange fr = { |
| 516 | .physical = start, | |
| 517 | .length = length, | |
| 518 | }; | |
| c9b349bd | 519 | struct disk_ioerr_report *dioerr = arg; |
| b364a9c0 | 520 | |
| b364a9c0 | 521 | /* Go figure out which blocks are bad from the fsmap. */ |
| 37591ef3 | 522 | keys[0].fmr_device = disk_to_dev(dioerr->ctx, dioerr->disk); |
| 04c33913 | 523 | keys[0].fmr_physical = start; |
| 37591ef3 | 524 | keys[1].fmr_device = keys[0].fmr_device; |
| 04c33913 CH |
525 | keys[1].fmr_physical = start + length - 1; |
| 526 | keys[1].fmr_owner = ULLONG_MAX; | |
| 527 | keys[1].fmr_offset = ULLONG_MAX; | |
| 528 | keys[1].fmr_flags = UINT_MAX; | |
| 93d69bc7 | 529 | return -scrub_iterate_fsmap(dioerr->ctx, keys, report_ioerr_fsmap, |
| 9b5d1349 | 530 | &fr); |
| c9b349bd DW |
531 | } |
| 532 | ||
| 533 | /* Report all the media errors found on a disk. */ | |
| 534 | static int | |
| 535 | report_disk_ioerrs( | |
| 536 | struct scrub_ctx *ctx, | |
| 537 | struct disk *disk, | |
| 538 | struct media_verify_state *vs) | |
| 539 | { | |
| 540 | struct disk_ioerr_report dioerr = { | |
| 541 | .ctx = ctx, | |
| 542 | .disk = disk, | |
| 543 | }; | |
| 544 | struct bitmap *tree; | |
| 545 | ||
| 546 | if (!disk) | |
| 547 | return 0; | |
| 548 | tree = bitmap_for_disk(ctx, disk, vs); | |
| 549 | if (!tree) | |
| 550 | return 0; | |
| 93d69bc7 | 551 | return -bitmap_iterate(tree, report_ioerr, &dioerr); |
| c9b349bd DW |
552 | } |
| 553 | ||
| 554 | /* Given bad extent lists for the data & rtdev, find bad files. */ | |
| af9eb208 | 555 | static int |
| c9b349bd DW |
556 | report_all_media_errors( |
| 557 | struct scrub_ctx *ctx, | |
| 558 | struct media_verify_state *vs) | |
| 559 | { | |
| c9b349bd DW |
560 | int ret; |
| 561 | ||
| a6e08990 DW |
562 | if (vs->d_trunc) |
| 563 | str_corrupt(ctx, ctx->mntpoint, _("data device truncated")); | |
| 564 | if (vs->l_trunc) | |
| 565 | str_corrupt(ctx, ctx->mntpoint, _("log device truncated")); | |
| 566 | if (vs->r_trunc) | |
| 567 | str_corrupt(ctx, ctx->mntpoint, _("rt device truncated")); | |
| 568 | ||
| c9b349bd DW |
569 | ret = report_disk_ioerrs(ctx, ctx->datadev, vs); |
| 570 | if (ret) { | |
| 571 | str_liberror(ctx, ret, _("walking datadev io errors")); | |
| af9eb208 | 572 | return ret; |
| c9b349bd DW |
573 | } |
| 574 | ||
| 575 | ret = report_disk_ioerrs(ctx, ctx->rtdev, vs); | |
| 576 | if (ret) { | |
| 577 | str_liberror(ctx, ret, _("walking rtdev io errors")); | |
| af9eb208 | 578 | return ret; |
| c9b349bd DW |
579 | } |
| 580 | ||
| 9b5d1349 DW |
581 | /* |
| 582 | * Scan the directory tree to get file paths if we didn't already use | |
| cb3647bb DW |
583 | * directory parent pointers to report the loss. If parent pointers |
| 584 | * are enabled, report_ioerr_fsmap will have already reported file | |
| 585 | * paths that have lost file data and xattrs. | |
| 9b5d1349 | 586 | */ |
| cb3647bb DW |
587 | if (can_use_pptrs(ctx)) |
| 588 | return 0; | |
| 589 | ||
| 590 | ret = scan_fs_tree(ctx, report_dir_loss, report_dirent_loss, vs); | |
| 591 | if (ret) | |
| 592 | return ret; | |
| c9b349bd DW |
593 | |
| 594 | /* Scan for unlinked files. */ | |
| 279b0d0e | 595 | return scrub_scan_user_files(ctx, report_inode_loss, vs); |
| b364a9c0 DW |
596 | } |
| 597 | ||
| 598 | /* Schedule a read-verify of a (data block) extent. */ | |
| 7a2eef2b DW |
599 | static int |
| 600 | check_rmap( | |
| b364a9c0 | 601 | struct scrub_ctx *ctx, |
| b364a9c0 DW |
602 | struct fsmap *map, |
| 603 | void *arg) | |
| 604 | { | |
| 557f98d7 | 605 | struct media_verify_state *vs = arg; |
| f1bb1696 | 606 | struct read_verify_pool *rvp; |
| 8cab77d3 | 607 | int ret; |
| f1bb1696 | 608 | |
| af9eb208 | 609 | rvp = dev_to_pool(ctx, vs, map->fmr_device); |
| b364a9c0 DW |
610 | |
| 611 | dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64 | |
| 612 | " offset %"PRIu64" len %"PRIu64" flags 0x%x\n", | |
| 613 | major(map->fmr_device), minor(map->fmr_device), | |
| 614 | (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner, | |
| 615 | (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length, | |
| 616 | map->fmr_flags); | |
| 617 | ||
| 618 | /* "Unknown" extents should be verified; they could be data. */ | |
| 619 | if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) && | |
| 620 | map->fmr_owner == XFS_FMR_OWN_UNKNOWN) | |
| 621 | map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER; | |
| 622 | ||
| 623 | /* | |
| 624 | * We only care about read-verifying data extents that have been | |
| 625 | * written to disk. This means we can skip "special" owners | |
| 626 | * (metadata), xattr blocks, unwritten extents, and extent maps. | |
| 627 | * These should all get checked elsewhere in the scrubber. | |
| 628 | */ | |
| 629 | if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK | | |
| 630 | FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER)) | |
| 7a2eef2b | 631 | return 0; |
| b364a9c0 DW |
632 | |
| 633 | /* XXX: Filter out directory data blocks. */ | |
| 634 | ||
| 635 | /* Schedule the read verify command for (eventual) running. */ | |
| 8cab77d3 DW |
636 | ret = read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, |
| 637 | vs); | |
| 638 | if (ret) { | |
| 7a2eef2b DW |
639 | str_liberror(ctx, ret, _("scheduling media verify command")); |
| 640 | return ret; | |
| 8cab77d3 | 641 | } |
| b364a9c0 | 642 | |
| 7a2eef2b | 643 | return 0; |
| b364a9c0 DW |
644 | } |
| 645 | ||
/*
 * Wait for read/verify actions to finish, add the number of bytes that the
 * pool checked to *bytes_checked, and destroy the pool.
 *
 * A NULL pool is ignored.  Returns 0 on success or the error returned by
 * the read-verify helper that failed; the pool is destroyed either way.
 */
static int
clean_pool(
	struct read_verify_pool	*rvp,
	unsigned long long	*bytes_checked)
{
	uint64_t		pool_checked;
	int			ret;

	if (!rvp)
		return 0;

	ret = read_verify_force_io(rvp);
	if (ret) {
		/*
		 * Don't leak the pool: the caller never touches it again
		 * after we return.  Abort before destroying, which is the
		 * same teardown that phase6_func uses for a pool that was
		 * not flushed.
		 * NOTE(review): assumes abort + destroy is safe after a
		 * failed force_io -- confirm against read_verify.c.
		 */
		read_verify_pool_abort(rvp);
		goto out_destroy;
	}

	ret = read_verify_pool_flush(rvp);
	if (ret)
		goto out_destroy;

	ret = read_verify_bytes(rvp, &pool_checked);
	if (ret)
		goto out_destroy;

	*bytes_checked += pool_checked;
out_destroy:
	read_verify_pool_destroy(rvp);
	return ret;
}
| 675 | ||
| c9b349bd DW |
676 | /* Remember a media error for later. */ |
| 677 | static void | |
| 678 | remember_ioerr( | |
| 679 | struct scrub_ctx *ctx, | |
| 680 | struct disk *disk, | |
| 681 | uint64_t start, | |
| 682 | uint64_t length, | |
| 683 | int error, | |
| 684 | void *arg) | |
| 685 | { | |
| 686 | struct media_verify_state *vs = arg; | |
| 687 | struct bitmap *tree; | |
| 688 | int ret; | |
| 689 | ||
| a6e08990 | 690 | if (!length) { |
| 37591ef3 | 691 | if (disk == ctx->datadev) |
| a6e08990 | 692 | vs->d_trunc = true; |
| 37591ef3 | 693 | else if (disk == ctx->logdev) |
| a6e08990 | 694 | vs->l_trunc = true; |
| 37591ef3 CH |
695 | else if (disk == ctx->rtdev) |
| 696 | vs->r_trunc = true; | |
| a6e08990 DW |
697 | return; |
| 698 | } | |
| 699 | ||
| c9b349bd DW |
700 | tree = bitmap_for_disk(ctx, disk, vs); |
| 701 | if (!tree) { | |
| 702 | str_liberror(ctx, ENOENT, _("finding bad block bitmap")); | |
| 703 | return; | |
| 704 | } | |
| 705 | ||
| 93d69bc7 | 706 | ret = -bitmap_set(tree, start, length); |
| c9b349bd DW |
707 | if (ret) |
| 708 | str_liberror(ctx, ret, _("setting bad block bitmap")); | |
| 709 | } | |
| 710 | ||
| b364a9c0 DW |
711 | /* |
| 712 | * Read verify all the file data blocks in a filesystem. Since XFS doesn't | |
| 713 | * do data checksums, we trust that the underlying storage will pass back | |
| 714 | * an IO error if it can't retrieve whatever we previously stored there. | |
| 715 | * If we hit an IO error, we'll record the bad blocks in a bitmap and then | |
| 716 | * scan the extent maps of the entire fs tree to figure (and the unlinked | |
| 717 | * inodes) out which files are now broken. | |
| 718 | */ | |
| af9eb208 DW |
719 | int |
| 720 | phase6_func( | |
| b364a9c0 DW |
721 | struct scrub_ctx *ctx) |
| 722 | { | |
| 557f98d7 | 723 | struct media_verify_state vs = { NULL }; |
| af9eb208 | 724 | int ret, ret2, ret3; |
| b364a9c0 | 725 | |
| 93d69bc7 | 726 | ret = -bitmap_alloc(&vs.d_bad); |
| 233fabee DW |
727 | if (ret) { |
| 728 | str_liberror(ctx, ret, _("creating datadev badblock bitmap")); | |
| af9eb208 | 729 | return ret; |
| b364a9c0 DW |
730 | } |
| 731 | ||
| 93d69bc7 | 732 | ret = -bitmap_alloc(&vs.r_bad); |
| 233fabee DW |
733 | if (ret) { |
| 734 | str_liberror(ctx, ret, _("creating realtime badblock bitmap")); | |
| b364a9c0 DW |
735 | goto out_dbad; |
| 736 | } | |
| 737 | ||
| 8cab77d3 | 738 | ret = read_verify_pool_alloc(ctx, ctx->datadev, |
| c9b349bd | 739 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
| 8cab77d3 DW |
740 | scrub_nproc(ctx), &vs.rvp_data); |
| 741 | if (ret) { | |
| 742 | str_liberror(ctx, ret, _("creating datadev media verifier")); | |
| b364a9c0 DW |
743 | goto out_rbad; |
| 744 | } | |
| f1bb1696 | 745 | if (ctx->logdev) { |
| 8cab77d3 | 746 | ret = read_verify_pool_alloc(ctx, ctx->logdev, |
| c9b349bd | 747 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
| 8cab77d3 DW |
748 | scrub_nproc(ctx), &vs.rvp_log); |
| 749 | if (ret) { | |
| 750 | str_liberror(ctx, ret, | |
| 751 | _("creating logdev media verifier")); | |
| f1bb1696 DW |
752 | goto out_datapool; |
| 753 | } | |
| 754 | } | |
| 755 | if (ctx->rtdev) { | |
| 8cab77d3 | 756 | ret = read_verify_pool_alloc(ctx, ctx->rtdev, |
| c9b349bd | 757 | ctx->mnt.fsgeom.blocksize, remember_ioerr, |
| 8cab77d3 DW |
758 | scrub_nproc(ctx), &vs.rvp_realtime); |
| 759 | if (ret) { | |
| 760 | str_liberror(ctx, ret, | |
| 761 | _("creating rtdev media verifier")); | |
| f1bb1696 DW |
762 | goto out_logpool; |
| 763 | } | |
| 764 | } | |
| 7a2eef2b | 765 | ret = scrub_scan_all_spacemaps(ctx, check_rmap, &vs); |
| af9eb208 | 766 | if (ret) |
| f1bb1696 | 767 | goto out_rtpool; |
| 8cab77d3 DW |
768 | |
| 769 | ret = clean_pool(vs.rvp_data, &ctx->bytes_checked); | |
| af9eb208 | 770 | if (ret) |
| 8cab77d3 | 771 | str_liberror(ctx, ret, _("flushing datadev verify pool")); |
| 8cab77d3 | 772 | |
| af9eb208 DW |
773 | ret2 = clean_pool(vs.rvp_log, &ctx->bytes_checked); |
| 774 | if (ret2) | |
| 775 | str_liberror(ctx, ret2, _("flushing logdev verify pool")); | |
| 8cab77d3 | 776 | |
| af9eb208 DW |
777 | ret3 = clean_pool(vs.rvp_realtime, &ctx->bytes_checked); |
| 778 | if (ret3) | |
| 779 | str_liberror(ctx, ret3, _("flushing rtdev verify pool")); | |
| 780 | ||
| 781 | /* | |
| 782 | * If the verify flush didn't work or we found no bad blocks, we're | |
| 783 | * done! No errors detected. | |
| 784 | */ | |
| 785 | if (ret || ret2 || ret3) | |
| 786 | goto out_rbad; | |
| 787 | if (bitmap_empty(vs.d_bad) && bitmap_empty(vs.r_bad)) | |
| 788 | goto out_rbad; | |
| b364a9c0 DW |
789 | |
| 790 | /* Scan the whole dir tree to see what matches the bad extents. */ | |
| af9eb208 | 791 | ret = report_all_media_errors(ctx, &vs); |
| b364a9c0 | 792 | |
| 557f98d7 DW |
793 | bitmap_free(&vs.r_bad); |
| 794 | bitmap_free(&vs.d_bad); | |
| af9eb208 | 795 | return ret; |
| b364a9c0 | 796 | |
| f1bb1696 | 797 | out_rtpool: |
| 7668d01d | 798 | if (vs.rvp_realtime) { |
| 4cd869e5 | 799 | read_verify_pool_abort(vs.rvp_realtime); |
| 557f98d7 | 800 | read_verify_pool_destroy(vs.rvp_realtime); |
| 7668d01d | 801 | } |
| f1bb1696 | 802 | out_logpool: |
| 7668d01d | 803 | if (vs.rvp_log) { |
| 4cd869e5 | 804 | read_verify_pool_abort(vs.rvp_log); |
| 557f98d7 | 805 | read_verify_pool_destroy(vs.rvp_log); |
| 7668d01d | 806 | } |
| f1bb1696 | 807 | out_datapool: |
| 4cd869e5 | 808 | read_verify_pool_abort(vs.rvp_data); |
| 557f98d7 | 809 | read_verify_pool_destroy(vs.rvp_data); |
| b364a9c0 | 810 | out_rbad: |
| 557f98d7 | 811 | bitmap_free(&vs.r_bad); |
| b364a9c0 | 812 | out_dbad: |
| 557f98d7 | 813 | bitmap_free(&vs.d_bad); |
| af9eb208 | 814 | return ret; |
| b364a9c0 | 815 | } |
| ed60d210 | 816 | |
| af9eb208 DW |
817 | /* Estimate how much work we're going to do. */ |
| 818 | int | |
| 819 | phase6_estimate( | |
| ed60d210 DW |
820 | struct scrub_ctx *ctx, |
| 821 | uint64_t *items, | |
| 822 | unsigned int *nr_threads, | |
| 823 | int *rshift) | |
| 824 | { | |
| 825 | unsigned long long d_blocks; | |
| 826 | unsigned long long d_bfree; | |
| 827 | unsigned long long r_blocks; | |
| 828 | unsigned long long r_bfree; | |
| 0b78ac05 | 829 | unsigned long long dontcare; |
| 934d8d3a | 830 | int ret; |
| ed60d210 | 831 | |
| 0b78ac05 DW |
832 | ret = scrub_scan_estimate_blocks(ctx, &d_blocks, &d_bfree, &r_blocks, |
| 833 | &r_bfree, &dontcare); | |
| 934d8d3a DW |
834 | if (ret) { |
| 835 | str_liberror(ctx, ret, _("estimating verify work")); | |
| af9eb208 | 836 | return ret; |
| 934d8d3a | 837 | } |
| ed60d210 | 838 | |
| a749451c DW |
839 | *items = cvt_off_fsb_to_b(&ctx->mnt, |
| 840 | (d_blocks - d_bfree) + (r_blocks - r_bfree)); | |
| 13eedd45 DW |
841 | |
| 842 | /* | |
| 843 | * Each read-verify pool starts a thread pool, and each worker thread | |
| 844 | * can contribute to the progress counter. Hence we need to set | |
| 845 | * nr_threads appropriately to handle that many threads. | |
| 846 | */ | |
| ed60d210 | 847 | *nr_threads = disk_heads(ctx->datadev); |
| 13eedd45 DW |
848 | if (ctx->rtdev) |
| 849 | *nr_threads += disk_heads(ctx->rtdev); | |
| 850 | if (ctx->logdev) | |
| 851 | *nr_threads += disk_heads(ctx->logdev); | |
| ed60d210 | 852 | *rshift = 20; |
| af9eb208 DW |
853 | return 0; |
| 854 | } |