]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - scrub/phase6.c
xfs_scrub: progress indicator
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
1 /*
2 * Copyright (C) 2018 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20 #include <stdio.h>
21 #include <stdint.h>
22 #include <stdbool.h>
23 #include <dirent.h>
24 #include <sys/statvfs.h>
25 #include "xfs.h"
26 #include "xfs_fs.h"
27 #include "handle.h"
28 #include "path.h"
29 #include "ptvar.h"
30 #include "workqueue.h"
31 #include "xfs_scrub.h"
32 #include "common.h"
33 #include "bitmap.h"
34 #include "disk.h"
35 #include "filemap.h"
36 #include "fscounters.h"
37 #include "inodes.h"
38 #include "read_verify.h"
39 #include "spacemap.h"
40 #include "vfs.h"
41
42 /*
43 * Phase 6: Verify data file integrity.
44 *
45 * Identify potential data block extents with GETFSMAP, then feed those
46 * extents to the read-verify pool to get the verify commands batched,
47 * issued, and (if there are problems) reported back to us. If there
48 * are errors, we'll record the bad regions and (if available) use rmap
49 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
50 * whole directory tree looking for files that overlap the bad regions
51 * and report the paths of the now corrupt files.
52 */
53
54 /* Find the fd for a given device identifier. */
55 static struct disk *
56 xfs_dev_to_disk(
57 struct scrub_ctx *ctx,
58 dev_t dev)
59 {
60 if (dev == ctx->fsinfo.fs_datadev)
61 return ctx->datadev;
62 else if (dev == ctx->fsinfo.fs_logdev)
63 return ctx->logdev;
64 else if (dev == ctx->fsinfo.fs_rtdev)
65 return ctx->rtdev;
66 abort();
67 }
68
69 /* Find the device major/minor for a given file descriptor. */
70 static dev_t
71 xfs_disk_to_dev(
72 struct scrub_ctx *ctx,
73 struct disk *disk)
74 {
75 if (disk == ctx->datadev)
76 return ctx->fsinfo.fs_datadev;
77 else if (disk == ctx->logdev)
78 return ctx->fsinfo.fs_logdev;
79 else if (disk == ctx->rtdev)
80 return ctx->fsinfo.fs_rtdev;
81 abort();
82 }
83
/* Pairs a special fsmap owner code with a human-readable description. */
struct owner_decode {
	uint64_t		owner;	/* owner code to match */
	const char		*descr;	/* description; NULL ends a table */
};
88
89 static const struct owner_decode special_owners[] = {
90 {XFS_FMR_OWN_FREE, "free space"},
91 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
92 {XFS_FMR_OWN_FS, "static FS metadata"},
93 {XFS_FMR_OWN_LOG, "journalling log"},
94 {XFS_FMR_OWN_AG, "per-AG metadata"},
95 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
96 {XFS_FMR_OWN_INODES, "inodes"},
97 {XFS_FMR_OWN_REFC, "refcount btree"},
98 {XFS_FMR_OWN_COW, "CoW staging"},
99 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
100 {0, NULL},
101 };
102
103 /* Decode a special owner. */
104 static const char *
105 xfs_decode_special_owner(
106 uint64_t owner)
107 {
108 const struct owner_decode *od = special_owners;
109
110 while (od->descr) {
111 if (od->owner == owner)
112 return od->descr;
113 od++;
114 }
115
116 return NULL;
117 }
118
119 /* Routines to translate bad physical extents into file paths and offsets. */
120
/* Bad regions handed to the file-mapping walkers when reporting errors. */
struct xfs_verify_error_info {
	struct bitmap		*d_bad;	/* bad data device regions, in bytes */
	struct bitmap		*r_bad;	/* bad realtime device regions, in bytes */
};
125
126 /* Report if this extent overlaps a bad region. */
127 static bool
128 xfs_report_verify_inode_bmap(
129 struct scrub_ctx *ctx,
130 const char *descr,
131 int fd,
132 int whichfork,
133 struct fsxattr *fsx,
134 struct xfs_bmap *bmap,
135 void *arg)
136 {
137 struct xfs_verify_error_info *vei = arg;
138 struct bitmap *bmp;
139
140 /* Only report errors for real extents. */
141 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
142 return true;
143
144 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
145 bmp = vei->r_bad;
146 else
147 bmp = vei->d_bad;
148
149 if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
150 return true;
151
152 str_error(ctx, descr,
153 _("offset %llu failed read verification."), bmap->bm_offset);
154 return true;
155 }
156
157 /* Iterate the extent mappings of a file to report errors. */
158 static bool
159 xfs_report_verify_fd(
160 struct scrub_ctx *ctx,
161 const char *descr,
162 int fd,
163 void *arg)
164 {
165 struct xfs_bmap key = {0};
166 bool moveon;
167
168 /* data fork */
169 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
170 xfs_report_verify_inode_bmap, arg);
171 if (!moveon)
172 return false;
173
174 /* attr fork */
175 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
176 xfs_report_verify_inode_bmap, arg);
177 if (!moveon)
178 return false;
179 return true;
180 }
181
182 /* Report read verify errors in unlinked (but still open) files. */
183 static int
184 xfs_report_verify_inode(
185 struct scrub_ctx *ctx,
186 struct xfs_handle *handle,
187 struct xfs_bstat *bstat,
188 void *arg)
189 {
190 char descr[DESCR_BUFSZ];
191 char buf[DESCR_BUFSZ];
192 bool moveon;
193 int fd;
194 int error;
195
196 snprintf(descr, DESCR_BUFSZ, _("inode %"PRIu64" (unlinked)"),
197 (uint64_t)bstat->bs_ino);
198
199 /* Ignore linked files and things we can't open. */
200 if (bstat->bs_nlink != 0)
201 return 0;
202 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
203 return 0;
204
205 /* Try to open the inode. */
206 fd = xfs_open_handle(handle);
207 if (fd < 0) {
208 error = errno;
209 if (error == ESTALE)
210 return error;
211
212 str_warn(ctx, descr, "%s", strerror_r(error, buf, DESCR_BUFSZ));
213 return error;
214 }
215
216 /* Go find the badness. */
217 moveon = xfs_report_verify_fd(ctx, descr, fd, arg);
218 close(fd);
219
220 return moveon ? 0 : XFS_ITERATE_INODES_ABORT;
221 }
222
/*
 * Directory callback for the tree walk: check the directory's own
 * mappings against the read verify error list, using its path as the
 * description.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			moveon;

	moveon = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return moveon;
}
233
234 /*
235 * Scan the inode associated with a directory entry for matches with
236 * the read verify error list.
237 */
238 static bool
239 xfs_report_verify_dirent(
240 struct scrub_ctx *ctx,
241 const char *path,
242 int dir_fd,
243 struct dirent *dirent,
244 struct stat *sb,
245 void *arg)
246 {
247 bool moveon;
248 int fd;
249
250 /* Ignore things we can't open. */
251 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
252 return true;
253
254 /* Ignore . and .. */
255 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
256 return true;
257
258 /*
259 * If we were given a dirent, open the associated file under
260 * dir_fd for badblocks scanning. If dirent is NULL, then it's
261 * the directory itself we want to scan.
262 */
263 fd = openat(dir_fd, dirent->d_name,
264 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
265 if (fd < 0)
266 return true;
267
268 /* Go find the badness. */
269 moveon = xfs_report_verify_fd(ctx, path, fd, arg);
270 if (moveon)
271 goto out;
272
273 out:
274 close(fd);
275
276 return moveon;
277 }
278
279 /* Given bad extent lists for the data & rtdev, find bad files. */
280 static bool
281 xfs_report_verify_errors(
282 struct scrub_ctx *ctx,
283 struct bitmap *d_bad,
284 struct bitmap *r_bad)
285 {
286 struct xfs_verify_error_info vei;
287 bool moveon;
288
289 vei.d_bad = d_bad;
290 vei.r_bad = r_bad;
291
292 /* Scan the directory tree to get file paths. */
293 moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
294 xfs_report_verify_dirent, &vei);
295 if (!moveon)
296 return false;
297
298 /* Scan for unlinked files. */
299 return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
300 }
301
302 /* Verify disk blocks with GETFSMAP */
303
/* State shared by the GETFSMAP-driven media verification callbacks. */
struct xfs_verify_extent {
	struct read_verify_pool	*readverify;	/* read-verify pool */
	struct ptvar		*rvstate;	/* ptvar holding read_verify state */
	struct bitmap		*d_bad;		/* bad data device regions, in bytes */
	struct bitmap		*r_bad;		/* bad realtime device regions, in bytes */
};
310
311 /* Report an IO error resulting from read-verify based off getfsmap. */
312 static bool
313 xfs_check_rmap_error_report(
314 struct scrub_ctx *ctx,
315 const char *descr,
316 struct fsmap *map,
317 void *arg)
318 {
319 const char *type;
320 char buf[32];
321 uint64_t err_physical = *(uint64_t *)arg;
322 uint64_t err_off;
323
324 if (err_physical > map->fmr_physical)
325 err_off = err_physical - map->fmr_physical;
326 else
327 err_off = 0;
328
329 snprintf(buf, 32, _("disk offset %"PRIu64),
330 (uint64_t)BTOBB(map->fmr_physical + err_off));
331
332 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
333 type = xfs_decode_special_owner(map->fmr_owner);
334 str_error(ctx, buf,
335 _("%s failed read verification."),
336 type);
337 }
338
339 /*
340 * XXX: If we had a getparent() call we could report IO errors
341 * efficiently. Until then, we'll have to scan the dir tree
342 * to find the bad file's pathname.
343 */
344
345 return true;
346 }
347
348 /*
349 * Remember a read error for later, and see if rmap will tell us about the
350 * owner ahead of time.
351 */
352 static void
353 xfs_check_rmap_ioerr(
354 struct scrub_ctx *ctx,
355 struct disk *disk,
356 uint64_t start,
357 uint64_t length,
358 int error,
359 void *arg)
360 {
361 struct fsmap keys[2];
362 char descr[DESCR_BUFSZ];
363 struct xfs_verify_extent *ve = arg;
364 struct bitmap *tree;
365 dev_t dev;
366 bool moveon;
367
368 dev = xfs_disk_to_dev(ctx, disk);
369
370 /*
371 * If we don't have parent pointers, save the bad extent for
372 * later rescanning.
373 */
374 if (dev == ctx->fsinfo.fs_datadev)
375 tree = ve->d_bad;
376 else if (dev == ctx->fsinfo.fs_rtdev)
377 tree = ve->r_bad;
378 else
379 tree = NULL;
380 if (tree) {
381 moveon = bitmap_set(tree, start, length);
382 if (!moveon)
383 str_errno(ctx, ctx->mntpoint);
384 }
385
386 snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
387 major(dev), minor(dev), start, length);
388
389 /* Go figure out which blocks are bad from the fsmap. */
390 memset(keys, 0, sizeof(struct fsmap) * 2);
391 keys->fmr_device = dev;
392 keys->fmr_physical = start;
393 (keys + 1)->fmr_device = dev;
394 (keys + 1)->fmr_physical = start + length - 1;
395 (keys + 1)->fmr_owner = ULLONG_MAX;
396 (keys + 1)->fmr_offset = ULLONG_MAX;
397 (keys + 1)->fmr_flags = UINT_MAX;
398 xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
399 &start);
400 }
401
402 /* Schedule a read-verify of a (data block) extent. */
403 static bool
404 xfs_check_rmap(
405 struct scrub_ctx *ctx,
406 const char *descr,
407 struct fsmap *map,
408 void *arg)
409 {
410 struct xfs_verify_extent *ve = arg;
411 struct disk *disk;
412
413 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
414 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
415 major(map->fmr_device), minor(map->fmr_device),
416 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
417 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
418 map->fmr_flags);
419
420 /* "Unknown" extents should be verified; they could be data. */
421 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
422 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
423 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
424
425 /*
426 * We only care about read-verifying data extents that have been
427 * written to disk. This means we can skip "special" owners
428 * (metadata), xattr blocks, unwritten extents, and extent maps.
429 * These should all get checked elsewhere in the scrubber.
430 */
431 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
432 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
433 goto out;
434
435 /* XXX: Filter out directory data blocks. */
436
437 /* Schedule the read verify command for (eventual) running. */
438 disk = xfs_dev_to_disk(ctx, map->fmr_device);
439
440 read_verify_schedule_io(ve->readverify, ptvar_get(ve->rvstate), disk,
441 map->fmr_physical, map->fmr_length, ve);
442
443 out:
444 /* Is this the last extent? Fire off the read. */
445 if (map->fmr_flags & FMR_OF_LAST)
446 read_verify_force_io(ve->readverify, ptvar_get(ve->rvstate));
447
448 return true;
449 }
450
451 /*
452 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
453 * do data checksums, we trust that the underlying storage will pass back
454 * an IO error if it can't retrieve whatever we previously stored there.
455 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
456 * scan the extent maps of the entire fs tree to figure (and the unlinked
457 * inodes) out which files are now broken.
458 */
459 bool
460 xfs_scan_blocks(
461 struct scrub_ctx *ctx)
462 {
463 struct xfs_verify_extent ve;
464 bool moveon;
465
466 ve.rvstate = ptvar_init(scrub_nproc(ctx), sizeof(struct read_verify));
467 if (!ve.rvstate) {
468 str_errno(ctx, ctx->mntpoint);
469 return false;
470 }
471
472 moveon = bitmap_init(&ve.d_bad);
473 if (!moveon) {
474 str_errno(ctx, ctx->mntpoint);
475 goto out_ve;
476 }
477
478 moveon = bitmap_init(&ve.r_bad);
479 if (!moveon) {
480 str_errno(ctx, ctx->mntpoint);
481 goto out_dbad;
482 }
483
484 ve.readverify = read_verify_pool_init(ctx, ctx->geo.blocksize,
485 xfs_check_rmap_ioerr, disk_heads(ctx->datadev));
486 if (!ve.readverify) {
487 moveon = false;
488 str_error(ctx, ctx->mntpoint,
489 _("Could not create media verifier."));
490 goto out_rbad;
491 }
492 moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
493 if (!moveon)
494 goto out_pool;
495 read_verify_pool_flush(ve.readverify);
496 ctx->bytes_checked += read_verify_bytes(ve.readverify);
497 read_verify_pool_destroy(ve.readverify);
498
499 /* Scan the whole dir tree to see what matches the bad extents. */
500 if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
501 moveon = xfs_report_verify_errors(ctx, ve.d_bad, ve.r_bad);
502
503 bitmap_free(&ve.r_bad);
504 bitmap_free(&ve.d_bad);
505 ptvar_free(ve.rvstate);
506 return moveon;
507
508 out_pool:
509 read_verify_pool_destroy(ve.readverify);
510 out_rbad:
511 bitmap_free(&ve.r_bad);
512 out_dbad:
513 bitmap_free(&ve.d_bad);
514 out_ve:
515 ptvar_free(ve.rvstate);
516 return moveon;
517 }
518
519 /* Estimate how much work we're going to do. */
520 bool
521 xfs_estimate_verify_work(
522 struct scrub_ctx *ctx,
523 uint64_t *items,
524 unsigned int *nr_threads,
525 int *rshift)
526 {
527 unsigned long long d_blocks;
528 unsigned long long d_bfree;
529 unsigned long long r_blocks;
530 unsigned long long r_bfree;
531 unsigned long long f_files;
532 unsigned long long f_free;
533 bool moveon;
534
535 moveon = xfs_scan_estimate_blocks(ctx, &d_blocks, &d_bfree,
536 &r_blocks, &r_bfree, &f_files, &f_free);
537 if (!moveon)
538 return moveon;
539
540 *items = ((d_blocks - d_bfree) + (r_blocks - r_bfree)) << ctx->blocklog;
541 *nr_threads = disk_heads(ctx->datadev);
542 *rshift = 20;
543 return moveon;
544 }