]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - scrub/phase6.c
xfs_scrub: actually check for errors coming from close()
[thirdparty/xfsprogs-dev.git] / scrub / phase6.c
1 /*
2 * Copyright (C) 2018 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20 #include "xfs.h"
21 #include <stdint.h>
22 #include <dirent.h>
23 #include <sys/statvfs.h>
24 #include "handle.h"
25 #include "path.h"
26 #include "ptvar.h"
27 #include "workqueue.h"
28 #include "xfs_scrub.h"
29 #include "common.h"
30 #include "bitmap.h"
31 #include "disk.h"
32 #include "filemap.h"
33 #include "fscounters.h"
34 #include "inodes.h"
35 #include "read_verify.h"
36 #include "spacemap.h"
37 #include "vfs.h"
38
39 /*
40 * Phase 6: Verify data file integrity.
41 *
42 * Identify potential data block extents with GETFSMAP, then feed those
43 * extents to the read-verify pool to get the verify commands batched,
44 * issued, and (if there are problems) reported back to us. If there
45 * are errors, we'll record the bad regions and (if available) use rmap
46 * to tell us if metadata are now corrupt. Otherwise, we'll scan the
47 * whole directory tree looking for files that overlap the bad regions
48 * and report the paths of the now corrupt files.
49 */
50
51 /* Find the fd for a given device identifier. */
52 static struct disk *
53 xfs_dev_to_disk(
54 struct scrub_ctx *ctx,
55 dev_t dev)
56 {
57 if (dev == ctx->fsinfo.fs_datadev)
58 return ctx->datadev;
59 else if (dev == ctx->fsinfo.fs_logdev)
60 return ctx->logdev;
61 else if (dev == ctx->fsinfo.fs_rtdev)
62 return ctx->rtdev;
63 abort();
64 }
65
66 /* Find the device major/minor for a given file descriptor. */
67 static dev_t
68 xfs_disk_to_dev(
69 struct scrub_ctx *ctx,
70 struct disk *disk)
71 {
72 if (disk == ctx->datadev)
73 return ctx->fsinfo.fs_datadev;
74 else if (disk == ctx->logdev)
75 return ctx->fsinfo.fs_logdev;
76 else if (disk == ctx->rtdev)
77 return ctx->fsinfo.fs_rtdev;
78 abort();
79 }
80
/* One row of the special-owner lookup table. */
struct owner_decode {
	/* Owner code that is compared against a mapping's fmr_owner. */
	uint64_t		owner;

	/* Human-readable description; NULL marks the end of a table. */
	const char		*descr;
};
85
86 static const struct owner_decode special_owners[] = {
87 {XFS_FMR_OWN_FREE, "free space"},
88 {XFS_FMR_OWN_UNKNOWN, "unknown owner"},
89 {XFS_FMR_OWN_FS, "static FS metadata"},
90 {XFS_FMR_OWN_LOG, "journalling log"},
91 {XFS_FMR_OWN_AG, "per-AG metadata"},
92 {XFS_FMR_OWN_INOBT, "inode btree blocks"},
93 {XFS_FMR_OWN_INODES, "inodes"},
94 {XFS_FMR_OWN_REFC, "refcount btree"},
95 {XFS_FMR_OWN_COW, "CoW staging"},
96 {XFS_FMR_OWN_DEFECTIVE, "bad blocks"},
97 {0, NULL},
98 };
99
100 /* Decode a special owner. */
101 static const char *
102 xfs_decode_special_owner(
103 uint64_t owner)
104 {
105 const struct owner_decode *od = special_owners;
106
107 while (od->descr) {
108 if (od->owner == owner)
109 return od->descr;
110 od++;
111 }
112
113 return NULL;
114 }
115
116 /* Routines to translate bad physical extents into file paths and offsets. */
117
/* Bad-region bitmaps handed to the file-walking report callbacks. */
struct xfs_verify_error_info {
	/* Bad regions used for files without FS_XFLAG_REALTIME, in bytes. */
	struct bitmap		*d_bad;

	/* Bad regions used for files with FS_XFLAG_REALTIME, in bytes. */
	struct bitmap		*r_bad;
};
122
123 /* Report if this extent overlaps a bad region. */
124 static bool
125 xfs_report_verify_inode_bmap(
126 struct scrub_ctx *ctx,
127 const char *descr,
128 int fd,
129 int whichfork,
130 struct fsxattr *fsx,
131 struct xfs_bmap *bmap,
132 void *arg)
133 {
134 struct xfs_verify_error_info *vei = arg;
135 struct bitmap *bmp;
136
137 /* Only report errors for real extents. */
138 if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC))
139 return true;
140
141 if (fsx->fsx_xflags & FS_XFLAG_REALTIME)
142 bmp = vei->r_bad;
143 else
144 bmp = vei->d_bad;
145
146 if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length))
147 return true;
148
149 str_error(ctx, descr,
150 _("offset %llu failed read verification."), bmap->bm_offset);
151 return true;
152 }
153
154 /* Iterate the extent mappings of a file to report errors. */
155 static bool
156 xfs_report_verify_fd(
157 struct scrub_ctx *ctx,
158 const char *descr,
159 int fd,
160 void *arg)
161 {
162 struct xfs_bmap key = {0};
163 bool moveon;
164
165 /* data fork */
166 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_DATA_FORK, &key,
167 xfs_report_verify_inode_bmap, arg);
168 if (!moveon)
169 return false;
170
171 /* attr fork */
172 moveon = xfs_iterate_filemaps(ctx, descr, fd, XFS_ATTR_FORK, &key,
173 xfs_report_verify_inode_bmap, arg);
174 if (!moveon)
175 return false;
176 return true;
177 }
178
179 /* Report read verify errors in unlinked (but still open) files. */
180 static int
181 xfs_report_verify_inode(
182 struct scrub_ctx *ctx,
183 struct xfs_handle *handle,
184 struct xfs_bstat *bstat,
185 void *arg)
186 {
187 char descr[DESCR_BUFSZ];
188 bool moveon;
189 int fd;
190 int error;
191
192 snprintf(descr, DESCR_BUFSZ, _("inode %"PRIu64" (unlinked)"),
193 (uint64_t)bstat->bs_ino);
194
195 /* Ignore linked files and things we can't open. */
196 if (bstat->bs_nlink != 0)
197 return 0;
198 if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode))
199 return 0;
200
201 /* Try to open the inode. */
202 fd = xfs_open_handle(handle);
203 if (fd < 0) {
204 error = errno;
205 if (error == ESTALE)
206 return error;
207
208 str_info(ctx, descr,
209 _("Disappeared during read error reporting."));
210 return error;
211 }
212
213 /* Go find the badness. */
214 moveon = xfs_report_verify_fd(ctx, descr, fd, arg);
215 error = close(fd);
216 if (error)
217 str_errno(ctx, descr);
218
219 return moveon ? 0 : XFS_ITERATE_INODES_ABORT;
220 }
221
/*
 * Directory callback for the tree walk: check the directory's own extent
 * mappings against the read verify error list.
 */
static bool
xfs_report_verify_dir(
	struct scrub_ctx	*ctx,
	const char		*path,
	int			dir_fd,
	void			*arg)
{
	bool			moveon;

	moveon = xfs_report_verify_fd(ctx, path, dir_fd, arg);
	return moveon;
}
232
233 /*
234 * Scan the inode associated with a directory entry for matches with
235 * the read verify error list.
236 */
237 static bool
238 xfs_report_verify_dirent(
239 struct scrub_ctx *ctx,
240 const char *path,
241 int dir_fd,
242 struct dirent *dirent,
243 struct stat *sb,
244 void *arg)
245 {
246 bool moveon;
247 int fd;
248 int error;
249
250 /* Ignore things we can't open. */
251 if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode))
252 return true;
253
254 /* Ignore . and .. */
255 if (!strcmp(".", dirent->d_name) || !strcmp("..", dirent->d_name))
256 return true;
257
258 /*
259 * If we were given a dirent, open the associated file under
260 * dir_fd for badblocks scanning. If dirent is NULL, then it's
261 * the directory itself we want to scan.
262 */
263 fd = openat(dir_fd, dirent->d_name,
264 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY);
265 if (fd < 0)
266 return true;
267
268 /* Go find the badness. */
269 moveon = xfs_report_verify_fd(ctx, path, fd, arg);
270 if (moveon)
271 goto out;
272
273 out:
274 error = close(fd);
275 if (error)
276 str_errno(ctx, path);
277 return moveon;
278 }
279
280 /* Given bad extent lists for the data & rtdev, find bad files. */
281 static bool
282 xfs_report_verify_errors(
283 struct scrub_ctx *ctx,
284 struct bitmap *d_bad,
285 struct bitmap *r_bad)
286 {
287 struct xfs_verify_error_info vei;
288 bool moveon;
289
290 vei.d_bad = d_bad;
291 vei.r_bad = r_bad;
292
293 /* Scan the directory tree to get file paths. */
294 moveon = scan_fs_tree(ctx, xfs_report_verify_dir,
295 xfs_report_verify_dirent, &vei);
296 if (!moveon)
297 return false;
298
299 /* Scan for unlinked files. */
300 return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
301 }
302
303 /* Verify disk blocks with GETFSMAP */
304
/* State shared by the GETFSMAP-driven read-verify callbacks. */
struct xfs_verify_extent {
	/* Pool that read-verify requests are scheduled on. */
	struct read_verify_pool	*readverify;

	/* Per-thread variable holding read-verify scheduling state. */
	struct ptvar		*rvstate;

	/* Bad regions seen on the data device, in bytes. */
	struct bitmap		*d_bad;

	/* Bad regions seen on the realtime device, in bytes. */
	struct bitmap		*r_bad;
};
311
312 /* Report an IO error resulting from read-verify based off getfsmap. */
313 static bool
314 xfs_check_rmap_error_report(
315 struct scrub_ctx *ctx,
316 const char *descr,
317 struct fsmap *map,
318 void *arg)
319 {
320 const char *type;
321 char buf[32];
322 uint64_t err_physical = *(uint64_t *)arg;
323 uint64_t err_off;
324
325 if (err_physical > map->fmr_physical)
326 err_off = err_physical - map->fmr_physical;
327 else
328 err_off = 0;
329
330 snprintf(buf, 32, _("disk offset %"PRIu64),
331 (uint64_t)BTOBB(map->fmr_physical + err_off));
332
333 if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) {
334 type = xfs_decode_special_owner(map->fmr_owner);
335 str_error(ctx, buf,
336 _("%s failed read verification."),
337 type);
338 }
339
340 /*
341 * XXX: If we had a getparent() call we could report IO errors
342 * efficiently. Until then, we'll have to scan the dir tree
343 * to find the bad file's pathname.
344 */
345
346 return true;
347 }
348
349 /*
350 * Remember a read error for later, and see if rmap will tell us about the
351 * owner ahead of time.
352 */
353 static void
354 xfs_check_rmap_ioerr(
355 struct scrub_ctx *ctx,
356 struct disk *disk,
357 uint64_t start,
358 uint64_t length,
359 int error,
360 void *arg)
361 {
362 struct fsmap keys[2];
363 char descr[DESCR_BUFSZ];
364 struct xfs_verify_extent *ve = arg;
365 struct bitmap *tree;
366 dev_t dev;
367 bool moveon;
368
369 dev = xfs_disk_to_dev(ctx, disk);
370
371 /*
372 * If we don't have parent pointers, save the bad extent for
373 * later rescanning.
374 */
375 if (dev == ctx->fsinfo.fs_datadev)
376 tree = ve->d_bad;
377 else if (dev == ctx->fsinfo.fs_rtdev)
378 tree = ve->r_bad;
379 else
380 tree = NULL;
381 if (tree) {
382 moveon = bitmap_set(tree, start, length);
383 if (!moveon)
384 str_errno(ctx, ctx->mntpoint);
385 }
386
387 snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
388 major(dev), minor(dev), start, length);
389
390 /* Go figure out which blocks are bad from the fsmap. */
391 memset(keys, 0, sizeof(struct fsmap) * 2);
392 keys->fmr_device = dev;
393 keys->fmr_physical = start;
394 (keys + 1)->fmr_device = dev;
395 (keys + 1)->fmr_physical = start + length - 1;
396 (keys + 1)->fmr_owner = ULLONG_MAX;
397 (keys + 1)->fmr_offset = ULLONG_MAX;
398 (keys + 1)->fmr_flags = UINT_MAX;
399 xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report,
400 &start);
401 }
402
403 /* Schedule a read-verify of a (data block) extent. */
404 static bool
405 xfs_check_rmap(
406 struct scrub_ctx *ctx,
407 const char *descr,
408 struct fsmap *map,
409 void *arg)
410 {
411 struct xfs_verify_extent *ve = arg;
412 struct disk *disk;
413
414 dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
415 " offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
416 major(map->fmr_device), minor(map->fmr_device),
417 (uint64_t)map->fmr_physical, (int64_t)map->fmr_owner,
418 (uint64_t)map->fmr_offset, (uint64_t)map->fmr_length,
419 map->fmr_flags);
420
421 /* "Unknown" extents should be verified; they could be data. */
422 if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
423 map->fmr_owner == XFS_FMR_OWN_UNKNOWN)
424 map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER;
425
426 /*
427 * We only care about read-verifying data extents that have been
428 * written to disk. This means we can skip "special" owners
429 * (metadata), xattr blocks, unwritten extents, and extent maps.
430 * These should all get checked elsewhere in the scrubber.
431 */
432 if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK |
433 FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER))
434 goto out;
435
436 /* XXX: Filter out directory data blocks. */
437
438 /* Schedule the read verify command for (eventual) running. */
439 disk = xfs_dev_to_disk(ctx, map->fmr_device);
440
441 read_verify_schedule_io(ve->readverify, ptvar_get(ve->rvstate), disk,
442 map->fmr_physical, map->fmr_length, ve);
443
444 out:
445 /* Is this the last extent? Fire off the read. */
446 if (map->fmr_flags & FMR_OF_LAST)
447 read_verify_force_io(ve->readverify, ptvar_get(ve->rvstate));
448
449 return true;
450 }
451
452 /*
453 * Read verify all the file data blocks in a filesystem. Since XFS doesn't
454 * do data checksums, we trust that the underlying storage will pass back
455 * an IO error if it can't retrieve whatever we previously stored there.
456 * If we hit an IO error, we'll record the bad blocks in a bitmap and then
457 * scan the extent maps of the entire fs tree to figure (and the unlinked
458 * inodes) out which files are now broken.
459 */
460 bool
461 xfs_scan_blocks(
462 struct scrub_ctx *ctx)
463 {
464 struct xfs_verify_extent ve;
465 bool moveon;
466
467 ve.rvstate = ptvar_init(scrub_nproc(ctx), sizeof(struct read_verify));
468 if (!ve.rvstate) {
469 str_errno(ctx, ctx->mntpoint);
470 return false;
471 }
472
473 moveon = bitmap_init(&ve.d_bad);
474 if (!moveon) {
475 str_errno(ctx, ctx->mntpoint);
476 goto out_ve;
477 }
478
479 moveon = bitmap_init(&ve.r_bad);
480 if (!moveon) {
481 str_errno(ctx, ctx->mntpoint);
482 goto out_dbad;
483 }
484
485 ve.readverify = read_verify_pool_init(ctx, ctx->geo.blocksize,
486 xfs_check_rmap_ioerr, disk_heads(ctx->datadev));
487 if (!ve.readverify) {
488 moveon = false;
489 str_info(ctx, ctx->mntpoint,
490 _("Could not create media verifier."));
491 goto out_rbad;
492 }
493 moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
494 if (!moveon)
495 goto out_pool;
496 read_verify_pool_flush(ve.readverify);
497 ctx->bytes_checked += read_verify_bytes(ve.readverify);
498 read_verify_pool_destroy(ve.readverify);
499
500 /* Scan the whole dir tree to see what matches the bad extents. */
501 if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
502 moveon = xfs_report_verify_errors(ctx, ve.d_bad, ve.r_bad);
503
504 bitmap_free(&ve.r_bad);
505 bitmap_free(&ve.d_bad);
506 ptvar_free(ve.rvstate);
507 return moveon;
508
509 out_pool:
510 read_verify_pool_destroy(ve.readverify);
511 out_rbad:
512 bitmap_free(&ve.r_bad);
513 out_dbad:
514 bitmap_free(&ve.d_bad);
515 out_ve:
516 ptvar_free(ve.rvstate);
517 return moveon;
518 }
519
520 /* Estimate how much work we're going to do. */
521 bool
522 xfs_estimate_verify_work(
523 struct scrub_ctx *ctx,
524 uint64_t *items,
525 unsigned int *nr_threads,
526 int *rshift)
527 {
528 unsigned long long d_blocks;
529 unsigned long long d_bfree;
530 unsigned long long r_blocks;
531 unsigned long long r_bfree;
532 unsigned long long f_files;
533 unsigned long long f_free;
534 bool moveon;
535
536 moveon = xfs_scan_estimate_blocks(ctx, &d_blocks, &d_bfree,
537 &r_blocks, &r_bfree, &f_files, &f_free);
538 if (!moveon)
539 return moveon;
540
541 *items = ((d_blocks - d_bfree) + (r_blocks - r_bfree)) << ctx->blocklog;
542 *nr_threads = disk_heads(ctx->datadev);
543 *rshift = 20;
544 return moveon;
545 }