scrub/phase1.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Copyright (C) 2018-2024 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <djwong@kernel.org>
   5  */
   6 #include "xfs.h"
   7 #include <unistd.h>
   8 #include <sys/types.h>
   9 #include <sys/time.h>
  10 #include <sys/resource.h>
  11 #include <sys/statvfs.h>
  12 #include <fcntl.h>
  13 #include <dirent.h>
  14 #include <stdint.h>
  15 #include <pthread.h>
  16 #include "libfrog/util.h"
  17 #include "libfrog/workqueue.h"
  18 #include "input.h"
  19 #include "libfrog/paths.h"
  20 #include "handle.h"
  21 #include "bitops.h"
  22 #include "libfrog/avl64.h"
  23 #include "list.h"
  24 #include "xfs_scrub.h"
  25 #include "common.h"
  26 #include "disk.h"
  27 #include "scrub.h"
  28 #include "repair.h"
  29 #include "libfrog/fsgeom.h"
  30 #include "xfs_errortag.h"
  31
  32 /* Phase 1: Find filesystem geometry (and clean up after) */
  33
  34 /* Shut down the filesystem. */
  35 void
  36 xfs_shutdown_fs(
  37         struct scrub_ctx                *ctx)
  38 {
  39         int                             flag;
  40
  41         flag = XFS_FSOP_GOING_FLAGS_LOGFLUSH;
  42         str_info(ctx, ctx->mntpoint, _("Shutting down filesystem!"));
  43         if (ioctl(ctx->mnt.fd, XFS_IOC_GOINGDOWN, &flag))
  44                 str_errno(ctx, ctx->mntpoint);
  45 }
  46
  47 /* Clean up the XFS-specific state data. */
  48 int
  49 scrub_cleanup(
  50         struct scrub_ctx        *ctx)
  51 {
  52         int                     error;
  53
  54         action_lists_free(&ctx->action_lists);
  55         if (ctx->fshandle)
  56                 free_handle(ctx->fshandle, ctx->fshandle_len);
  57         if (ctx->rtdev)
  58                 disk_close(ctx->rtdev);
  59         if (ctx->logdev)
  60                 disk_close(ctx->logdev);
  61         if (ctx->datadev)
  62                 disk_close(ctx->datadev);
  63         fshandle_destroy();
  64         error = -xfd_close(&ctx->mnt);
  65         if (error)
  66                 str_liberror(ctx, error, _("closing mountpoint fd"));
  67         fs_table_destroy();
  68
  69         return error;
  70 }
  71
  72 /* Decide if we're using FORCE_REBUILD or injecting FORCE_REPAIR. */
  73 static int
  74 enable_force_repair(
  75         struct scrub_ctx                *ctx)
  76 {
  77         struct xfs_error_injection      inject = {
  78                 .fd                     = ctx->mnt.fd,
  79                 .errtag                 = XFS_ERRTAG_FORCE_SCRUB_REPAIR,
  80         };
  81         int                             error;
  82
  83         use_force_rebuild = can_force_rebuild(ctx);
  84         if (use_force_rebuild)
  85                 return 0;
  86
  87         error = ioctl(ctx->mnt.fd, XFS_IOC_ERROR_INJECTION, &inject);
  88         if (error)
  89                 str_errno(ctx, _("force_repair"));
  90         return error;
  91 }
  92
  93 /*
  94  * Bind to the mountpoint, read the XFS geometry, bind to the block devices.
  95  * Anything we've already built will be cleaned up by scrub_cleanup.
  96  */
  97 int
  98 phase1_func(
  99         struct scrub_ctx                *ctx)
 100 {
 101         int                             error;
 102
 103         /*
 104          * Open the directory with O_NOATIME.  For mountpoints owned
 105          * by root, this should be sufficient to ensure that we have
 106          * CAP_SYS_ADMIN, which we probably need to do anything fancy
 107          * with the (XFS driver) kernel.
 108          */
 109         error = -xfd_open(&ctx->mnt, ctx->mntpoint,
 110                         O_RDONLY | O_NOATIME | O_DIRECTORY);
 111         if (error) {
 112                 if (error == EPERM)
 113                         str_error(ctx, ctx->mntpoint,
 114 _("Must be root to run scrub."));
 115                 else if (error == ENOTTY)
 116                         str_error(ctx, ctx->mntpoint,
 117 _("Not an XFS filesystem."));
 118                 else
 119                         str_liberror(ctx, error, ctx->mntpoint);
 120                 return error;
 121         }
 122
 123         error = fstat(ctx->mnt.fd, &ctx->mnt_sb);
 124         if (error) {
 125                 str_errno(ctx, ctx->mntpoint);
 126                 return error;
 127         }
 128         error = fstatvfs(ctx->mnt.fd, &ctx->mnt_sv);
 129         if (error) {
 130                 str_errno(ctx, ctx->mntpoint);
 131                 return error;
 132         }
 133         error = fstatfs(ctx->mnt.fd, &ctx->mnt_sf);
 134         if (error) {
 135                 str_errno(ctx, ctx->mntpoint);
 136                 return error;
 137         }
 138
 139         /*
 140          * Flush everything out to disk before we start checking.
 141          * This seems to reduce the incidence of stale file handle
 142          * errors when we open things by handle.
 143          */
 144         error = syncfs(ctx->mnt.fd);
 145         if (error) {
 146                 str_errno(ctx, ctx->mntpoint);
 147                 return error;
 148         }
 149
 150         error = action_lists_alloc(ctx->mnt.fsgeom.agcount,
 151                         &ctx->action_lists);
 152         if (error) {
 153                 str_liberror(ctx, error, _("allocating action lists"));
 154                 return error;
 155         }
 156
 157         error = path_to_fshandle(ctx->mntpoint, &ctx->fshandle,
 158                         &ctx->fshandle_len);
 159         if (error) {
 160                 str_errno(ctx, _("getting fshandle"));
 161                 return error;
 162         }
 163
 164         /* Do we have kernel-assisted metadata scrubbing? */
 165         if (!can_scrub_fs_metadata(ctx) || !can_scrub_inode(ctx) ||
 166             !can_scrub_bmap(ctx) || !can_scrub_dir(ctx) ||
 167             !can_scrub_attr(ctx) || !can_scrub_symlink(ctx) ||
 168             !can_scrub_parent(ctx)) {
 169                 str_error(ctx, ctx->mntpoint,
 170 _("Kernel metadata scrubbing facility is not available."));
 171                 return ECANCELED;
 172         }
 173
 174         /* Do we need kernel-assisted metadata repair? */
 175         if (ctx->mode != SCRUB_MODE_DRY_RUN && !xfs_can_repair(ctx)) {
 176                 str_error(ctx, ctx->mntpoint,
 177 _("Kernel metadata repair facility is not available.  Use -n to scrub."));
 178                 return ECANCELED;
 179         }
 180
 181         if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR")) {
 182                 error = enable_force_repair(ctx);
 183                 if (error)
 184                         return error;
 185         }
 186
 187         /* Did we find the log and rt devices, if they're present? */
 188         if (ctx->mnt.fsgeom.logstart == 0 && ctx->fsinfo.fs_log == NULL) {
 189                 str_error(ctx, ctx->mntpoint,
 190 _("Unable to find log device path."));
 191                 return ECANCELED;
 192         }
 193         if (ctx->mnt.fsgeom.rtblocks && ctx->fsinfo.fs_rt == NULL) {
 194                 str_error(ctx, ctx->mntpoint,
 195 _("Unable to find realtime device path."));
 196                 return ECANCELED;
 197         }
 198
 199         /* Open the raw devices. */
 200         ctx->datadev = disk_open(ctx->fsinfo.fs_name);
 201         if (!ctx->datadev) {
 202                 str_error(ctx, ctx->mntpoint, _("Unable to open data device."));
 203                 return ECANCELED;
 204         }
 205
 206         ctx->nr_io_threads = disk_heads(ctx->datadev);
 207         if (verbose) {
 208                 fprintf(stdout, _("%s: using %d threads to scrub.\n"),
 209                                 ctx->mntpoint, scrub_nproc(ctx));
 210                 fflush(stdout);
 211         }
 212
 213         if (ctx->fsinfo.fs_log) {
 214                 ctx->logdev = disk_open(ctx->fsinfo.fs_log);
 215                 if (!ctx->logdev) {
 216                         str_error(ctx, ctx->mntpoint,
 217                                 _("Unable to open external log device."));
 218                         return ECANCELED;
 219                 }
 220         }
 221         if (ctx->fsinfo.fs_rt) {
 222                 ctx->rtdev = disk_open(ctx->fsinfo.fs_rt);
 223                 if (!ctx->rtdev) {
 224                         str_error(ctx, ctx->mntpoint,
 225                                 _("Unable to open realtime device."));
 226                         return ECANCELED;
 227                 }
 228         }
 229
 230         /*
 231          * Everything's set up, which means any failures recorded after
 232          * this point are most probably corruption errors (as opposed to
 233          * purely setup errors).
 234          */
 235         log_info(ctx, _("Invoking online scrub."), ctx);
 236         ctx->scrub_setup_succeeded = true;
 237         return 0;
 238 }