1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2018-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
10 #include <sys/statvfs.h>
12 #include "libfrog/paths.h"
13 #include "libfrog/workqueue.h"
14 #include "xfs_scrub.h"
22 /* Phase 4: Repair filesystem. */
/*
 * Shared scheduling state between repair_list_schedule() and its worker
 * threads.  NOTE(review): this extraction is missing several member
 * declarations -- the code below also dereferences lock, done, workers,
 * aborted, and made_progress members that are not visible here; restore
 * them from the complete file.
 */
24 struct repair_list_schedule
{
/* List of repair items that the workers pull work from. */
25 struct action_list
*repair_list
;
27 /* Action items that we could not resolve and want to try again. */
28 struct action_list requeue_list
;
32 /* Workers use this to signal the scheduler when all work is done. */
35 /* Number of workers that are still running. */
38 /* Or should we all abort? */
41 /* Did we make any progress this round? */
45 /* Try to repair as many things on our list as we can. */
/*
 * Worker thread body that drains the shared repair list.  NOTE(review):
 * the function signature and several statements (the enclosing switch on
 * the tryrepair outcome, empty-list loop exit, error handling) are
 * missing from this extraction -- restore from the complete file.
 */
52 struct repair_list_schedule
*rls
= priv
;
/* The workqueue context pointer carries the scrub context. */
53 struct scrub_ctx
*ctx
= (struct scrub_ctx
*)wq
->wq_ctx
;
55 pthread_mutex_lock(&rls
->lock
);
/* Keep pulling work until someone sets the abort flag. */
56 while (!rls
->aborted
) {
57 struct action_item
*aitem
;
58 enum tryrepair_outcome outcome
;
/* Pop the next item off the shared list; the lock is held here. */
61 aitem
= action_list_pop(rls
->repair_list
);
/* Drop the lock while the (potentially slow) repair runs. */
65 pthread_mutex_unlock(&rls
->lock
);
66 ret
= action_item_try_repair(ctx
, aitem
, &outcome
);
67 pthread_mutex_lock(&rls
->lock
);
/* Dispatch on outcome below; the switch itself is not visible here. */
78 * Partial progress.  Make a note of that and requeue
79 * this item for the next round.
81 rls
->made_progress
= true;
82 action_list_add(&rls
->requeue_list
, aitem
);
86 * No progress.  Requeue this item for a later round,
87 * which could happen if something else makes progress.
89 action_list_add(&rls
->requeue_list
, aitem
);
93 * All repairs for this item completed.  Free the item,
94 * and remember that progress was made.
96 rls
->made_progress
= true;
/* Last worker out wakes the scheduler waiting on the done condvar. */
103 if (rls
->workers
== 0)
104 pthread_cond_broadcast(&rls
->done
);
105 pthread_mutex_unlock(&rls
->lock
);
109 * Schedule repair list workers.  Returns 1 if we made progress, 0 if we
110 * did not, or -1 if we need to abort everything.
/*
 * NOTE(review): this extraction is missing the leading comment marker,
 * the static return-type line, the function's opening brace, the
 * worker-count bookkeeping (rls.workers increments), loop-control
 * statements, and the final return paths -- restore from the complete
 * file before building.
 */
113 repair_list_schedule(
114 struct scrub_ctx
*ctx
,
115 struct workqueue
*wq
,
116 struct action_list
*repair_list
)
/* On-stack scheduler state shared with every worker via &rls. */
118 struct repair_list_schedule rls
= {
119 .lock
= PTHREAD_MUTEX_INITIALIZER
,
120 .done
= PTHREAD_COND_INITIALIZER
,
121 .repair_list
= repair_list
,
124 unsigned int nr_workers
= scrub_nproc(ctx
);
125 bool made_any_progress
= false;
/* Nothing queued?  Nothing to schedule. */
128 if (action_list_empty(repair_list
))
131 action_list_init(&rls
.requeue_list
);
134 * Use the workers to run through the entire repair list once.  Requeue
135 * anything that did not make progress, and keep trying as long as the
136 * workers made any kind of progress.
/* Reset per-round progress before dispatching workers. */
139 rls
.made_progress
= false;
141 /* Start all the worker threads. */
142 for (i
= 0; i
< nr_workers
; i
++) {
143 pthread_mutex_lock(&rls
.lock
);
145 pthread_mutex_unlock(&rls
.lock
);
/* workqueue_add returns negative errno conventions; flip the sign. */
147 ret
= -workqueue_add(wq
, repair_list_worker
, 0, &rls
);
149 str_liberror(ctx
, ret
,
150 _("queueing repair list worker"));
151 pthread_mutex_lock(&rls
.lock
);
153 pthread_mutex_unlock(&rls
.lock
);
158 /* Wait for all worker functions to return. */
159 pthread_mutex_lock(&rls
.lock
);
160 while (rls
.workers
> 0)
161 pthread_cond_wait(&rls
.done
, &rls
.lock
);
162 pthread_mutex_unlock(&rls
.lock
);
/* Push unresolved items back onto the caller's list for the next pass. */
164 action_list_merge(repair_list
, &rls
.requeue_list
);
166 if (ret
|| rls
.aborted
)
168 if (rls
.made_progress
)
169 made_any_progress
= true;
/* Loop while workers keep making headway and work remains. */
170 } while (rls
.made_progress
&& !action_list_empty(repair_list
));
172 if (made_any_progress
)
177 /* Process both repair lists. */
/*
 * NOTE(review): the function name/return-type line, local declarations
 * (wq, ret, fixed_anything), error-return branches, and the do { ... }
 * opening are missing from this extraction -- restore from the complete
 * file.
 */
180 struct scrub_ctx
*ctx
)
/* Spin up a workqueue sized for this scrub context. */
186 ret
= -workqueue_create(&wq
, (struct xfs_mount
*)ctx
,
187 scrub_nproc_workqueue(ctx
));
189 str_liberror(ctx
, ret
, _("creating repair workqueue"));
194 * Try to fix everything on the space metadata repair list and then the
195 * file repair list until we stop making progress.  These repairs can
196 * be threaded, if the user desires.
/* Filesystem (space) metadata repairs first... */
201 ret
= repair_list_schedule(ctx
, &wq
, ctx
->fs_repair_list
);
/* ...then file metadata repairs. */
207 ret
= repair_list_schedule(ctx
, &wq
, ctx
->file_repair_list
);
212 } while (fixed_anything
> 0);
/* Tear down the workqueue whether or not repairs succeeded. */
214 ret
= -workqueue_terminate(&wq
);
216 str_liberror(ctx
, ret
, _("finishing repair work"));
217 workqueue_destroy(&wq
);
223 * Combine both repair lists and repair everything serially.  This is
224 * the last chance to fix things.
226 action_list_merge(ctx
->fs_repair_list
, ctx
->file_repair_list
);
227 return action_list_process(ctx
, ctx
->fs_repair_list
, XRM_FINAL_WARNING
);
230 /* Fix everything that needs fixing. */
/*
 * NOTE(review): the function name/return-type line, the `int ret`
 * declaration, early-return statements, and the error checks after each
 * call (if (ret) ...) are missing from this extraction -- restore from
 * the complete file.
 */
233 struct scrub_ctx
*ctx
)
235 struct xfs_fsop_geom fsgeom
;
236 struct scrub_item sri
;
/* Nothing to repair on either list?  Bail out early. */
239 if (action_list_empty(ctx
->fs_repair_list
) &&
240 action_list_empty(ctx
->file_repair_list
))
/* In preen mode we only optimize; real corruption needs a full run. */
243 if (ctx
->mode
== SCRUB_MODE_PREEN
&& ctx
->corruptions_found
) {
244 str_info(ctx
, ctx
->mntpoint
,
245 _("Corruptions found; will not optimize.  Re-run without -p.\n"));
250 * Check the resource usage counters early.  Normally we do this during
251 * phase 7, but some of the cross-referencing requires fairly accurate
252 * summary counters.  Check and try to repair them now to minimize the
253 * chance that repairs of primary metadata fail due to secondary
254 * metadata.  If repairs fail, we'll come back during phase 7.
256 scrub_item_init_fs(&sri
);
257 scrub_item_schedule(&sri
, XFS_SCRUB_TYPE_FSCOUNTERS
);
260 * Repair possibly bad quota counts before starting other repairs,
261 * because wildly incorrect quota counts can cause shutdowns.
262 * Quotacheck scans all inodes, so we only want to do it if we know
/* Ask the kernel whether it thinks quota counts are sick. */
265 ret
= xfrog_geometry(ctx
->mnt
.fd
, &fsgeom
);
269 if (fsgeom
.sick
& XFS_FSOP_GEOM_SICK_QUOTACHECK
)
270 scrub_item_schedule(&sri
, XFS_SCRUB_TYPE_QUOTACHECK
);
272 /* Check and repair counters before starting on the rest. */
273 ret
= scrub_item_check(ctx
, &sri
);
276 ret
= repair_item_corruption(ctx
, &sri
);
/* Now drain both repair lists. */
280 return repair_everything(ctx
);
283 /* Estimate how much work we're going to do. */
286 struct scrub_ctx
*ctx
,
288 unsigned int *nr_threads
,
291 unsigned long long need_fixing
;
293 /* Everything on the repair list. */
294 need_fixing
= action_list_length(ctx
->fs_repair_list
) +
295 action_list_length(ctx
->file_repair_list
);
297 *items
= need_fixing
;
298 *nr_threads
= scrub_nproc(ctx
) + 1;