]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
mkfs: allow sizing allocation groups for concurrency
authorDarrick J. Wong <djwong@kernel.org>
Fri, 25 Feb 2022 00:45:16 +0000 (16:45 -0800)
committerDarrick J. Wong <djwong@kernel.org>
Thu, 17 Mar 2022 21:40:26 +0000 (14:40 -0700)
Add a -d concurrency= option to mkfs so that sysadmins can configure the
filesystem so that there are enough allocation groups that the specified
number of threads can (in theory) find an uncontended group to
allocate space from.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
man/man8/mkfs.xfs.8.in
mkfs/xfs_mkfs.c

index 7b7e4f48d6f3e50066e6111083fbb2ffd812b157..a9a65c8750606bfd4f72748fd624656a7dbddd0e 100644 (file)
@@ -509,6 +509,33 @@ directories.
 By default,
 .B mkfs.xfs
 will not enable DAX mode.
+.TP
+.BI concurrency= value
+Create enough allocation groups to handle the desired level of concurrency.
+The goal of this calculation scheme is to set the number of allocation groups
+to an integer multiple of the number of writer threads desired, to minimize
+contention of AG locks.
+This scheme will neither create fewer AGs than would be created by the default
+configuration, nor will it create AGs smaller than 4GB.
+This option is not compatible with the
+.B agcount
+or
+.B agsize
+options.
+The magic value
+.I nr_cpus
+or
+.I 1
+or no value at all will set this parameter to the number of active processors
+in the system.
+If the kernel advertises that the data device is a non-mechanical storage
+device,
+.B mkfs.xfs
+will use this new geometry calculation scheme.
+The magic value of
+.I 0
+forces use of the older AG geometry calculations that are used for mechanical
+storage.
 .RE
 .TP
 .B \-f
index 15dcf48a03a01c011f6557401c2ea58ad83cdf51..0d5cb17d48bb0c4e463d79afdaed9ca669b80c69 100644 (file)
@@ -76,6 +76,7 @@ enum {
        D_EXTSZINHERIT,
        D_COWEXTSIZE,
        D_DAXINHERIT,
+       D_CONCURRENCY,
        D_MAX_OPTS,
 };
 
@@ -305,10 +306,12 @@ static struct opt_params dopts = {
                [D_EXTSZINHERIT] = "extszinherit",
                [D_COWEXTSIZE] = "cowextsize",
                [D_DAXINHERIT] = "daxinherit",
+               [D_CONCURRENCY] = "concurrency",
        },
        .subopt_params = {
                { .index = D_AGCOUNT,
                  .conflicts = { { &dopts, D_AGSIZE },
+                                { &dopts, D_CONCURRENCY },
                                 { NULL, LAST_CONFLICT } },
                  .minval = 1,
                  .maxval = XFS_MAX_AGNUMBER,
@@ -351,6 +354,7 @@ static struct opt_params dopts = {
                },
                { .index = D_AGSIZE,
                  .conflicts = { { &dopts, D_AGCOUNT },
+                                { &dopts, D_CONCURRENCY },
                                 { NULL, LAST_CONFLICT } },
                  .convert = true,
                  .minval = XFS_AG_MIN_BYTES,
@@ -426,6 +430,14 @@ static struct opt_params dopts = {
                  .maxval = 1,
                  .defaultval = 1,
                },
+               { .index = D_CONCURRENCY,
+                 .conflicts = { { &dopts, D_AGCOUNT },
+                                { &dopts, D_AGSIZE },
+                                { NULL, LAST_CONFLICT } },
+                 .minval = 0,
+                 .maxval = INT_MAX,
+                 .defaultval = 1,
+               },
        },
 };
 
@@ -839,6 +851,7 @@ struct cli_params {
        int     loginternal;
        int     lsunit;
        int     has_warranty;
+       int     data_concurrency;
 
        /* parameters where 0 is not a valid value */
        int64_t agcount;
@@ -941,7 +954,7 @@ usage( void )
                            inobtcount=0|1,bigtime=0|1]\n\
 /* data subvol */      [-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
                            (sunit=value,swidth=value|su=num,sw=num|noalign),\n\
-                           sectsize=num\n\
+                           sectsize=num,concurrency=num]\n\
 /* force overwrite */  [-f]\n\
 /* inode size */       [-i perblock=n|size=num,maxpct=n,attr=0|1|2,\n\
                            projid32bit=0|1,sparse=0|1]\n\
@@ -1038,6 +1051,19 @@ invalid_cfgfile_opt(
                filename, section, name, value);
 }
 
+/*
+ * Return the number of processors currently online, as reported by
+ * sysconf(_SC_NPROCESSORS_ONLN) and clamped to INT_MAX.  A successful lookup
+ * is remembered in a static variable so later calls skip the sysconf() call.
+ * Returns 0 if the count cannot be determined.
+ */
+static int
+nr_cpus(void)
+{
+       static long     cached_count = -1;
+
+       if (cached_count < 0) {
+               cached_count = sysconf(_SC_NPROCESSORS_ONLN);
+               /* Lookup failed; the next call will try again. */
+               if (cached_count < 0)
+                       return 0;
+       }
+
+       return cached_count > INT_MAX ? INT_MAX : cached_count;
+}
+
 static void
 check_device_type(
        const char      *name,
@@ -1498,6 +1524,30 @@ cfgfile_opts_parser(
        return 0;
 }
 
+/*
+ * Parse the value of the -d concurrency= suboption and store the resulting
+ * thread count in cli->data_concurrency.
+ *
+ * @opts and @subopt identify the suboption for getnum(); @value is the raw
+ * text supplied by the user and may be NULL when the suboption was given
+ * without "=value".
+ */
+static void
+set_data_concurrency(
+       struct opt_params       *opts,
+       int                     subopt,
+       struct cli_params       *cli,
+       const char              *value)
+{
+       long long               optnum;
+
+       /*
+        * "nr_cpus" or "1" means set the concurrency level to the CPU count.
+        * If this cannot be determined, fall back to the default AG geometry.
+        *
+        * A missing value must never reach strcmp(), which has undefined
+        * behaviour on a NULL pointer.  Hand it to getnum() instead, which is
+        * expected to substitute the suboption's default value.
+        */
+       if (value && !strcmp(value, "nr_cpus"))
+               optnum = 1;
+       else
+               optnum = getnum(value, opts, subopt);
+
+       if (optnum == 1)
+               cli->data_concurrency = nr_cpus();
+       else
+               cli->data_concurrency = optnum;
+}
+
 static int
 data_opts_parser(
        struct opt_params       *opts,
@@ -1569,6 +1619,9 @@ data_opts_parser(
                else
                        cli->fsx.fsx_xflags &= ~FS_XFLAG_DAX;
                break;
+       case D_CONCURRENCY:
+               set_data_concurrency(opts, subopt, cli, value);
+               break;
        default:
                return -EINVAL;
        }
@@ -2953,12 +3006,103 @@ reported by the device (%u).\n"),
                                                NBBY * cfg->blocksize);
 }
 
+/*
+ * Report whether the data device describes itself as non-rotational.
+ *
+ * Returns true only when a file descriptor can be obtained for xi->ddev and
+ * the BLKROTATIONAL ioctl both succeeds and reports zero.  Every failure is
+ * treated as "not solid state".
+ */
+static bool
+ddev_is_solidstate(
+       struct libxfs_xinit     *xi)
+{
+       unsigned short          is_rotational = 1;
+       int                     ddev_fd = libxfs_device_to_fd(xi->ddev);
+
+       if (ddev_fd < 0)
+               return false;
+       if (ioctl(ddev_fd, BLKROTATIONAL, &is_rotational) != 0)
+               return false;
+
+       return !is_rotational;
+}
+
+/*
+ * Choose a data section AG geometry for a desired number of concurrent
+ * allocator threads and store it in cfg->agsize and cfg->agcount.
+ *
+ * The thread count is taken from cli->data_concurrency; a negative value
+ * means "use the CPU count from nr_cpus()".  The default geometry from
+ * calc_default_ag_geometry() is kept whenever the thread count is zero, is
+ * smaller than the default AG count, or cannot be honoured with AGs of at
+ * least 4GB without dropping to the default AG count.  @xi is not used here.
+ */
+static void
+calc_data_concurrency_ag_geometry(
+       struct mkfs_params      *cfg,
+       struct cli_params       *cli,
+       struct libxfs_xinit     *xi)
+{
+       uint64_t                min_agsize = GIGABYTES(4, cfg->blocklog);
+       uint64_t                try_agsize;
+       uint64_t                def_agsize;
+       uint64_t                def_agcount;
+       /* 64-bit so that repeatedly adding nr_threads below cannot overflow. */
+       uint64_t                try_threads;
+       int                     nr_threads = cli->data_concurrency;
+
+       calc_default_ag_geometry(cfg->blocklog, cfg->dblocks, cfg->dsunit,
+                       &def_agsize, &def_agcount);
+       try_agsize = def_agsize;
+
+       /*
+        * If the caller doesn't have a particular concurrency level in mind,
+        * set it to the number of CPUs in the system.
+        */
+       if (nr_threads < 0)
+               nr_threads = nr_cpus();
+
+       /*
+        * Don't create fewer AGs than what we would create with the default
+        * geometry calculation.
+        */
+       if (!nr_threads || (uint64_t)nr_threads < def_agcount)
+               goto out;
+
+       /*
+        * Let's try matching the number of AGs to the number of CPUs.  If the
+        * proposed geometry results in AGs smaller than 4GB, reduce the AG
+        * count until we have 4GB AGs.  Don't let the thread count go below
+        * the default geometry calculation.
+        *
+        * The largest count that still yields AGs of at least 4GB is
+        * dblocks / min_agsize, so compute it directly instead of stepping
+        * the count down one at a time.  That count is always below
+        * nr_threads here because dblocks / nr_threads < min_agsize.
+        */
+       try_threads = nr_threads;
+       try_agsize = cfg->dblocks / try_threads;
+       if (try_agsize < min_agsize) {
+               try_threads = cfg->dblocks / min_agsize;
+               if (try_threads > def_agcount)
+                       try_agsize = cfg->dblocks / try_threads;
+               else
+                       try_agsize = def_agsize;
+               goto out;
+       }
+
+       /*
+        * For large filesystems we try to ensure that the AG count is a
+        * multiple of the desired thread count.  Specifically, while the
+        * proposed AG size is larger than both the maximum AG size and the AG
+        * size we would have gotten with the defaults, add the thread count
+        * to the AG count and recompute the AG size.
+        */
+       while (try_agsize > XFS_AG_MAX_BLOCKS(cfg->blocklog) &&
+              try_agsize > def_agsize) {
+               try_threads += nr_threads;
+               try_agsize = cfg->dblocks / try_threads;
+       }
+
+out:
+       cfg->agsize = try_agsize;
+       /* Round up so that a partial trailing AG is still counted. */
+       cfg->agcount = howmany(cfg->dblocks, cfg->agsize);
+}
+
 static void
 calculate_initial_ag_geometry(
        struct mkfs_params      *cfg,
-       struct cli_params       *cli)
+       struct cli_params       *cli,
+       struct libxfs_xinit     *xi)
 {
-       if (cli->agsize) {              /* User-specified AG size */
+       if (cli->data_concurrency > 0) {
+               calc_data_concurrency_ag_geometry(cfg, cli, xi);
+       } else if (cli->agsize) {       /* User-specified AG size */
                cfg->agsize = getnum(cli->agsize, &dopts, D_AGSIZE);
 
                /*
@@ -2978,6 +3122,8 @@ _("agsize (%s) not a multiple of fs blk size (%d)\n"),
                cfg->agcount = cli->agcount;
                cfg->agsize = cfg->dblocks / cfg->agcount +
                                (cfg->dblocks % cfg->agcount != 0);
+       } else if (cli->data_concurrency == -1 && ddev_is_solidstate(xi)) {
+               calc_data_concurrency_ag_geometry(cfg, cli, xi);
        } else {
                calc_default_ag_geometry(cfg->blocklog, cfg->dblocks,
                                         cfg->dsunit, &cfg->agsize,
@@ -3942,6 +4088,7 @@ main(
                .xi = &xi,
                .loginternal = 1,
                .has_warranty   = 1,
+               .data_concurrency = -1, /* auto detect non-mechanical storage */
        };
        struct mkfs_params      cfg = {};
 
@@ -4131,7 +4278,7 @@ main(
         * dependent on device sizes. Once calculated, make sure everything
         * aligns to device geometry correctly.
         */
-       calculate_initial_ag_geometry(&cfg, &cli);
+       calculate_initial_ag_geometry(&cfg, &cli, &xi);
        align_ag_geometry(&cfg);
 
        calculate_imaxpct(&cfg, &cli);