]> git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
tests/23rdev-lifetime: fix a typo main master
authorYu Kuai <yukuai3@huawei.com>
Thu, 9 May 2024 01:10:59 +0000 (09:10 +0800)
committerMariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Fri, 10 May 2024 08:05:32 +0000 (10:05 +0200)
"pill" was wrong, while it should be "kill", test will still pass while
test thread will not be cleaned up.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
154 files changed:
.gitignore
ANNOUNCE-3.0 [deleted file]
ANNOUNCE-3.0.1 [deleted file]
ANNOUNCE-3.0.2 [deleted file]
ANNOUNCE-3.0.3 [deleted file]
ANNOUNCE-3.1 [deleted file]
ANNOUNCE-3.1.1 [deleted file]
ANNOUNCE-3.1.2 [deleted file]
ANNOUNCE-3.1.3 [deleted file]
ANNOUNCE-3.1.4 [deleted file]
ANNOUNCE-3.1.5 [deleted file]
ANNOUNCE-3.2 [deleted file]
ANNOUNCE-3.2.1 [deleted file]
ANNOUNCE-3.2.2 [deleted file]
ANNOUNCE-3.2.3 [deleted file]
ANNOUNCE-3.2.4 [deleted file]
ANNOUNCE-3.2.5 [deleted file]
ANNOUNCE-3.2.6 [deleted file]
ANNOUNCE-3.3 [deleted file]
ANNOUNCE-3.3.1 [deleted file]
ANNOUNCE-3.3.2 [deleted file]
ANNOUNCE-3.3.3 [deleted file]
ANNOUNCE-3.3.4 [deleted file]
ANNOUNCE-3.4 [deleted file]
ANNOUNCE-4.0 [deleted file]
ANNOUNCE-4.1 [deleted file]
Assemble.c
Build.c
CHANGELOG.md [new file with mode: 0644]
ChangeLog [deleted file]
Create.c
Detail.c
Dump.c
Examine.c
Grow.c
Incremental.c
Kill.c
MAINTAINERS.md [new file with mode: 0644]
Makefile
Manage.c
Monitor.c
Query.c
README.initramfs [deleted file]
README.md [new file with mode: 0644]
ReadMe.c
TODO [deleted file]
bitmap.c
config.c
dlink.c
documentation/external-reshape-design.txt [moved from external-reshape-design.txt with 99% similarity]
documentation/mdadm.conf-example [moved from mdadm.conf-example with 100% similarity]
documentation/mdmon-design.txt [moved from mdmon-design.txt with 100% similarity]
drive_encryption.c [new file with mode: 0644]
drive_encryption.h [new file with mode: 0644]
inventory [deleted file]
lib.c
makedist [deleted file]
managemon.c
mapfile.c
maps.c
md.4
mdadm.8.in
mdadm.c
mdadm.conf.5.in [moved from mdadm.conf.5 with 86% similarity]
mdadm.h
mdadm.spec [deleted file]
mdmon.8
mdmon.c
mdopen.c
mdstat.c
misc/mdcheck
mkinitramfs [deleted file]
monitor.c
msg.c
platform-intel.c
platform-intel.h
policy.c
probe_roms.c
raid6check.8
raid6check.c
restripe.c
sha1.c
super-ddf.c
super-intel.c
super0.c
super1.c
sysfs.c
systemd/mdadm-grow-continue@.service
systemd/mdadm-last-resort@.service
systemd/mdcheck_continue.service
systemd/mdcheck_start.service
systemd/mdmon@.service
systemd/mdmonitor-oneshot.service
systemd/mdmonitor.service
test
tests/00confnames [new file with mode: 0644]
tests/00createnames [new file with mode: 0644]
tests/00linear
tests/00names
tests/00raid0
tests/00raid5-zero [new file with mode: 0644]
tests/00readonly
tests/01r5fail
tests/02lineargrow
tests/03assem-incr
tests/03r0assem
tests/04r0update
tests/04r5swap.broken [new file with mode: 0644]
tests/04update-metadata
tests/06name
tests/07autoassemble.broken [new file with mode: 0644]
tests/07autodetect.broken [new file with mode: 0644]
tests/07changelevelintr.broken [new file with mode: 0644]
tests/07changelevels.broken [new file with mode: 0644]
tests/07reshape5intr.broken [new file with mode: 0644]
tests/07revert-grow.broken [new file with mode: 0644]
tests/07revert-shrink.broken [new file with mode: 0644]
tests/07testreshape5.broken [new file with mode: 0644]
tests/09imsm-assemble.broken [new file with mode: 0644]
tests/09imsm-create-fail-rebuild.broken [new file with mode: 0644]
tests/09imsm-overlap [deleted file]
tests/09imsm-overlap.broken [new file with mode: 0644]
tests/10ddf-assemble-missing.broken [new file with mode: 0644]
tests/10ddf-fail-create-race.broken [new file with mode: 0644]
tests/10ddf-fail-two-spares.broken [new file with mode: 0644]
tests/10ddf-incremental-wrong-order.broken [new file with mode: 0644]
tests/14imsm-r1_2d-grow-r1_3d
tests/14imsm-r1_2d-grow-r1_3d.broken [new file with mode: 0644]
tests/14imsm-r1_2d-takeover-r0_2d
tests/14imsm-r1_2d-takeover-r0_2d.broken [new file with mode: 0644]
tests/18imsm-1d-takeover-r1_2d
tests/18imsm-r10_4d-takeover-r0_2d.broken [new file with mode: 0644]
tests/18imsm-r1_2d-takeover-r0_1d
tests/18imsm-r1_2d-takeover-r0_1d.broken [new file with mode: 0644]
tests/19raid6auto-repair.broken [new file with mode: 0644]
tests/19raid6repair.broken [new file with mode: 0644]
tests/23rdev-lifetime [new file with mode: 0644]
tests/24raid10deadlock [new file with mode: 0644]
tests/24raid10deadlock.inject_error [new file with mode: 0644]
tests/24raid456deadlock [new file with mode: 0644]
tests/25raid456-recovery-while-reshape [new file with mode: 0644]
tests/25raid456-reshape-corrupt-data [new file with mode: 0644]
tests/25raid456-reshape-deadlock [new file with mode: 0644]
tests/25raid456-reshape-while-recovery [new file with mode: 0644]
tests/func.sh
tests/imsm-grow-template
tests/templates/names_template [new file with mode: 0644]
udev-md-raid-arrays.rules
udev-md-raid-assembly.rules
udev-md-raid-safe-timeouts.rules
udev.c [new file with mode: 0644]
udev.h [new file with mode: 0644]
util.c
uuid.c [new file with mode: 0644]

index 217fe76d3807f71d4fae27ad23317f74df11988d..8d791c6ffc8326aeb459dd9a167a9abf78e75380 100644 (file)
@@ -3,6 +3,7 @@
 /*-stamp
 /mdadm
 /mdadm.8
+/mdadm.conf.5
 /mdadm.udeb
 /mdassemble
 /mdmon
diff --git a/ANNOUNCE-3.0 b/ANNOUNCE-3.0
deleted file mode 100644 (file)
index f2d4f84..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.0 - A tool for managing Soft RAID under Linux
-
-I am pleased to (finally) announce the availability of
-   mdadm version 3.0
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-
-This is a major new version and as such should be treated with some
-caution.  However it has seen substantial testing and is considered
-to be ready for wide use.
-
-
-The significant change which justifies the new major version number is
-that mdadm can now handle metadata updates entirely in userspace.
-This allows mdadm to support metadata formats that the kernel knows
-nothing about.
-
-Currently two such metadata formats are supported:
-  - DDF  - The SNIA standard format
-  - Intel Matrix - The metadata used by recent Intel ICH controllers.
-
-Also the approach to device names has changed significantly.
-
-If udev is installed on the system, mdadm will not create any devices
-in /dev.  Rather it allows udev to manage those devices.  For this to work
-as expected, the included udev rules file should be installed.
-
-If udev is not installed, mdadm will still create devices and symlinks 
-as required, and will also remove them when the array is stopped.
-
-mdadm now requires all devices which do not have a standard name (mdX
-or md_dX) to live in the directory /dev/md/.  Names in this directory
-will always be created as symlinks back to the standard name in /dev.
-
-The man pages contain some information about the new externally managed
-metadata.  However see below for a more condensed overview.
-
-Externally managed metadata introduces the concept of a 'container'.
-A container is a collection of (normally) physical devices which have
-a common set of metadata.  A container is assembled as an md array, but
-is left 'inactive'.
-
-A container can contain one or more data arrays.  These are composed from
-slices (partitions?) of various devices in the container.
-
-For example, a 5-device DDF set can contain a RAID1 using the first
-half of two devices, a RAID0 using the first half of the remaining 3 devices,
-and a RAID5 over the second half of all 5 devices.
-
-A container can be created with
-
-   mdadm --create /dev/md0 -e ddf -n5 /dev/sd[abcde]
-
-or "-e imsm" to use the Intel Matrix Storage Manager.
-
-An array can be created within a container either by giving the
-container name and the only member:
-
-   mdadm -C /dev/md1 --level raid1 -n 2 /dev/md0
-
-or by listing the component devices
-
-   mdadm -C /dev/md2 --level raid0 -n 3 /dev/sd[cde]
-
-To assemble a container, it is easiest just to pass each device in turn to 
-mdadm -I
-
-  for i in /dev/sd[abcde]
-  do mdadm -I $i
-  done
-
-This will assemble the container and the components.
-
-Alternately the container can be assembled explicitly
-
-   mdadm -A /dev/md0 /dev/sd[abcde]
-
-Then the components can all be assembled with
-
-   mdadm -I /dev/md0
-
-For each container, mdadm will start a program called "mdmon" which will
-monitor the array and effect any metadata updates needed.  The array is
-initially assembled readonly. It is up to "mdmon" to mark the metadata 
-as 'dirty' and switch the array to 'read-write'.
-
-The version 0.90 and 1.x metadata formats supported by previous
-versions for mdadm are still supported and the kernel still performs
-the same updates it used to.  The new 'mdmon' approach is only used for
-newly introduced metadata types.
-
-NeilBrown 2nd June 2009
diff --git a/ANNOUNCE-3.0.1 b/ANNOUNCE-3.0.1
deleted file mode 100644 (file)
index 91b4428..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.0.1 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.0.1
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-
-This contains only minor bug fixes over 3.0.  If you are using
-3.0, you could consider upgrading.
-
-The brief change log is:
-   -    Fix various segfaults
-   -    Fixed for --examine with containers
-   -    Lots of other little fixes.
-
-NeilBrown 25th September 2009
diff --git a/ANNOUNCE-3.0.2 b/ANNOUNCE-3.0.2
deleted file mode 100644 (file)
index 93643d1..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.0.2 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.0.2
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-
-This just contains one bugfix over 3.0.1 - I was obviously a bit hasty
-in releasing that one.
-
-The brief change log is:
-   -    Fix crash when homehost is not set, as often happens in
-       early boot.
-
-NeilBrown 25th September 2009
diff --git a/ANNOUNCE-3.0.3 b/ANNOUNCE-3.0.3
deleted file mode 100644 (file)
index d6117a1..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.0.3 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.0.3
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-
-This contains a collection of bug fixes and minor enhancements over
-3.0.1.
-
-The brief change log is:
-   -    Improvements for creating arrays giving just a name, like 'foo',
-       rather than the full '/dev/md/foo'.
-   -    Improvements for assembling member arrays of containers.
-   -    Improvements to test suite
-   -    Add option to change increment for RebuildNN messages reported
-       by "mdadm --monitor"
-   -    Improvements to mdmon 'hand-over' from initrd to final root.
-   -    Handle merging of devices that have left an IMSM array and are
-       being re-incorporated.
-   -    Add missing space in "--detail --brief" output.
-       
-NeilBrown 22nd October 2009
diff --git a/ANNOUNCE-3.1 b/ANNOUNCE-3.1
deleted file mode 100644 (file)
index 343b85d..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.1 - A tool for managing Soft RAID under Linux
-
-Hot on the heels of 3.0.3 I am pleased to announce the availability of
-   mdadm version 3.1
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-
-It contains significant feature enhancements over 3.0.x
-
-The brief change log is:
-   -    Support --grow to change the layout of RAID4/5/6
-   -    Support --grow to change the chunksize of raid 4/5/6
-   -    Support --grow to change level from RAID1 -> RAID5 -> RAID6 and
-        back.
-   -    Support --grow to reduce the number of devices in RAID4/5/6.
-   -    Support restart of these grow options when assembling an array 
-       which is partially grown.
-   -    Assorted tests of this code, and of different RAID6 layouts.
-
-Note that a 2.6.31 or later is needed to have access to these.
-Reducing devices in a RAID4/5/6 requires 2.6.32.
-Changing RAID5 to RAID1 requires 2.6.33.
-
-You should only upgrade if you need to use, or wish to test, these
-features.
-       
-NeilBrown 22nd October 2009
diff --git a/ANNOUNCE-3.1.1 b/ANNOUNCE-3.1.1
deleted file mode 100644 (file)
index 9e480dc..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.1.1 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.1.1
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-This is a bugfix release over 3.1, which was withdrawn due to serious
-bugs.  So it might be best to ignore 3.1 and say that this is a significant
-feature release over 3.0.x
-
-Significant changes are:
-  - RAID level conversion between RAID1, RAID5, and RAID6 are
-    possible where the kernel supports it (2.6.32 at least)
-  - online chunksize and layout changing for RAID5 and RAID6
-    where the kernel supports it.
-  - reduce the number of devices in a RAID4/5/6 array.
-
-  - The default metadata is now v1.1.  This metadata is stored at the
-    start of the device so is safer in many ways but could interfere with
-    boot loaders.  The old default (0.90) is still available and fully
-    supported.
-
-  - The default chunksize is now 512K rather than 64K.  This seems more
-    appropriate for modern devices.
-
-  - The default bitmap chunksize for internal bitmaps is now at least
-    64Meg as fine grained bitmaps tend to impact performance more for
-    little extra gain.
-
-This release is believed to be stable and you should feel free to
-upgrade to 3.1.1.
-
-NeilBrown 19th November 2009
diff --git a/ANNOUNCE-3.1.2 b/ANNOUNCE-3.1.2
deleted file mode 100644 (file)
index 321b8be..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.1.2 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.1.2
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-This is a bugfix/stability release over 3.1.1.
-
-Significant changes are:
-  - The default metadata has changed again (sorry about that).
-    It is now v1.2 and will hopefully stay that way.  It turned
-    out there were boot-block issues with v1.1 which make it 
-    unsuitable for a default, though in many cases it is still
-    suitable to use.
-  - Stopping a container is not permitted when members are still
-    active
-  - Add 'homehost' to the valid words for the "AUTO" config file
-    line.  When followed by "-all", this causes mdadm to
-    auto-assemble any array belonging to this host, but not
-    auto-assemble anything else.
-  - Fix some bugs with "--grow --chunksize=" for changing chunksize.
-  - VAR_RUN can be easily changed at compile time just like ALT_RUN.
-    This gives distros more flexibility in how to manage the
-    pid and sock files that mdmon needs.
-  - Various mdmon fixes
-  - Always make bitmap 4K-aligned if at all possible.
-  - If mdadm.conf lists arrays which have inter-dependencies,
-    they previously had to be listed in the "right" order.  Now
-    any order should work.
-  - Fix --force assembly of v1.x arrays which are in the process
-    of recovering.
-  - Add section on 'scrubbing' to 'md' man page.
-  - Various command-line-option parsing improvements.
-  - ... and lots of other bug fixes.
-
-
-This release is believed to be stable and you should feel free to
-upgrade to 3.1.2
-
-NeilBrown 10th March 2010
diff --git a/ANNOUNCE-3.1.3 b/ANNOUNCE-3.1.3
deleted file mode 100644 (file)
index 95b2b6c..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.1.3 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.1.3
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-This is a bugfix/stability release over 3.1.2
-
-Significant changes are:
-   -    mapfile now lives in a fixed location which default to
-        /dev/.mdadm/map but can be changed at compile time.  This
-       location is chosen as most distros provide it during early
-       boot and preserve it through.  As long as /dev exists and is
-       writable, /dev/.mdadm will be created.
-       Other files for communication with mdmon live here too.
-       This fixes a bug reported by Debian and Gentoo users where
-       udev would spin in early-boot.
-   -    IMSM and DDF metadata will not be recognised on partitions
-        as they should only be used on whole-disks.
-   -    Various overflows caused by 2G drives have been addressed.
-   -    A subarray of an IMSM container can now be killed with
-        --kill-subarray.  Also subarrays can be renamed with
-       --update-subarray
-   -    -If (or --incremental --fail) can be used  from udev to
-        fail and remove from all arrays a device which has been
-       unplugged from the system.  i.e. hot-unplug-support.
-   -    "mdadm /dev/mdX --re-add missing" will look for any device
-        that looks like it should be a member of /dev/mdX but isn't
-       and will automatically --re-add it
-   -    Now compile with -Wextra to get extra warnings.
-   -    Lots of minor bug fixes, documentation improvements, etc.
-
-This release is believed to be stable and you should feel free to
-upgrade to 3.1.3
-
-It is expected that the next release will be 3.2 with a number of new
-features.  3.1.4 will only happen if important bugs show up before 3.2
-is stable.
-
-NeilBrown 6th August 2010
diff --git a/ANNOUNCE-3.1.4 b/ANNOUNCE-3.1.4
deleted file mode 100644 (file)
index c157a36..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.1.4 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.1.4
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-This is a bugfix/stability release over 3.1.3.
-3.1.3 had a couple of embarrassing regressions and a couple of other
-issues surfaced which had easy fixes so I decided to make a 3.1.4
-release after all.
-
-Two fixes related to configs that aren't using udev:
-   - Don't remove md devices with 'standard' names on --stop
-   - Allow dev_open to work on read-only /dev
-And fixed regressions:
-   - Allow --incremental to add spares to an array
-   - Accept --no-degraded as a deprecated option rather than
-            throwing an error
-   - Return correct success status when --incremental assembling 
-     a container which does not yet have enough devices.
-   - Don't link mdadm with pthreads, only mdmon needs it.
-   - Fix compiler warning due to bad use of snprintf
-   - Fix spare migration
-
-This release is believed to be stable and you should feel free to
-upgrade to 3.1.4
-
-It is expected that the next release will be 3.2 with a number of new
-features.
-
-NeilBrown 31st August 2010
diff --git a/ANNOUNCE-3.1.5 b/ANNOUNCE-3.1.5
deleted file mode 100644 (file)
index baa1f92..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.1.5 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.1.5
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git?p=mdadm
-
-This is a bugfix/stability release over 3.1.4.  It contains all the
-important bugfixes found while working on 3.2 and 3.2.1.  It will be
-the last 3.1.x release - 3.2.1 is expected to be released in a few days.
-
-Changes include:
-  - Fixes for v1.x metadata on big-endian machines.
-  - man page improvements
-  - Improve '--detail --export' when run on partitions of an md array.
-  - Fix regression with removing 'failed' or 'detached' devices.
-  - Fixes for "--assemble --force" in various unusual cases.
-  - Allow '-Y' to mean --export.  This was documented but not implemented.
-  - Various fixes for handling 'ddf' metadata.  This is now more reliable
-    but could benefit from more interoperability testing.
-  - Correctly list subarrays of a container in "--detail" output.
-  - Improve checks on whether the requested number of devices is supported
-    by the metadata - both for --create and --grow.
-  - Don't remove partitions from a device that is being included in an
-    array until we are fully committed to including it.
-  - Allow "--assemble --update=no-bitmap" so an array with a corrupt
-    bitmap can still be assembled.
-  - Don't allow --add to succeed if it looks like a "--re-add" is probably
-    wanted, but cannot succeed.  This avoids inadvertently turning
-    devices into spares when an array is failed.
-
-This release is believed to be stable and you should feel free to
-upgrade to 3.1.5
-
-
-NeilBrown 23rd March 2011
-
diff --git a/ANNOUNCE-3.2 b/ANNOUNCE-3.2
deleted file mode 100644 (file)
index 9e282bc..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.2 - A tool for managing Soft RAID under Linux (DEVEL ONLY)
-
-I am pleased to announce the availability of
-   mdadm version 3.2
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm devel-3.2
-   http://neil.brown.name/git?p=mdadm
-
-This is a "Developers only" release.  Please don't consider using it
-or making it available to others without reading the following.
-
-
-By far the most significant change in this release related to the
-management of reshaping arrays.  This code has been substantially
-re-written so that it can work with 'externally managed metadata' -
-Intel's IMSM in particular.  We now support level migration and
-OnLine Capacity Expansion on these arrays.
-
-However, while the code largely works it has not been tested
-exhaustively so there are likely to be problems.  As the reshape code
-for native metadata arrays was changed as part of this rewrite these
-problems could also result in regressions for reshape of native
-metadata.
-
-It is partly to encourage greater testing that this release is being
-made.  Any reports of problems - particularly reproducible recipes for
-triggering the problems - will be gratefully received.
-
-It is hoped that a "3.2.1" release will be available in early March
-which will be a bugfix release over this and can be considered
-suitable for general use.
-
-Other changes of note:
-
- - Policy framework.
-   Various policy statements can be made in the mdadm.conf to guide
-   the behaviour of mdadm, particularly with regards to how new devices
-   are treated by "mdadm -I".
-   Depending on the 'action' associated with a device (identified by
-   its 'path') such new devices can be automatically re-added to an
-   existing array that they previously fell out of, or automatically
-   added as a spare if they appear to contain no data.
-
- - mdadm now has a limited understanding of partition tables.  This
-   allows the policy framework to make decisions about partitioned
-   devices as well.
-
- - --incremental --remove can be told what --path the device was on,
-   and this info will be recorded so that another device appearing at
-   the same physical location can be preferentially added to the same
-   array (provides the spare-same-slot action policy applied to the
-   path).
-
- - A new "--invalid-backup" flag is available in --assemble
-   mode.  This can be used to re-assemble an array which was stopped
-   in the middle of a reshape, and for which the 'backup file' is no
-   longer available or is corrupted.  The array may have some
-   corruption in it at the point where reshape was up to, but at least
-   the rest of the array will become available.
-   
-
- - Various internal restructuring - more is needed.
-
-
-Any feed back and bug reports are always welcomed at:
-    linux-raid@vger.kernel.org
-
-And please:  don't use this in production - particularly not the
---grow functionality.
-
-NeilBrown 1st February 2011
-
-
diff --git a/ANNOUNCE-3.2.1 b/ANNOUNCE-3.2.1
deleted file mode 100644 (file)
index 0e7826c..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-
-
-I am pleased to announce the availability of
-   mdadm version 3.2.1
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git/mdadm
-
-Many of the changes in this release are of internal interest only,
-restructuring and refactoring code and so forth.
-
-Most of the bugs found and fixed during development for 3.2.1 have been
-back-ported for the recently-released 3.1.5 so this release primarily
-provides a few new features over 3.1.5.
-
-They include:
-  - policy framework
-     Policy can be expressed for moving spare devices between arrays, and
-     for how to handle hot-plugged devices.  This policy can be different
-     for devices plugged in to different controllers etc.
-     This, for example, allows a configuration where when a device is plugged
-     in it is immediately included in an md array as a hot spare and
-     possibly starts recovery immediately if an array is degraded.
-
-  - some understanding of mbr and gpt partition tables
-     This is primarily to support the new hot-plug support.  If a
-     device is plugged in and policy suggests it should have a partition table,
-     the partition table will be copied from a suitably similar device, and
-     then the partitions will hot-plug and can then be added to md arrays.
-
-  - "--incremental --remove" can remember where a device was removed from
-    so if a device gets plugged back in the same place, special policy applies
-    to it, allowing it to be included in an array even if a general hotplug
-    will not be included.
-
-  - enhanced reshape options, including growing a RAID0 by converting to RAID4,
-    restriping, and converting back.  Also conversions between RAID0 and
-    RAID10 and between RAID1 and RAID10 are possible (with a suitably recent
-    kernel).
-
-  - spare migration for IMSM arrays.
-     Spare migration can now work across 'containers' using non-native metadata
-     and specifically Intel's IMSM arrays support spare migrations.
-
-  - OLCE and level migration for Intel IMSM arrays.
-     OnLine Capacity Expansion and level migration (e.g. RAID0 -> RAID5) is
-     supported for Intel Matrix Storage Manager arrays.
-     This support is currently 'experimental' for technical reasons.  It can
-     be enabled with "export MDADM_EXPERIMENTAL=1"
-
-  - avoid including wayward devices
-     If you split a RAID1, mount the two halves as two separate degraded RAID1s,
-     and then later bring the two back together, it is possible that the md 
-     metadata won't properly show that one must over-ride the other.
-     mdadm now does extra checking to detect this possibility and avoid
-     potentially corrupting data.
-
-  - remove any possible confusion between similar options.
-     e.g. --brief and --bitmap were mapped to 'b' and mdadm wouldn't
-     notice if one was used where the other was expected.
-
-  - allow K,M,G suffixes on chunk sizes
-
-
-While mdadm-3.2.1 is considered to be reasonably stable, you should
-only use it if you want to try out the new features, or if you
-generally like to be on the bleeding edge.   If the new features are not
-important to you, then 3.1.5 is probably the appropriate version to be using
-until 3.2.2 comes out.
-
-NeilBrown 28th March 2011
diff --git a/ANNOUNCE-3.2.2 b/ANNOUNCE-3.2.2
deleted file mode 100644 (file)
index b70d18b..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.2.2 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.2.2
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git/mdadm
-
-This release is largely a stabilising release for the 3.2 series.
-Many of the changes just fix bugs introduced in 3.2 or 3.2.1.
-
-There are some new features.  They are:
-  - reshaping IMSM (Intel metadata) arrays is no longer 'experimental',
-    it should work properly and be largely compatible with IMSM drivers in
-    other platforms.
-  - --assume-clean can be used with --grow --size to avoid resyncing the
-    new part of the array.  This is only supported with very new kernels.
-  - RAID0 arrays can have chunksize which is not a power of 2.  This has been
-    supported in the kernel for a while but is only now supported by
-    mdadm.
-
-  - A new tool 'raid6check' is available which can check a RAID6 array,
-    or part of it, and report which device is most inconsistent with the
-    others if any stripe is inconsistent.   This is still under development
-    and does not have a man page yet.  If anyone tries it out and has any
-    questions or experience to report, they would be most welcome on
-    linux-raid@vger.kernel.org.
-
-Future releases in the 3.2 series will only be made if bugfixes are needed.
-The next release to add features is expected to be 3.3.
-
-NeilBrown 17th June 2011
diff --git a/ANNOUNCE-3.2.3 b/ANNOUNCE-3.2.3
deleted file mode 100644 (file)
index 8a8dba4..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.2.3 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.2.3
-
-It is available at the usual places:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git/mdadm
-
-This release is largely a bugfix release for the 3.2 series with many
-minor fixes with little or no impact.
-
-The largest single area of change is support for reshape of Intel
-IMSM arrays (OnLine Capacity Expansion and Level Migration).
-Among other fixes, this now has a better chance of surviving if a
-device fails during reshape.
-
-Upgrading is recommended - particularly if you use mdadm for IMSM
-arrays - but not essential.
-
-NeilBrown 23rd December 2011
diff --git a/ANNOUNCE-3.2.4 b/ANNOUNCE-3.2.4
deleted file mode 100644 (file)
index e321678..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.2.4 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.2.4
-
-It is available at the usual places, now including github:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git/mdadm
-
-This release is largely a bugfix release for the 3.2 series with many
-minor fixes with little or no impact.
-
-"--oneline" log of changes is below.  Some notable ones are:
-
- - --offroot argument to improve interactions between mdmon and initrd
- - --prefer argument to select which /dev names to display in some
-   circumstances.
- - relax restrictions on when "--add" will be allowed
- - Fix bug with adding write-intent-bitmap to active array
- - Now defaults to "/run/mdadm" for storing run-time files.
-
-Upgrading is encouraged.
-
-The next mdadm release is expected to be 3.3 with a number of new
-features.
-
-NeilBrown 9th May 2012
-
-77b3ac8 monitor: make return from read_and_act more symbolic.
-68226a8 monitor: ensure we retry soon when 'remove' fails.
-8453f8d fix: Monitor sometimes crashes
-90fa1a2 Work around gcc-4.7's strict aliasing checks
-0c4304c fix: container creation with --incremental used.
-5d1c7cd FIX: External metadata sometimes is not updated
-3c20f98 FIX: mdmon check in reshape_container() can cause a problem
-59ab9f5 FIX: Typo error in fprint command
-9587c37 imsm: load_super_imsm_all function refactoring
-ec50f7b imsm: load_imsm_super_all supports loading metadata from the device list
-ca9de18 imsm: validate the number of imsm volumes per controller
-30602f5 imsm: display fd in error trace when when store_imsm_mpb failes
-eb155f6 mdmon: Use getopt_long() to parse command line options
-08ca2ad Add --offroot argument to mdadm
-da82751 Add --offroot argument to mdmon
-a0963a8 Spawn mdmon with --offroot if mdadm was launched with --offroot
-f878b24 imsm: fix, the second array need to have the whole available space on devices
-d597705 getinfo_super1: Use MaxSector in place of sb->size
-6ef8905 super1: make aread/awrite always use an aligned buffer.
-de5a472 Remove avail_disks arg from 'enough'.
-da8fe5a Assemble: fix --force assemble during reshape.
-b10c663 config: fix handing of 'homehost' in AUTO line.
-92d49ec FIX: NULL pointer to strdup() can be passed
-d2bde6d imsm: FIX: No new missing disks are allowed during general migration
-111e9fd FIX: Array is not run when expansion disks are added
-bf5cf7c imsm: FIX: imsm_get_allowed_degradation() doesn't count degradation for raid1
-50927b1 Fix: Sometimes mdmon throws core dump during reshape
-78340e2 Flush mdmon before next reshape step during container operation
-e174219 imsm: FIX: Chunk size migration problem
-f93346e FIX: use md position to reshape restart
-6a75c8c imsm: FIX: use md position to reshape restart
-51d83f5 imsm: FIX: Clear migration record when migration switches to next volume.
-e1dd332 FIX: restart reshape when reshape process is stopped just between 2 reshapes
-1ca90aa FIX: Do not try to (continue) reshape using inactive array
-9f1b0f0 config: conf_match should ignore devname when not set.
-d669228 Use posix_memalign() for memory used to write bitmaps
-178950e FIX: Changes in '0' case for reshape position verification
-9200d41 avoid double-free upon "old buggy kernel" sysfs_read failure
-4011421 Print error message if failing to write super for 1.x metadata
-0011874 Use MDMON_DIR for pid files created in Monitor.c
-56d1885 Assemble: don't use O_EXCL until we have checked device content.
-b720636 Assemble: support assembling of a RAID0 being reshaped.
-c69ffac Manage: allow --re-add to failed array.
-52f07f5 Reset bad flag on map update
-911cead super1: support superblocks up to 4K.
-ad6db3c Create: reduce the verbosity of 'default_layout'.
-b2bfdfa super1.c don't keep recalculating bitmap pointer
-4122675 Define and use SUPER1_SIZE for allocations
-1afa930 init_super1() memset full buffer allocated for superblock
-2de0b8a match_metadata_desc1(): Use calloc instead of malloc+memset
-3c0bcd4 Use 4K buffer alignment for superblock allocations
-308340a Use struct align_fd to cache fd's block size for aligned reads/writes
-65ed615 match_metadata_desc0(): Use calloc instead of malloc+memset
-de89706 Generalize ROUND_UP() macro and introduce matching ROUND_UP_PTR()
-0a2f189 super1.c: use ROUND_UP/ROUND_UP_PTR
-654a381 super-intel.c: Use ROUND_UP() instead of manually coding it
-42d5dfd __write_init_super_ddf(): Use posix_memalign() instead of static aligned buffer
-d4633e0 Examine: fix array size calculation for RAID10.
-e62b778 Assemble: improve verbose logging when including old devices.
-0073a6e Remove possible crash during RAID6 -> RAID5 reshape.
-69fe207 Incremental: fix adding devices with --incremental
-bcbb311 Manage: replace 'return 1' with 'goto abort'.
-9f58469 Manage: freeze recovery while adding multiple devices.
-ae6c05a Create: round off size for RAID1 arrays.
-5ca3a90 Grow: print useful error when converting RAID1->RAID5 will fail.
-c07d640 Fix tests/05r1-re-add-nosupper
-2d762ad Fix the new ROUND_UP macro.
-fd324b0 sysfs: fixed sysfs_freeze_array array to work properly with Manage_subdevs.
-5551b11 imsm: avoid overflows for disks over 1TB
-97f81ee clear hi bits if not used after loading metadata from disk
-e03640b simplify calculating array_blocks
-29cd082 show 2TB volumes/disks support in --detail-platform
-2cc699a check volume size in validate_geometry_imsm_orom
-9126b9a check that no disk over 2TB is used to create container when no support
-027c374 imsm: set 2tb disk attribute for spare
-3556c2f Fix typo: wan -> want
-15632a9 parse_size: distinguish between 0 and error.
-fbdef49 Bitmap_offset is a signed number
-508a7f1 super1: leave more space in front of data by default.
-40110b9 Fix two typos in fprintf messages
-342460c mdadm man page: fix typo
-0e7f69a imsm: display maximum volumes per controller and array
-36fd8cc imsm: FIX: Update function imsm_num_data_members() for Raid1/10
-7abc987 imsm: FIX: Add volume size expand support to imsm_analyze_change()
-f3871fd imsm: Add new metadata update for volume size expansion
-54397ed imsm: Execute size change for external metatdata
-016e00f FIX: Support metadata changes rollback
-fbf3d20 imsm: FIX: Support metadata changes rollback
-44f6f18 FIX: Extend size of raid0 array
-7e7e9a4 FIX: Respect metadata size limitations
-65a9798 FIX: Detect error and rollback metadata
-13bcac9 imsm: Add function imsm_get_free_size()
-b130333 imsm: Support setting max size for size change operation
-c41e00b imsm: FIX: Component size alignment check
-58d26a2 FIX: Size change is possible as standalone change only
-4aecb54 FIX: Assembled second array is in read only state during reshape
-ae2416e FIX: resolve make everything compilation error
-480f356 Raid limit of 1024 when scanning for devices.
-c2ecf5f Add --prefer option for --detail and --monitor
-0a99975 Relax restrictions on when --add is permitted.
-7ce0570 imsm: fix: rebuild does not continue after reboot
-b51702b fix: correct extending size of raid0 array
-34a1395 Fix sign extension of bitmap_offset in super1.c
-012a864 Introduce sysfs_set_num_signed() and use it to set bitmap/offset
-5d7b407 imsm: fix: thunderdome may drop 2tb attribute
-5ffdc2d Update test for "is udev active".
-96fd06e Adjust to new standard of /run
-974e039 test: don't worry too much about array size.
-b0a658f Grow: failing the set the per-device size is not an error.
-36614e9 super-intel.c: Don't try to close negative fd
-562aa10 super-intel.c: Fix resource leak from opendir()
-
diff --git a/ANNOUNCE-3.2.5 b/ANNOUNCE-3.2.5
deleted file mode 100644 (file)
index 396da12..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.2.5 - A tool for managing Soft RAID under Linux
-
-I am somewhat disappointed to have to announce the availability of
-   mdadm version 3.2.5
-
-It is available at the usual places, now including github:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git/mdadm
-
-This release primarily fixes a serious regression in 3.2.4.
-This regression does *not* cause any risk to data.  It simply
-means that adding a device with "--add" would sometimes fail
-when it should not.
-
-The fix also includes a couple of minor fixes such as making
-the "--layout=preserve" option to "--grow" work again.
-
-A reminder that the default location for runtime files is now
-"/run/mdadm".  If you compile this for a distro that does not
-have "/run", you will need to compile with an alternate setting for
-MAP_DIR. e.g.
-   make MAP_DIR=/var/run/mdadm
-or
-   make MAP_DIR=/dev/.mdadm
-
-NeilBrown 18th May 2012
-
diff --git a/ANNOUNCE-3.2.6 b/ANNOUNCE-3.2.6
deleted file mode 100644 (file)
index f5cfd49..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-Subject:  ANNOUNCE: mdadm 3.2.6 - A tool for managing Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.2.6
-
-It is available at the usual places, now including github:
-   countrycode=xx.
-   http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://neil.brown.name/git/mdadm
-
-This is a stability release which adds a number of bugfixes to 3.2.5.
-There are no real stand-out fixes, just lots of little bits and pieces.
-
-Below is the "git log --oneline --reverse" list of changes since
-3.2.5.
-
-NeilBrown 25th October 2012
-
-b7e05d2 udev-rules: prevent systemd from mount devices before they are ready.
-0d478e2 mdadm: Fix Segmentation fault.
-42f0ca1 imsm: fix: correct checking volume's degradation
-fcf2195 Monitor: fix inconsistencies in values for ->percent
-5f862fb Monitor: Report NewArray when an array the disappeared, reappears.
-6f51b1c Monitor: fix reporting for Fail vs FailSpare etc.
-68ad53b mdmon: fix arg parsing.
-517f135 Assemble: don't leak memory with fdlist.
-090900c udev-rules: prevent systemd from mount devices before they are ready.
-446e000 sha1.h: remove ansidecl.h header inclusion
-ec894f5 Manage: zero metadata before adding to 'external' array.
-3a84db5 ddf: allow a non-spare to be used to recovery a missing device.
-c5d61ca ddf: hack to fix container recognition.
-23084aa mdmon: fix arg processing for -a
-c4e96a3 mdmon: allow --takeover when original was started with --offroot
-80841df find_free_devnum: avoid auto-using names in /etc/mdadm.conf
-c5c56d6 mapfile: fix mapfile rebuild for containers
-aec89f6 fix segfaults in Detail()
-2117ad1 Fix 'enough' function for RAID10.
-0bc300d Use --offroot flag when assembling md arrays via --incrmental
-ac78f24 Grow: make warning about old metadata more explicit.
-14026ab Replace sha1.h with slightly older version.
-6f6809f Add zlib license to crc32.c
-5267ba0 Handles spaces in array names better.
-c51f288 imsm: allow --assume-clean to work.
-acf7076 Grow: allow --grow --continue to work for native metadata.
-335d2a6 Grow: fix a couple of typos with --assume-clean usage
-9ff1427 Fix open_container
-3713633 mdadm: super0: do not override uuid with homehost
-31bff58 Trivial bugfix and spelling fixes.
-e1e539f Detail: don't report a faulty device as 'spare' or 'rebuilding'.
-22a6461 super0: allow creation of array on 2TB+ devices.
-a5d47a2 Create new md devices consistently
-eb48676 Monitor: don't complain about non-monitorable arrays in mdadm.conf
-ecdf2d7 Query: don't be confused by partition tables.
-f7b75c1 Query: allow member of non-0.90 arrays to be better reported.
diff --git a/ANNOUNCE-3.3 b/ANNOUNCE-3.3
deleted file mode 100644 (file)
index f770aa1..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-Subject: ANNOUNCE: mdadm 3.3 - A tool for managing md Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.3
-
-It is available at the usual places:
-   http://www.kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://git.neil.brown.name/git/mdadm
-
-This is a major new release so don't be too surprised if there are a
-few issues.  If I hear about them they will be fixed in 3.3.1.
-git log reports nearly 500 changes since 3.2.6 so I won't list them
-all.
-
-Some highlights are:
-
-- Some array reshapes can proceed without needing backup file.
-  This is done by changing the 'data_offset' so we never need to write
-  any data back over where it was before.  If there is no "head space"
-  or "tail space" to allow data_offset to change, the old mechanism
-  with a backup file can still be used.
-- RAID10 arrays can be reshaped to change the number of devices,
-  change the chunk size, or change the layout between 'near'
-  and 'offset'.
-  This will always change data_offset, and will fail if there is no
-  room for data_offset to be moved.
-- "--assemble --update=metadata" can convert a 0.90 array to a 1.0 array.
-- bad-block-logs are supported (but not heavily tested yet)
-- "--assemble --update=revert-reshape" can be used to undo a reshape
-  that has just been started but isn't really wanted.  This is very
-  new and while it passes basic tests it cannot be guaranteed.
-- improved locking between --incremental and --assemble
-- uses systemd to run "mdmon" if systemd is configured to do that.
-- kernel names of md devices can be non-numeric. e.g. "md_home" rather than
-  "md0".  This will probably confuse lots of other tools, so you need to
-       echo CREATE names=yes >> /etc/mdadm.conf
-  or the feature will not be used.  (you also need a reasonably new kernel).
-- "--stop" can be given a kernel name instead of a device name. i.e
-     mdadm --stop md4
-  will work even if /dev/md4 doesn't exist.
-- "--detail --export" has some information about the devices in the array
-- --dump and --restore can be used to backup and restore the metadata on an
-   array.
-- Hot-replace is supported with
-     mdadm /dev/mdX --replace /dev/foo
-  and
-     mdadm /dev/mdX --replace /dev/foo --with /dev/bar
-- Config file can be a directory in which case all "*.conf" files are
-  read in lexical order.
-  Default is to read /etc/mdadm.conf and then /etc/mdadm.conf.d
-  Thus
-      echo CREATE names=yes > /etc/mdadm.conf.d/names.conf
-  will also enable the use of named md devices.
-
-- Lots of improvements to DDF support including adding support for
-  RAID10 (thanks Martin Wilck).
-
-and lots of bugfixes and other little changes.
-
-NeilBrown 3rd September 2013
diff --git a/ANNOUNCE-3.3.1 b/ANNOUNCE-3.3.1
deleted file mode 100644 (file)
index 7d5e666..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-Subject: ANNOUNCE: mdadm 3.3.1 - A tool for managing md Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.3.1
-
-It is available at the usual places:
-   http://www.kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://git.neil.brown.name/git/mdadm.git
-
-The main changes are:
- - lots of work on "DDF" support.  Hopefully it will be more stable
-   now.  Bug reports are always welcome.
- - improved interactions with 'systemd'.  Where possible, background
-   tasks are run from systemd (if it is present) rather than forking and
-   disassociating from the session.  This is important because udev
-   doesn't really let you disassociate.
-
-though there are a number of other little bug fixes too.
-
-NeilBrown 5th June 2014
diff --git a/ANNOUNCE-3.3.2 b/ANNOUNCE-3.3.2
deleted file mode 100644 (file)
index 6b54961..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-Subject: ANNOUNCE: mdadm 3.3.2 - A tool for managing md Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.3.2
-
-It is available at the usual places:
-   http://www.kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://git.neil.brown.name/git/mdadm.git
-
-Changes since 3.3.1 are mostly little bugfixes and some man-page
-updates.
-
-NeilBrown 21st August 2014
diff --git a/ANNOUNCE-3.3.3 b/ANNOUNCE-3.3.3
deleted file mode 100644 (file)
index ac1b217..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-Subject: ANNOUNCE: mdadm 3.3.3 - A tool for managing md Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.3.3
-
-It is available at the usual places:
-   http://www.kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://git.neil.brown.name/git/mdadm.git
-
-The 100 changes since 3.3.2 are mostly little bugfixes and some improvements
-to the selftests.
-raid6check now handles all RAID6 layouts including DDF correctly.
-See git log for the rest.
-
-NeilBrown 24th July 2015
diff --git a/ANNOUNCE-3.3.4 b/ANNOUNCE-3.3.4
deleted file mode 100644 (file)
index 52b9456..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-Subject: ANNOUNCE: mdadm 3.3.4 - A tool for managing md Soft RAID under Linux
-
-I am somewhat disappointed to have to announce the availability of
-   mdadm version 3.3.4
-
-It is available at the usual places:
-   http://www.kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://git.neil.brown.name/git/mdadm.git
-
-In mdadm-3.3 a change was made to how IMSM (Intel Matrix Storage
-Manager) metadata was handled.  Previously an IMSM array would only
-be assembled if it was attached to an IMSM controller.
-
-In 3.3 this was relaxed as there are circumstances where the
-controller is not properly detected.  Unfortunately this has negative
-consequences which have only just come to light.
-
-If you have an IMSM RAID1 configured and then disable RAID in the
-BIOS, the metadata will remain on the devices.  If you then install
-some other OS on one device and then install Linux on the other, Linux
-might eventually start noticing the IMSM metadata (depending a bit on whether
-mdadm is included in the initramfs) and might start up the RAID1.  This could
-copy one device over the other, thus trashing one of the installations.
-
-Not good.
-
-So with this release IMSM arrays will only be assembled if attached to
-an IMSM controller, or if "--force" is given to --assemble, or if the
-environment variable IMSM_NO_PLATFORM is set (used primarily for
-testing).
-
-I strongly recommend upgrading to 3.3.4 if you are using 3.3 or later.
-
-NeilBrown 3rd August 2015.
diff --git a/ANNOUNCE-3.4 b/ANNOUNCE-3.4
deleted file mode 100644 (file)
index 2689732..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-Subject: ANNOUNCE: mdadm 3.4 - A tool for managing md Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 3.4
-
-It is available at the usual places:
-   http://www.kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://github.com/neilbrown/mdadm
-   git://neil.brown.name/mdadm
-   http://git.neil.brown.name/git/mdadm
-
-The new second-level version number reflects significant new
-functionality, particularly support for journalled RAID5/6 and clustered
-RAID1.  This new support is probably still buggy.  Please report bugs.
-
-There are also a number of fixes for Intel's IMSM metadata support,
-and an assortment of minor bug fixes.
-
-I plan for this to be the last release of mdadm that I provide as I am
-retiring from MD and mdadm maintenance.  Jes Sorensen has volunteered
-to oversee mdadm for the next while.  Thanks Jes!
-
-NeilBrown 28th January 2016
diff --git a/ANNOUNCE-4.0 b/ANNOUNCE-4.0
deleted file mode 100644 (file)
index f79c540..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Subject: ANNOUNCE: mdadm 4.0 - A tool for managing md Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 4.0
-
-It is available at the usual places:
-   http://www.kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://git.kernel.org/pub/scm/utils/mdadm/mdadm.git
-   http://git.kernel.org/cgit/utils/mdadm/
-
-The update in major version number primarily indicates this is a
-release by its new maintainer. In addition it contains a large number
-of fixes in particular for IMSM RAID and clustered RAID support.  In
-addition this release includes support for IMSM 4k sector drives,
-failfast and better documentation for journaled RAID.
-
-This is my first release of mdadm. Please thank Neil Brown for his
-previous work as maintainer and blame me for all the bugs I caused
-since taking over.
-
-Jes Sorensen, 2017-01-09
diff --git a/ANNOUNCE-4.1 b/ANNOUNCE-4.1
deleted file mode 100644 (file)
index a273b9a..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-Subject: ANNOUNCE: mdadm 4.1 - A tool for managing md Soft RAID under Linux
-
-I am pleased to announce the availability of
-   mdadm version 4.1
-
-It is available at the usual places:
-   http://www.kernel.org/pub/linux/utils/raid/mdadm/
-and via git at
-   git://git.kernel.org/pub/scm/utils/mdadm/mdadm.git
-   http://git.kernel.org/cgit/utils/mdadm/
-
-The update constitutes more than one year of enhancements and bug fixes
-including for IMSM RAID, Partial Parity Log, clustered RAID support,
-improved testing, and gcc-8 support.
-
-Jes Sorensen, 2018-10-01
index 6b5a7c8ef7806d4305eef2f986c6457ae8ab8ebf..f5e9ab1f085304608ab852645b4d3327059daa92 100644 (file)
 #include       "mdadm.h"
 #include       <ctype.h>
 
+mapping_t assemble_statuses[] = { /* INCR_* status -> text that ends the assembly status line */
+       { "but cannot be started", INCR_NO },
+       { "but not safe to start", INCR_UNSAFE },
+       { "and started", INCR_YES },
+       { NULL, INCR_ALREADY } /* no text for INCR_ALREADY; NOTE(review): presumably also the table terminator - confirm */
+};
+
+
+/**
+ * struct assembly_array_info - General, meaningful information for assembly.
+ * @name: Array name.
+ * @new_cnt: Count of drives known to be members, recently added.
+ * @preexist_cnt: Count of member drives in pre-assembled array.
+ * @exp_cnt: Count of known expansion targets.
+ *
+ * set_array_assembly_status() reports @preexist_cnt + @new_cnt as the number
+ * of devices the array was assembled with; @exp_cnt is reported separately
+ * and is not part of that total.
+ *
+ * FIXME: @exp_new_cnt for recently added expansion targets.
+ */
+struct assembly_array_info {
+       char *name;
+       int new_cnt;
+       int preexist_cnt;
+       int exp_cnt;
+};
+
+/**
+ * set_array_assembly_status() - generate status of assembly for an array.
+ * @c: Global settings; only @c->export and @c->verbose are read here.
+ * @result: Pointer to status mask, may be NULL.
+ * @status: Status to be set/printed, looked up in assemble_statuses.
+ * @arr: Array information.
+ *
+ * When @c->export is set, @status is OR-ed into the mask at @result (if
+ * @result is not NULL) and nothing is printed.
+ *
+ * Otherwise, unless @c->verbose is negative, one status line is emitted with
+ * pr_err() and fprintf(stderr, ...): the array name, the total device count
+ * (@arr->preexist_cnt + @arr->new_cnt), the number of new devices when the
+ * array already had members, the number of expansion targets when non-zero,
+ * and finally the text mapped to @status.
+ */
+static void set_array_assembly_status(struct context *c,
+                                  int *result, int status,
+                                  struct assembly_array_info *arr)
+{
+       int raid_disks = arr->preexist_cnt + arr->new_cnt; /* total shown to the user */
+       char *status_msg = map_num_s(assemble_statuses, status); /* trailing text for the line */
+
+       if (c->export && result) /* export mode: hand the status back to the caller */
+               *result |= status;
+
+       if (c->export || c->verbose < 0) /* export mode or negative verbosity: print nothing */
+               return;
+
+       pr_err("%s has been assembled with %d device%s", arr->name,
+              raid_disks, raid_disks == 1 ? "":"s"); /* pluralise "device" unless exactly one */
+       if (arr->preexist_cnt > 0) /* array already had members: say how many are new */
+               fprintf(stderr, " (%d new)", arr->new_cnt);
+       if (arr->exp_cnt) /* expansion targets are listed apart from the total */
+               fprintf(stderr, " ( + %d for expansion)", arr->exp_cnt);
+       fprintf(stderr, " %s.\n", status_msg);
+}
+
 static int name_matches(char *found, char *required, char *homehost, int require_homehost)
 {
        /* See if the name found matches the required name, possibly
@@ -80,17 +135,17 @@ static int ident_matches(struct mddev_ident *ident,
                         struct mdinfo *content,
                         struct supertype *tst,
                         char *homehost, int require_homehost,
-                        char *update, char *devname)
+                        enum update_opt update, char *devname)
 {
 
-       if (ident->uuid_set && (!update || strcmp(update, "uuid")!= 0) &&
+       if (ident->uuid_set && update != UOPT_UUID &&
            same_uuid(content->uuid, ident->uuid, tst->ss->swapuuid)==0 &&
            memcmp(content->uuid, uuid_zero, sizeof(int[4])) != 0) {
                if (devname)
                        pr_err("%s has wrong uuid.\n", devname);
                return 0;
        }
-       if (ident->name[0] && (!update || strcmp(update, "name")!= 0) &&
+       if (ident->name[0] && update != UOPT_NAME &&
            name_matches(content->name, ident->name, homehost, require_homehost)==0) {
                if (devname)
                        pr_err("%s has wrong name.\n", devname);
@@ -269,11 +324,16 @@ static int select_devices(struct mddev_dev *devlist,
                        if (auto_assem || !inargv)
                                /* Ignore unrecognised devices during auto-assembly */
                                goto loop;
-                       if (ident->uuid_set || ident->name[0] ||
+                       if (ident->name[0] ||
                            ident->super_minor != UnSet)
                                /* Ignore unrecognised device if looking for
                                 * specific array */
                                goto loop;
+                       if (ident->uuid_set)
+                               /* ignore unrecognized device if looking for
+                                * specific uuid
+                                */
+                               goto loop;
 
                        pr_err("%s has no superblock - assembly aborted\n",
                               devname);
@@ -281,8 +341,10 @@ static int select_devices(struct mddev_dev *devlist,
                                st->ss->free_super(st);
                        dev_policy_free(pol);
                        domain_free(domains);
-                       if (tst)
+                       if (tst) {
                                tst->ss->free_super(tst);
+                               free(tst);
+                       }
                        return -1;
                }
 
@@ -357,6 +419,7 @@ static int select_devices(struct mddev_dev *devlist,
                                st->ss->free_super(st);
                                dev_policy_free(pol);
                                domain_free(domains);
+                               free(st);
                                return -1;
                        }
                        if (c->verbose > 0)
@@ -435,7 +498,7 @@ static int select_devices(struct mddev_dev *devlist,
 
                        if (st->ss != tst->ss ||
                            st->minor_version != tst->minor_version ||
-                           st->ss->compare_super(st, tst) != 0) {
+                           st->ss->compare_super(st, tst, 1) != 0) {
                                /* Some mismatch. If exactly one array matches this host,
                                 * we can resolve on that one.
                                 * Or, if we are auto assembling, we just ignore the second
@@ -473,6 +536,7 @@ static int select_devices(struct mddev_dev *devlist,
                                st->ss->free_super(st);
                                dev_policy_free(pol);
                                domain_free(domains);
+                               free(tst);
                                return -1;
                        }
                        tmpdev->used = 1;
@@ -486,8 +550,10 @@ static int select_devices(struct mddev_dev *devlist,
                }
                dev_policy_free(pol);
                pol = NULL;
-               if (tst)
+               if (tst) {
                        tst->ss->free_super(tst);
+                       free(tst);
+               }
        }
 
        /* Check if we found some imsm spares but no members */
@@ -507,8 +573,7 @@ static int select_devices(struct mddev_dev *devlist,
                                if (dfd < 0 ||
                                    st->ss->load_super(st, dfd, NULL))
                                        tmpdev->used = 2;
-                               if (dfd > 0)
-                                       close(dfd);
+                               close_fd(&dfd);
                        }
                }
 
@@ -589,11 +654,10 @@ static int load_devices(struct devs *devices, char *devmap,
                        int err;
                        fstat(mdfd, &stb2);
 
-                       if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set)
+                       if (c->update == UOPT_UUID && !ident->uuid_set)
                                random_uuid((__u8 *)ident->uuid);
 
-                       if (strcmp(c->update, "ppl") == 0 &&
-                           ident->bitmap_fd >= 0) {
+                       if (c->update == UOPT_PPL && ident->bitmap_fd >= 0) {
                                pr_err("PPL is not compatible with bitmap\n");
                                close(mdfd);
                                free(devices);
@@ -625,30 +689,30 @@ static int load_devices(struct devs *devices, char *devmap,
                        strcpy(content->name, ident->name);
                        content->array.md_minor = minor(stb2.st_rdev);
 
-                       if (strcmp(c->update, "byteorder") == 0)
+                       if (c->update == UOPT_BYTEORDER)
                                err = 0;
-                       else if (strcmp(c->update, "home-cluster") == 0) {
+                       else if (c->update == UOPT_HOME_CLUSTER) {
                                tst->cluster_name = c->homecluster;
                                err = tst->ss->write_bitmap(tst, dfd, NameUpdate);
-                       } else if (strcmp(c->update, "nodes") == 0) {
+                       } else if (c->update == UOPT_NODES) {
                                tst->nodes = c->nodes;
                                err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate);
-                       } else if (strcmp(c->update, "revert-reshape") == 0 &&
-                                  c->invalid_backup)
+                       } else if (c->update == UOPT_REVERT_RESHAPE && c->invalid_backup)
                                err = tst->ss->update_super(tst, content,
-                                                           "revert-reshape-nobackup",
+                                                           UOPT_SPEC_REVERT_RESHAPE_NOBACKUP,
                                                            devname, c->verbose,
                                                            ident->uuid_set,
                                                            c->homehost);
                        else
-                               err = tst->ss->update_super(tst, content, c->update,
+                               err = tst->ss->update_super(tst, content,
+                                                           c->update,
                                                            devname, c->verbose,
                                                            ident->uuid_set,
                                                            c->homehost);
                        if (err < 0) {
                                if (err == -1)
                                        pr_err("--update=%s not understood for %s metadata\n",
-                                              c->update, tst->ss->name);
+                                              map_num(update_options, c->update), tst->ss->name);
                                tst->ss->free_super(tst);
                                free(tst);
                                close(mdfd);
@@ -658,7 +722,7 @@ static int load_devices(struct devs *devices, char *devmap,
                                *stp = st;
                                return -1;
                        }
-                       if (strcmp(c->update, "uuid")==0 &&
+                       if (c->update == UOPT_UUID &&
                            !ident->uuid_set) {
                                ident->uuid_set = 1;
                                memcpy(ident->uuid, content->uuid, 16);
@@ -667,7 +731,7 @@ static int load_devices(struct devs *devices, char *devmap,
                                pr_err("Could not re-write superblock on %s.\n",
                                       devname);
 
-                       if (strcmp(c->update, "uuid")==0 &&
+                       if (c->update == UOPT_UUID &&
                            ident->bitmap_fd >= 0 && !bitmap_done) {
                                if (bitmap_update_uuid(ident->bitmap_fd,
                                                       content->uuid,
@@ -781,6 +845,7 @@ static int load_devices(struct devs *devices, char *devmap,
                                close(mdfd);
                                free(devices);
                                free(devmap);
+                               free(best);
                                *stp = st;
                                return -1;
                        }
@@ -847,8 +912,7 @@ static int force_array(struct mdinfo *content,
                                 * devices in RAID4 or last devices in RAID4/5/6.
                                 */
                                delta = devices[j].i.delta_disks;
-                               if (devices[j].i.array.level >= 4 &&
-                                   devices[j].i.array.level <= 6 &&
+                               if (is_level456(devices[j].i.array.level) &&
                                    i/2 >= content->array.raid_disks - delta)
                                        /* OK */;
                                else if (devices[j].i.array.level == 4 &&
@@ -902,7 +966,7 @@ static int force_array(struct mdinfo *content,
                        continue;
                }
                content->events = devices[most_recent].i.events;
-               tst->ss->update_super(tst, content, "force-one",
+               tst->ss->update_super(tst, content, UOPT_SPEC_FORCE_ONE,
                                      devices[chosen_drive].devname, c->verbose,
                                      0, NULL);
 
@@ -1062,7 +1126,8 @@ static int start_array(int mdfd,
                               i/2, mddev);
        }
 
-       if (content->array.level == LEVEL_CONTAINER) {
+       if (is_container(content->array.level)) {
+               sysfs_rules_apply(mddev, content);
                if (c->verbose >= 0) {
                        pr_err("Container %s has been assembled with %d drive%s",
                               mddev, okcnt + sparecnt + journalcnt,
@@ -1070,10 +1135,8 @@ static int start_array(int mdfd,
                        if (okcnt < (unsigned)content->array.raid_disks)
                                fprintf(stderr, " (out of %d)\n",
                                        content->array.raid_disks);
-                       else {
+                       else
                                fprintf(stderr, "\n");
-                               sysfs_rules_apply(mddev, content);
-                       }
                }
 
                if (st->ss->validate_container) {
@@ -1127,17 +1190,14 @@ static int start_array(int mdfd,
                                pr_err("%s: Need a backup file to complete reshape of this array.\n",
                                       mddev);
                                pr_err("Please provided one with \"--backup-file=...\"\n");
-                               if (c->update &&
-                                   strcmp(c->update, "revert-reshape") == 0)
+                               if (c->update == UOPT_REVERT_RESHAPE)
                                        pr_err("(Don't specify --update=revert-reshape again, that part succeeded.)\n");
                                return 1;
                        }
                        rv = sysfs_set_str(content, NULL,
                                           "array_state", "readonly");
                        if (rv == 0)
-                               rv = Grow_continue(mdfd, st, content,
-                                                  c->backup_file, 0,
-                                                  c->freeze_reshape);
+                               rv = Grow_continue(mdfd, st, content, 0, c);
                } else if (c->readonly &&
                           sysfs_attribute_available(content, NULL,
                                                     "array_state")) {
@@ -1168,8 +1228,7 @@ static int start_array(int mdfd,
                                fprintf(stderr, ".\n");
                        }
                        if (content->reshape_active &&
-                           content->array.level >= 4 &&
-                           content->array.level <= 6) {
+                           is_level456(content->array.level)) {
                                /* might need to increase the size
                                 * of the stripe cache - default is 256
                                 */
@@ -1427,7 +1486,7 @@ try_again:
         */
        if (map_lock(&map))
                pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
-       if (c->update && strcmp(c->update,"uuid") == 0)
+       if (c->update == UOPT_UUID)
                mp = NULL;
        else
                mp = map_by_uuid(&map, content->uuid);
@@ -1482,6 +1541,10 @@ try_again:
                                name = content->name;
                        break;
                }
+               if (mddev && map_by_name(&map, mddev) != NULL) {
+                       pr_err("Cannot create device with %s because is in use\n", mddev);
+                       goto out;
+               }
                if (!auto_assem)
                        /* If the array is listed in mdadm.conf or on
                         * command line, then we trust the name
@@ -1489,8 +1552,7 @@ try_again:
                         */
                        trustworthy = LOCAL;
 
-               if (name[0] == 0 &&
-                   content->array.level == LEVEL_CONTAINER) {
+               if (!name[0] && is_container(content->array.level)) {
                        name = content->text_version;
                        trustworthy = METADATA;
                }
@@ -1571,7 +1633,7 @@ try_again:
                goto out;
        }
 
-       if (c->update && strcmp(c->update, "byteorder")==0)
+       if (c->update == UOPT_BYTEORDER)
                st->minor_version = 90;
 
        st->ss->getinfo_super(st, content, NULL);
@@ -1729,7 +1791,7 @@ try_again:
                if (!(devices[j].i.array.state & 1))
                        clean = 0;
 
-               if (st->ss->update_super(st, &devices[j].i, "assemble", NULL,
+               if (st->ss->update_super(st, &devices[j].i, UOPT_SPEC_ASSEMBLE, NULL,
                                         c->verbose, 0, NULL)) {
                        if (c->force) {
                                if (c->verbose >= 0)
@@ -1742,18 +1804,11 @@ try_again:
                                               i, mddev, devices[j].devname);
                        }
                }
-#if 0
-               if (!(super.disks[i].i.disk.state & (1 << MD_DISK_FAULTY))) {
-                       pr_err("devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
-                              i, mddev);
-               }
-#endif
        }
-       if (c->force && !clean &&
+       if (c->force && !clean && !is_container(content->array.level) &&
            !enough(content->array.level, content->array.raid_disks,
-                   content->array.layout, clean,
-                   avail)) {
-               change += st->ss->update_super(st, content, "force-array",
+                   content->array.layout, clean, avail)) {
+               change += st->ss->update_super(st, content, UOPT_SPEC_FORCE_ARRAY,
                                               devices[chosen_drive].devname, c->verbose,
                                               0, NULL);
                was_forced = 1;
@@ -1840,7 +1895,7 @@ try_again:
        /* First, fill in the map, so that udev can find our name
         * as soon as we become active.
         */
-       if (c->update && strcmp(c->update, "metadata")==0) {
+       if (c->update == UOPT_METADATA) {
                content->array.major_version = 1;
                content->array.minor_version = 0;
                strcpy(content->text_version, "1.0");
@@ -1887,13 +1942,14 @@ out:
                                                break;
                                        close(mdfd);
                                }
-                               usleep(usecs);
+                               sleep_for(0, USEC_TO_NSEC(usecs), true);
                                usecs <<= 1;
                        }
                }
        } else if (mdfd >= 0)
                close(mdfd);
 
+       free(best);
        /* '2' means 'OK, but not started yet' */
        if (rv == -1) {
                free(devices);
@@ -1907,12 +1963,13 @@ int assemble_container_content(struct supertype *st, int mdfd,
                               char *chosen_name, int *result)
 {
        struct mdinfo *dev, *sra, *dev2;
-       int working = 0, preexist = 0;
-       int expansion = 0;
+       struct assembly_array_info array = {chosen_name, 0, 0, 0};
        int old_raid_disks;
        int start_reshape;
        char *avail;
        int err;
+       int is_clean, all_disks;
+       bool is_raid456;
 
        if (sysfs_init(content, mdfd, NULL)) {
                pr_err("Unable to initialize sysfs\n");
@@ -1920,13 +1977,16 @@ int assemble_container_content(struct supertype *st, int mdfd,
        }
 
        sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS);
-       if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) {
-               if (content->array.major_version == -1 &&
-                   content->array.minor_version == -2 &&
-                   c->readonly &&
-                   content->text_version[0] == '/')
-                       content->text_version[0] = '-';
-               if (sysfs_set_array(content, 9003) != 0) {
+       if (sra == NULL) {
+               pr_err("Failed to read sysfs parameters\n");
+               return 1;
+       }
+
+       /* Fill sysfs properties only if they are not set. Determine it by checking text_version
+        * and ignoring special character on the first place.
+        */
+       if (strcmp(sra->text_version + 1, content->text_version + 1) != 0) {
+               if (sysfs_set_array(content) != 0) {
                        sysfs_free(sra);
                        return 1;
                }
@@ -1953,8 +2013,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
                if (dev)
                        continue;
                /* Don't want this one any more */
-               if (sysfs_set_str(sra, dev2, "slot", "none") < 0 &&
-                   errno == EBUSY) {
+               if (sysfs_set_str(sra, dev2, "slot", STR_COMMON_NONE) < 0 && errno == EBUSY) {
                        pr_err("Cannot remove old device %s: not updating %s\n", dev2->sys_name, sra->sys_name);
                        sysfs_free(sra);
                        return 1;
@@ -1969,17 +2028,16 @@ int assemble_container_content(struct supertype *st, int mdfd,
                if (sysfs_add_disk(content, dev, 1) == 0) {
                        if (dev->disk.raid_disk >= old_raid_disks &&
                            content->reshape_active)
-                               expansion++;
+                               array.exp_cnt++;
                        else
-                               working++;
+                               array.new_cnt++;
                } else if (errno == EEXIST)
-                       preexist++;
+                       array.preexist_cnt++;
        }
        sysfs_free(sra);
-       if (working + expansion == 0 && c->runstop <= 0) {
-               free(avail);
-               return 1;/* Nothing new, don't try to start */
-       }
+
+       all_disks = array.new_cnt + array.exp_cnt + array.preexist_cnt;
+
        map_update(NULL, fd2devnm(mdfd), content->text_version,
                   content->uuid, chosen_name);
 
@@ -2030,46 +2088,65 @@ int assemble_container_content(struct supertype *st, int mdfd,
                        free(avail);
                        return err;
                }
+       } else if (c->force) {
+               /* Set the array as 'clean' so that we can proceed with starting
+                * it even if we don't have all devices. Mdmon doesn't care
+                * if the dirty flag is set in metadata, it will start managing
+                * it anyway.
+                * This is really important for raid456 (RWH case), other levels
+                * are started anyway.
+                */
+               content->array.state |= 1;
        }
 
+       is_raid456 = is_level456(content->array.level);
+       is_clean = content->array.state & 1;
+
        if (enough(content->array.level, content->array.raid_disks,
-                  content->array.layout, content->array.state & 1, avail) == 0) {
-               if (c->export && result)
-                       *result |= INCR_NO;
-               else if (c->verbose >= 0) {
-                       pr_err("%s assembled with %d device%s",
-                              chosen_name, preexist + working,
-                              preexist + working == 1 ? "":"s");
-                       if (preexist)
-                               fprintf(stderr, " (%d new)", working);
-                       fprintf(stderr, " but not started\n");
-               }
+                  content->array.layout, is_clean, avail) == 0) {
+               set_array_assembly_status(c, result, INCR_NO, &array);
+
+               if (c->verbose >= 0 && is_raid456 && !is_clean)
+                       pr_err("Consider --force to start dirty degraded array\n");
+
                free(avail);
                return 1;
        }
        free(avail);
 
-       if (c->runstop <= 0 &&
-           (working + preexist + expansion) <
-           content->array.working_disks) {
-               if (c->export && result)
-                       *result |= INCR_UNSAFE;
-               else if (c->verbose >= 0) {
-                       pr_err("%s assembled with %d device%s",
-                              chosen_name, preexist + working,
-                              preexist + working == 1 ? "":"s");
-                       if (preexist)
-                               fprintf(stderr, " (%d new)", working);
-                       fprintf(stderr, " but not safe to start\n");
-               }
+       if (c->runstop <= 0 && all_disks < content->array.working_disks) {
+
+               set_array_assembly_status(c, result, INCR_UNSAFE, &array);
+
+               if (c->verbose >= 0 && c->force)
+                       pr_err("Consider --run to start array as degraded.\n");
                return 1;
        }
 
+       if (is_raid456 && content->resync_start != MaxSector && c->force &&
+           all_disks < content->array.raid_disks) {
+
+               content->resync_start = MaxSector;
+               err = sysfs_set_num(content, NULL, "resync_start", MaxSector);
+               if (err)
+                       return 1;
+
+               pr_err("%s array state forced to clean. It may cause data corruption.\n",
+                      chosen_name);
+       }
+
+       /*
+        * Before activating the array, perform extra steps required
+        * to configure the internal write-intent bitmap.
+        */
+       if (content->consistency_policy == CONSISTENCY_POLICY_BITMAP &&
+           st->ss->set_bitmap)
+               st->ss->set_bitmap(st, content);
 
        if (start_reshape) {
-               int spare = content->array.raid_disks + expansion;
+               int spare = content->array.raid_disks + array.exp_cnt;
                if (restore_backup(st, content,
-                                  working,
+                                  array.new_cnt,
                                   spare, &c->backup_file, c->verbose) == 1)
                        return 1;
 
@@ -2101,8 +2178,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
                                st->update_tail = &st->updates;
                }
 
-               err = Grow_continue(mdfd, st, content, c->backup_file,
-                                   0, c->freeze_reshape);
+               err = Grow_continue(mdfd, st, content, 0, c);
        } else switch(content->array.level) {
                case LEVEL_LINEAR:
                case LEVEL_MULTIPATH:
@@ -2132,31 +2208,14 @@ int assemble_container_content(struct supertype *st, int mdfd,
            !start_reshape)
                block_subarray(content);
 
-       if (c->export && result) {
-               if (err)
-                       *result |= INCR_NO;
-               else
-                       *result |= INCR_YES;
-       } else if (c->verbose >= 0) {
-               if (err)
-                       pr_err("array %s now has %d device%s",
-                              chosen_name, working + preexist,
-                              working + preexist == 1 ? "":"s");
-               else {
-                       sysfs_rules_apply(chosen_name, content);
-                       pr_err("Started %s with %d device%s",
-                              chosen_name, working + preexist,
-                              working + preexist == 1 ? "":"s");
-               }
-               if (preexist)
-                       fprintf(stderr, " (%d new)", working);
-               if (expansion)
-                       fprintf(stderr, " ( + %d for expansion)",
-                               expansion);
-               fprintf(stderr, "\n");
-       }
-       if (!err)
+       if (err)
+               set_array_assembly_status(c, result, INCR_NO, &array);
+       else {
+               set_array_assembly_status(c, result, INCR_YES, &array);
                wait_for(chosen_name, mdfd);
+               sysfs_rules_apply(chosen_name, content);
+       }
+
        return err;
        /* FIXME should have an O_EXCL and wait for read-auto */
 }
diff --git a/Build.c b/Build.c
index 962c2e3776b91ccb15135b9c0cff91f322a8c9fe..1be90e418ad1f1e6d9890e19c94a15cac93d24f7 100644 (file)
--- a/Build.c
+++ b/Build.c
@@ -24,8 +24,8 @@
 
 #include "mdadm.h"
 
-int Build(char *mddev, struct mddev_dev *devlist,
-         struct shape *s, struct context *c)
+int Build(struct mddev_ident *ident, struct mddev_dev *devlist, struct shape *s,
+         struct context *c)
 {
        /* Build a linear or raid0 arrays without superblocks
         * We cannot really do any checks, we just do it.
@@ -71,40 +71,18 @@ int Build(char *mddev, struct mddev_dev *devlist,
        }
 
        if (s->layout == UnSet)
-               switch(s->level) {
-               default: /* no layout */
-                       s->layout = 0;
-                       break;
-               case 10:
-                       s->layout = 0x102; /* near=2, far=1 */
-                       if (c->verbose > 0)
-                               pr_err("layout defaults to n1\n");
-                       break;
-               case 5:
-               case 6:
-                       s->layout = map_name(r5layout, "default");
-                       if (c->verbose > 0)
-                               pr_err("layout defaults to %s\n", map_num(r5layout, s->layout));
-                       break;
-               case LEVEL_FAULTY:
-                       s->layout = map_name(faultylayout, "default");
-
-                       if (c->verbose > 0)
-                               pr_err("layout defaults to %s\n", map_num(faultylayout, s->layout));
-                       break;
-               }
+               s->layout = default_layout(NULL, s->level, c->verbose);
 
        /* We need to create the device.  It can have no name. */
        map_lock(&map);
-       mdfd = create_mddev(mddev, NULL, c->autof, LOCAL,
+       mdfd = create_mddev(ident->devname, NULL, c->autof, LOCAL,
                            chosen_name, 0);
        if (mdfd < 0) {
                map_unlock(&map);
                return 1;
        }
-       mddev = chosen_name;
 
-       map_update(&map, fd2devnm(mdfd), "none", uuid, chosen_name);
+       map_update(&map, fd2devnm(mdfd), STR_COMMON_NONE, uuid, chosen_name);
        map_unlock(&map);
 
        array.level = s->level;
@@ -114,7 +92,7 @@ int Build(char *mddev, struct mddev_dev *devlist,
        array.nr_disks = s->raiddisks;
        array.raid_disks = s->raiddisks;
        array.md_minor = 0;
-       if (fstat_is_blkdev(mdfd, mddev, &rdev))
+       if (fstat_is_blkdev(mdfd, chosen_name, &rdev))
                array.md_minor = minor(rdev);
        array.not_persistent = 1;
        array.state = 0; /* not clean, but no errors */
@@ -129,12 +107,11 @@ int Build(char *mddev, struct mddev_dev *devlist,
        array.chunk_size = s->chunk*1024;
        array.layout = s->layout;
        if (md_set_array_info(mdfd, &array)) {
-               pr_err("md_set_array_info() failed for %s: %s\n",
-                      mddev, strerror(errno));
+               pr_err("md_set_array_info() failed for %s: %s\n", chosen_name, strerror(errno));
                goto abort;
        }
 
-       if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
+       if (s->bitmap_file && str_is_none(s->bitmap_file) == true)
                s->bitmap_file = NULL;
        if (s->bitmap_file && s->level <= 0) {
                pr_err("bitmaps not meaningful with level %s\n",
@@ -179,12 +156,6 @@ int Build(char *mddev, struct mddev_dev *devlist,
                bitmap_fd = open(s->bitmap_file, O_RDWR);
                if (bitmap_fd < 0) {
                        int major = BITMAP_MAJOR_HI;
-#if 0
-                       if (s->bitmap_chunk == UnSet) {
-                               pr_err("%s cannot be opened.\n", s->bitmap_file);
-                               goto abort;
-                       }
-#endif
                        bitmapsize = s->size >> 9; /* FIXME wrong for RAID10 */
                        if (CreateBitmap(s->bitmap_file, 1, NULL,
                                         s->bitmap_chunk, c->delay,
@@ -199,8 +170,8 @@ int Build(char *mddev, struct mddev_dev *devlist,
                }
                if (bitmap_fd >= 0) {
                        if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
-                               pr_err("Cannot set bitmap file for %s: %s\n",
-                                      mddev, strerror(errno));
+                               pr_err("Cannot set bitmap file for %s: %s\n", chosen_name,
+                                      strerror(errno));
                                goto abort;
                        }
                }
@@ -214,9 +185,8 @@ int Build(char *mddev, struct mddev_dev *devlist,
        }
 
        if (c->verbose >= 0)
-               pr_err("array %s built and started.\n",
-                       mddev);
-       wait_for(mddev, mdfd);
+               pr_err("array %s built and started.\n", chosen_name);
+       wait_for(chosen_name, mdfd);
        close(mdfd);
        return 0;
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644 (file)
index 0000000..c1997ba
--- /dev/null
@@ -0,0 +1,368 @@
+# Release [mdadm-4.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-4.3)
+
+Features:
+- **IMSM_NO_PLATFORM** boot parameter support from Neil Brown.
+- **--write-zeros** option support by Logan Gunthorpe.
+- **IMSM** monetization by VMD register from Mateusz Grzonka.
+- RST SATA under VMD support from Kevin Friedberg.
+- Strong name rules from Mariusz Tkaczyk.
+
+Fixes:
+- Unify failed raid behavior from Coly Li.
+- Rework of **--update** options from Mateusz Kusiak.
+- **mdmon-initrd** service from Neil Brown.
+- **IMSM** expand functionality rework from Mariusz Tkaczyk.
+- Mdmonitor improvements from Mateusz Grzonka.
+- Failed state verification from Mateusz Kusiak and Kinga Tanska.
+
+# Release [mdadm-4.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-4.2)
+
+The release includes more than two years of development and bugfixes, so it is difficult to
+remember everything. Highlights include enhancements and bug fixes including for **IMSM** RAID,
+Partial Parity Log, clustered RAID support, improved testing, and gcc-9 support.
+
+# Release [mdadm-4.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-4.1)
+
+The update constitutes more than one year of enhancements and bug fixes including for **IMSM**
+RAID, Partial Parity Log, clustered RAID support, improved testing, and gcc-8 support.
+
+# Release [mdadm-4.0](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-4.0)
+
+The update in major version number primarily indicates this is a release by its new maintainer.
+In addition it contains a large number of fixes in particular for IMSM RAID and clustered RAID
+support. In addition, this release includes support for IMSM 4k sector drives, failfast and better
+documentation for journaled RAID.
+
+This is my first release of mdadm. Please thank Neil Brown for his previous work as maintainer and
+blame me for all the bugs I caused since taking over.
+
+# Release [mdadm-3.4](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.4)
+
+- Support for journalled RAID5/6 and clustered RAID1. This new support is probably still buggy.
+  Please report bugs.
+
+- There are also a number of fixes for **IMSM** support and an assortment of minor bug fixes.
+
+- I plan for this to be the last release of mdadm that I provide as I am retiring from MD and mdadm
+  maintenance. Jes Sorensen has volunteered to oversee mdadm for the next while. Thanks Jes!
+
+# Release [mdadm-3.3.4](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3.4)
+
+**I strongly recommend upgrading to 3.3.4 if you are using 3.3 or later with IMSM.**
+
+- **IMSM** metadata assemble fixes.
+
+  In mdadm-3.3 a change was made to how **IMSM** metadata was handled. Previously an **IMSM** array
+  would only be assembled if it was attached to an **IMSM** controller. In 3.3 this was relaxed as
+  there are circumstances where the controller is not properly detected. Unfortunately, this has
+  negative consequences which have only just come to light.
+
+  If you have an IMSM RAID1 configured and then disable RAID in the BIOS, the metadata will remain
+  on the devices. If you then install some other OS on one device and then install Linux on the
+  other, Linux might eventually start noticing the IMSM metadata (depending a bit on whether
+  mdadm is included in the initramfs) and might start up the RAID1. This could copy one device over
+  the other, thus trashing one of the installations.
+
+  So, with this release IMSM arrays will only be assembled if attached to an **IMSM** controller,
+  or if **--force** is given to **--assemble**, or if the environment variable
+  **IMSM_NO_PLATFORM=1** is set (used primarily for testing).
+
+# Release [mdadm-3.3.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3.3)
+
+- The 100 changes since 3.3.2 are mostly little bugfixes and some improvements to the self-tests.
+- raid6check now handles all RAID6 layouts including **DDF** correctly. See git log for the rest.
+
+# Release [mdadm-3.3.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3.2)
+
+- Little bugfixes and some man-page updates.
+
+# Release [mdadm-3.3.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3.1)
+
+- lots of work on **DDF** support.
+- Improved interactions with **systemd**. Where possible, background tasks are run from systemd
+  rather than forking.
+- Number of other little bug fixes too.
+
+# Release [mdadm-3.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3)
+
+- Some array reshapes can proceed without needing a backup file. This is done by changing the
+  *data_offset* so we never need to write any data back over where it was before. If there is no
+  'head space' or 'tail space' to allow *data_offset* to change, the old mechanism with a backup
+  file can still be used.
+
+- RAID10 arrays can be reshaped to change the number of devices, change the chunk size, or change
+  the layout between *near* and *offset*.
+  This will always change *data_offset*, and will fail if there is no room for *data_offset* to be
+  moved.
+
+- **--assemble --update=metadata** can convert a **0.90** array to a **1.0** array.
+
+- **bad-block-logs** are supported (but not heavily tested yet).
+
+- **--assemble --update=revert-reshape** can be used to undo a reshape that has just been started
+  but isn't really wanted. This is very new and while it passes basic tests it cannot be
+  guaranteed.
+
+- improved locking between **--incremental** and **--assemble**.
+
+- uses systemd to run **mdmon** if systemd is configured to do that.
+- kernel names of md devices can be non-numeric. e.g. "md_home" rather than
+  "md0". This will probably confuse lots of other tools, so you need to
+  **echo CREATE names=yes >> /etc/mdadm.conf** or the feature will not be used (you also need a
+  reasonably new kernel).
+
+- **--stop** can be given a kernel name instead of a device name. i.e. **mdadm --stop md4** will
+  work even if /dev/md4 doesn't exist.
+
+- **--detail --export** has some information about the devices in the array.
+- **--dump** and **--restore** can be used to backup and restore the metadata on an array.
+- Hot-replace is supported with **mdadm /dev/mdX --replace /dev/foo** and
+  **mdadm /dev/mdX --replace /dev/foo --with /dev/bar**.
+
+- Config file can be a directory in which case all "*.conf" files are read in lexical order.
+  Default is to read **/etc/mdadm.conf** and then **/etc/mdadm.conf.d**. Thus
+  **echo CREATE names=yes > /etc/mdadm.conf.d/names.conf** will also enable the use of named md
+  devices.
+
+- Lots of improvements to **DDF** support including adding support for RAID10 (thanks Martin Wilck).
+
+# Release [mdadm-3.2.6](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.6)
+
+- There are no real stand-out fixes, just lots of little bits and pieces.
+
+# Release [mdadm-3.2.5](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.5)
+
+- This release primarily fixes a serious regression in 3.2.4. This regression does *not* cause
+  any risk to data. It simply means that adding a device with **--add** would sometimes fail
+  when it should not.
+- The fix also includes a couple of minor fixes such as making the **--layout=preserve** option to
+  **--grow** work again.
+
+# Release [mdadm-3.2.4](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.4)
+
+ - **--offroot** argument to improve interactions between mdmon and initrd.
+ - **--prefer** argument to select which */dev* names to display in some circumstances.
+ - relax restrictions on when **--add** will be allowed.
+ - Fix bug with adding write-intent-bitmap to active array.
+ - Now defaults to */run/mdadm* for storing run-time files.
+
+# Release [mdadm-3.2.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.3)
+
+- The largest single area of change is support for reshape of Intel IMSM arrays (OnLine Capacity
+  Expansion and Level Migration).
+- Among other fixes, this now has a better chance of surviving if a device fails during reshape.
+
+# Release [mdadm-3.2.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.2)
+
+- reshaping IMSM (Intel metadata) arrays is no longer 'experimental', it should work properly and be
+  largely compatible with IMSM drivers in other platforms.
+- **--assume-clean** can be used with **--grow --size** to avoid resyncing the new part of the
+  array. This is only supported with very new kernels.
+- RAID0 arrays can have chunksize which is not a power of 2. This has been supported in the kernel
+  for a while but is only now supported by mdadm.
+
+- A new tool **raid6check** is available, which can check a RAID6 array, or part of it and report
+  which device is most inconsistent with the others if any stripe is inconsistent. This is still
+  under development and does not have a man page yet. If anyone tries it out and has any questions
+  or experience to report, they would be most welcome on linux-raid@vger.kernel.org.
+
+# Release [mdadm-3.2.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.1)
+
+- Policy framework
+
+  Policy can be expressed for moving spare devices between arrays, and for how to handle hot-plugged
+  devices. This policy can be different for devices plugged in to different controllers etc. This,
+  for example, allows a configuration where when a device is plugged in it is immediately included
+  in an md array as a hot spare and possibly starts recovery immediately if an array is degraded.
+
+- Some understanding of mbr and gpt partition tables. This is primarily to support the new
+  hot-plug support. If a device is plugged in and policy suggests it should have a partition table,
+  the partition table will be copied from a suitably similar device, and then the partitions will
+  hot-plug and can then be added to md arrays.
+
+- **--incremental --remove** can remember where a device was removed from so if a device gets
+  plugged back in the same place, special policy applies to it, allowing it to be included in an
+  array even if a general hotplug will not be included.
+
+- Enhanced reshape options, including growing a RAID0 by converting to RAID4, restriping, and
+  converting back. Also conversions between RAID0 and RAID10 and between RAID1 and RAID10 are
+  possible (with a suitably recent kernel).
+
+- Spare migration for IMSM arrays. Spare migration can now work across 'containers' using
+  non-native metadata and specifically Intel's IMSM arrays support spare migrations.
+
+- OLCE and level migration for Intel IMSM arrays. OnLine Capacity Expansion and level migration
+  (e.g. RAID0 -> RAID5) is supported for Intel Matrix Storage Manager arrays. This support is
+  currently *experimental* for technical reasons. It can be enabled with
+  **export MDADM_EXPERIMENTAL=1**.
+
+- avoid including wayward devices.
+
+  If you split a RAID1, mount the two halves as two separate degraded RAID1s, and then later bring
+  the two back together, it is possible that the md metadata won't properly show that one must
+  over-ride the other. Mdadm now does extra checking to detect this possibility and avoid
+  potentially corrupting data.
+
+- Remove any possible confusion between similar options. e.g. **--brief** and **--bitmap** were
+  mapped to 'b' and mdadm wouldn't notice if one was used where the other was expected.
+
+- Allow K,M,G suffixes on chunk sizes.
+
+# Release [mdadm-3.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2)
+
+- By far the most significant change in this release relates to the management of reshaping arrays.
+  This code has been substantially re-written so that it can work with **externally managed
+  metadata** - Intel's IMSM in particular. We now support level migration and OnLine Capacity
+  Expansion on these arrays.
+
+- Various policy statements can be made in the *mdadm.conf* to guide the behavior of mdadm,
+  particularly with regard to how new devices are treated by **--incremental**. Depending on the
+  *action* associated with a device (identified by its *path*) such new devices can be
+  automatically re-added to an existing array that they previously fell out of, or automatically
+  added as a spare if they appear to contain no data.
+
+- mdadm now has a limited understanding of partition tables. This allows the policy framework to
+  make decisions about partitioned devices as well.
+
+- **--incremental --remove** can be told what **--path** the device was on, and this info will be
+  recorded so that another device appearing at the same physical location can be preferentially
+  added to the same array (provides the spare-same-slot action policy applied to the path).
+
+- A new **--invalid-backup** flag is available in **--assemble** mode. This can be used to
+  re-assemble an array which was stopped in the middle of a reshape, and for which the
+  *backup file* is no longer available or is corrupted. The array may have some corruption in it
+  at the point where reshape was up to, but at least the rest of the array will become available.
+
+- Policy framework.
+- Various internal restructuring - more is needed.
+
+# Release [mdadm-3.1.5](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.5)
+
+- Fixes for **v1.x** metadata on big-endian machines.
+- man page improvements.
+- Improve **--detail --export** when run on partitions of an md array.
+- Fix regression with removing *failed* or *detached* devices.
+- Fixes for **--assemble --force** in various unusual cases.
+- Allow **-Y** to mean **--export**. This was documented but not implemented.
+- Various fixes for handling **ddf** metadata. This is now more reliable but could benefit from
+  more interoperability testing.
+- Correctly list subarrays of a container in **--detail** output.
+- Improve checks on whether the requested number of devices is supported by the metadata, both for
+  **--create** and **--grow**.
+- Don't remove partitions from a device that is being included in an array until we are fully
+  committed to including it.
+- Allow **--assemble --update=no-bitmap** so an array with a corrupt bitmap can still be assembled.
+- Don't allow **--add** to succeed if it looks like a **--re-add** is probably wanted, but cannot
+  succeed. This avoids inadvertently turning devices into spares when an array is failed.
+
+# Release [mdadm-3.1.4](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.4)
+
+Two fixes related to configs that aren't using udev:
+- Don't remove md devices with 'standard' names on **--stop**.
+- Allow dev_open to work on read-only */dev*.
+
+And fixed regressions:
+- Allow **--incremental** to add spares to an array.
+- Accept **--no-degraded** as a deprecated option rather than throwing an error.
+- Return correct success status when **--incremental** assembling a container which does not yet
+  have enough devices.
+- Don't link mdadm with pthreads, only mdmon needs it.
+- Fix compiler warning due to bad use of snprintf.
+
+# Release [mdadm-3.1.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.3)
+
+- mapfile now lives in a fixed location which defaults to */dev/.mdadm/map*, but can be changed at
+  compile time. This location is chosen as most distros provide it during early boot and preserve
+  it through. As long as */dev* exists and is writable, */dev/.mdadm* will be created. Other files
+  for communication with mdmon live here too. This fixes a bug reported by Debian and Gentoo users
+  where udev would spin in early-boot.
+
+- IMSM and DDF metadata will not be recognized on partitions as they should only be used on
+  whole-disks.
+
+- Various overflows caused by 2G drives have been addressed.
+
+- A subarray of an IMSM container can now be killed with **--kill-subarray**. Also, subarrays can be
+  renamed with **--update-subarray --update=name**.
+
+- **-If** (or **--incremental --fail**) can be used from udev to fail and remove from all arrays
+  a device which has been unplugged from the system i.e. hot-unplug-support.
+
+- **/dev/mdX --re-add missing** will look for any device that looks like it should be a member of
+  */dev/mdX* but isn't and will automatically **--re-add** it.
+
+- Now compile with *-Wextra* to get extra warnings.
+- Lots of minor bug fixes, documentation improvements, etc.
+
+# Release [mdadm-3.1.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.2)
+
+- The default metadata has changed again (sorry about that). It is now **v1.2** and will hopefully
+  stay that way. It turned out there were boot-block issues with **v1.1** which make it unsuitable
+  for a default, though in many cases it is still suitable to use.
+
+- Add *homehost* to the valid words for the **AUTO** config file line. When followed by *-all*,
+  this causes mdadm to auto-assemble any array belonging to this host, but not auto-assemble
+  anything else.
+
+- VAR_RUN can be easily changed at compile time just like ALT_RUN. This gives distros more
+  flexibility in how to manage the pid and sock files that mdmon needs.
+
+- If mdadm.conf lists arrays which have inter-dependencies, they previously had to be listed in the
+  "right" order. Now, any order should work.
+
+- Fix some bugs with **--grow --chunksize=**.
+- Stopping a container is not permitted when members are still active.
+- Various mdmon fixes.
+- Always make bitmap 4K-aligned if at all possible.
+- Fix **--force** assembly of **v1.x** arrays which are in the process of recovering.
+- Add section on 'scrubbing' to 'md' man page.
+- Various command-line-option parsing improvements.
+- ... and lots of other bug fixes.
+
+# Release [mdadm-3.1.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.1)
+
+- Multiple fixes for new **--grow** levels including fixes for serious data corruption
+  problems.
+- Change default metadata to **v1.1**.
+- Change default chunk size to 512K.
+- Change default bitmap chunk size to 64MB.
+- When **--re-add** is used, don't fall back to **--add** as this can destroy data.
+
+# Release [mdadm-3.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1)
+
+- Support **--grow** to change the layout of RAID 4/5/6.
+- Support **--grow** to change the chunk size of RAID 4/5/6.
+- Support **--grow** to change level from RAID1 -> RAID5 -> RAID6 and back.
+- Support **--grow** to reduce the number of devices in RAID 4/5/6.
+- Support restart of these grow options when assembling an array which is partially grown.
+- Assorted tests of this code, and of different RAID6 layouts.
+
+# Release [mdadm-3.0.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.0.3)
+
+- Improvements for creating arrays giving just a name, like *foo*, rather than the full
+  */dev/md/foo*.
+- Improvements for assembling member arrays of containers.
+- Improvements to test suite.
+- Add option to change increment for *RebuildNN* messages reported by **--monitor**.
+- Improvements to **mdmon** hand-over from initrd to final root.
+- Handle merging of devices that have left an IMSM array and are being re-incorporated.
+- Add missing space in **--detail --brief** output.
+
+# Release [mdadm-3.0.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.0.2)
+
+- Fix crash when **homehost** is not set, as often happens in early boot.
+
+# Release [mdadm-3.0.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.0.1)
+
+- Fix various segfaults.
+- Fixes for **--examine** with containers.
+- Lots of other little fixes.
+
+# Release [mdadm-3.0](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.0)
+
+- Support for **externally managed metadata**, specifically DDF and IMSM.
+- Depend on udev to create entries in */dev*, rather than creating them ourselves.
+- Remove **--auto-update-home-hosts**.
+- New config file line **auto**.
+- New *ignore* and *any* options for **homehost**.
+- Numerous bug fixes and minor enhancements.
diff --git a/ChangeLog b/ChangeLog
deleted file mode 100644 (file)
index a3bf700..0000000
--- a/ChangeLog
+++ /dev/null
@@ -1,306 +0,0 @@
-Please see git logs for detailed change log.
-This file just contains highlight.
-
-Changes Prior to release 3.3
-- Some array reshapes can proceed without needing backup file.
-  This is done by changing the 'data_offset' so we never need to write
-  any data back over where it was before.  If there is no "head space"
-  or "tail space" to allow data_offset to change, the old mechanism
-  with a backup file can still be used.
-- RAID10 arrays can be reshaped to change the number of devices,
-  change the chunk size, or change the layout between 'near'
-  and 'offset'.
-  This will always change data_offset, and will fail if there is no
-  room for data_offset to be moved.
-- "--assemble --update=metadata" can convert a 0.90 array to a 1.0 array.
-- bad-block-logs are supported (but not heavily tested yet)
-- "--assemble --update=revert-reshape" can be used to undo a reshape
-  that has just been started but isn't really wanted.  This is very
-  new and while it passes basic tests it cannot be guaranteed.
-- improved locking between --incremental and --assemble
-- uses systemd to run "mdmon" if systemd is configured to do that.
-- kernel names of md devices can be non-numeric. e.g. "md_home" rather than
-  "md0".  This will probably confuse lots of other tools, so you need to
-       echo CREATE names=yes >> /etc/mdadm.conf
-  or the feature will not be used.  (you also need a reasonably new kernel).
-- "--stop" can be given a kernel name instead of a device name. i.e
-     mdadm --stop md4
-  will work even if /dev/md4 doesn't exist.
-- "--detail --export" has some information about the devices in the array
-- --dump and --restore can be used to backup and restore the metadata on an
-   array.
-- Hot-replace is supported with
-     mdadm /dev/mdX --replace /dev/foo
-  and
-     mdadm /dev/mdX --replace /dev/foo --with /dev/bar
-- Config file can be a directory in which case all "*.conf" files are
-  read in lexical order.
-  Default is to read /etc/mdadm.conf and then /etc/mdadm.conf.d
-  Thus
-      echo CREATE name=yes > /etc/mdadm.conf.d/names.conf
-  will also enable the use of named md devices.
-
-- Lots of improvements to DDF support including adding support for
-  RAID10 (thanks Martin Wilck).
-
-Changes Prior to release 3.2.6
-  - There are no real stand-out fixes, just lots of little bits and pieces.
-
-Changes Prior to release 3.2.5
-  - This release primarily fixes a serious regression in 3.2.4.
-    This regression does *not* cause any risk to data.  It simply
-    means that adding a device with "--add" would sometime fail
-    when it should not.
-
-  - The fix also includes a couple of minor fixes such as making
-    the "--layout=preserve" option to "--grow" work again.
-
-
-Changes Prior to release 3.2.4
-"--oneline" log of changes is below.  Some notable ones are:
-
- - --offroot argument to improve interactions between mdmon and initrd
- - --prefer argument to select which /dev names to display in some
-   circumstances.
- - relax restructions on when "--add" will be allowed
- - Fix bug with adding write-intent-bitmap to active array
- - Now defaults to "/run/mdadm" for storing run-time files.
-
-Changes Prior to release 3.2.3
-  - The largest single area of change is support for reshape of Intel
-    IMSM arrays (OnLine Capacity Explansion and Level Migration).
-  - Among other fixes, this now has a better chance of surviving if a
-    device fails during reshape.
-
-Changes Prior to release 3.2.2
-  - reshaping IMSM (Intel metadata) arrays is no longer 'experimental',
-    it should work properly and be largely compatible with IMSM drivers in
-    other platforms.
-  - --assume-clean can be used with --grow --size to avoid resyncing the
-    new part of the array.  This is only support with very new kernels.
-  - RAID0 arrays can have chunksize which is not a power of 2.  This has been
-    supported in the kernel for a while but is only now supprted by
-    mdadm.
-
-  - A new tool 'raid6check' is available which can check a RAID6 array,
-    or part of it, and report which device is most inconsistent with the
-    others if any stripe is inconsistent.   This is still under development
-    and does not have a man page yet.  If anyone tries it out and has any
-    questions or experience to report, they would be most welcome on
-    linux-raid@vger.kernel.org.
-
-Changes Prior to release 3.2.1
-  - policy framework
-     Policy can be expressed for moving spare devices between arrays, and
-     for how to handle hot-plugged devices.  This policy can be different
-     for devices plugged in to different controllers etc.
-     This, for example, allows a configuration where when a device is plugged
-     in it is immediately included in an md array as a hot spare and
-     possibly starts recovery immediately if an array is degraded.
-
-  - some understanding of mbr and gpt paritition tables
-     This is primarly to support the new hot-plug support.  If a
-     device is plugged in and policy suggests it should have a partition table,
-     the partition table will be copied from a suitably similar device, and
-     then the partitions will hot-plug and can then be added to md arrays.
-
-  - "--incremental --remove" can remember where a device was removed from
-    so if a device gets plugged back in the same place, special policy applies
-    to it, allowing it to be included in an array even if a general hotplug
-    will not be included.
-
-  - enhanced reshape options, including growing a RAID0 by converting to RAID4,
-    restriping, and converting back.  Also convertions between RAID0 and
-    RAID10 and between RAID1 and RAID10 are possible (with a suitably recent
-    kernel).
-
-  - spare migration for IMSM arrays.
-     Spare migration can now work across 'containers' using non-native metadata
-     and specifically Intel's IMSM arrays support spare migrations.
-
-  - OLCE and level migration for Intel IMSM arrays.
-     OnLine Capacity Expansion and level migration (e.g. RAID0 -> RAID5) is
-     supported for Intel Matrix Storage Manager arrays.
-     This support is currently 'experimental' for technical reasons.  It can
-     be enabled with "export MDADM_EXPERIMENTAL=1"
-
-  - avoid including wayward devices
-     If you split a RAID1, mount the two halves as two separate degraded RAID1s,
-     and then later bring the two back together, it is possible that the md
-     metadata won't properly show that one must over-ride the other.
-     mdadm now does extra checking to detect this possibilty and avoid
-     potentially corrupting data.
-
-  - remove any possible confusion between similar options.
-     e.g. --brief and --bitmap were mapped to 'b' and mdadm wouldn't
-     notice if one was used where the other was expected.
-
-  - allow K,M,G suffixes on chunk sizes
-
-Changes Prior to release 3.2
- - By far the most significant change in this release related to the
-   management of reshaping arrays.  This code has been substantially
-   re-written so that it can work with 'externally managed metadata' -
-   Intel's IMSM in particular.  We now support level migration and
-   OnLine Capacity Expansion on these arrays.
- - Policy framework.
-   Various policy statements can be made in the mdadm.conf to guide
-   the behaviour of mdadm, particular with regards to how new devices
-   are treated by "mdadm -I".
-   Depending on the 'action' associated with a device (identified by
-   its 'path') such need devices can be automatically re-added to and
-   existing array that they previously fell out off, or automatically
-   added as a spare if they appear to contain no data.
-
- - mdadm now has a limited understanding of partition tables.  This
-   allows the policy framework to make decisions about partitioned
-   devices as well.
-
- - --incremental --remove can be told what --path the device was on,
-   and this info will be recorded so that another device appearing at
-   the same physical location can be preferentially added to the same
-   array (provides the spare-same-slot action policy applied to the
-   path).
-
- - A new flags "--invalid-backup" flag is available in --assemble
-   mode.  This can be used to re-assemble an array which was stopping
-   in the middle of a reshape, and for which the 'backup file' is no
-   longer available or is corrupted.  The array may have some
-   corruption in it at the point where reshape was up to, but at least
-   the rest of the array will become available.
-
-
- - Various internal restructuring - more is needed.
-
-Changes Prior to release 3.1.5
-  - Fixes for v1.x metadata on big-endian machines.
-  - man page improvements
-  - Improve '--detail --export' when run on partitions of an md array.
-  - Fix regression with removing 'failed' or 'detached' devices.
-  - Fixes for "--assemble --force" in various unusual cases.
-  - Allow '-Y' to mean --export.  This was documented but not implemented.
-  - Various fixed for handling 'ddf' metadata.  This is now more reliable
-    but could benefit from more interoperability testing.
-  - Correctly list subarrays of a container in "--detail" output.
-  - Improve checks on whether the requested number of devices is supported
-    by the metadata - both for --create and --grow.
-  - Don't remove partitions from a device that is being included in an
-    array until we are fully committed to including it.
-  - Allow "--assemble --update=no-bitmap" so an array with a corrupt
-    bitmap can still be assembled.
-  - Don't allow --add to succeed if it looks like a "--re-add" is probably
-    wanted, but cannot succeed.  This avoids inadvertently turning
-    devices into spares when an array is failed.
-
-Changes Prior to release 3.1.4
-  Two fixes related to configs that aren't using udev:
-   -    Don't remove md devices which 'standard' names on --stop
-   -    Allow dev_open to work on read-only /dev
-  And fixed regressions:
-   -    Allow --incremental to add spares to an array
-   -    Accept --no-degraded as a deprecated option rather than
-            throwing an error
-   -    Return correct success status when --incrmental assembling
-        a container which does not yet have enough devices.
-   -    Don't link mdadm with pthreads, only mdmon needs it.
-   -    Fix compiler warning due to bad use of snprintf
-
-Changes Prior to release 3.1.3
-   -    mapfile now lives in a fixed location which default to
-        /dev/.mdadm/map but can be changed at compile time.  This
-       location is choses and most distros provide it during early
-       boot and preserve it through.  As long a /dev exists and is
-       writable, /dev/.mdadm will be created.
-       Other files file communication with mdmon live here too.
-       This fixes a bug reported by Debian and Gentoo users where
-       udev would spin in early-boot.
-   -    IMSM and DDF metadata will not be recognised on partitions
-        as they should only be used on whole-disks.
-   -    Various overflows causes by 2G drives have been addressed.
-   -    A subarray of an IMSM contain can now be killed with
-        --kill-subarray.  Also subarrays can be renamed with
-       --update-subarray
-   -    -If (or --incremental --fail) can be used  from udev to
-        fail and remove from all arrays a device which has been
-       unplugged from the system.  i.e. hot-unplug-support.
-   -    "mdadm /dev/mdX --re-add missing" will look for any device
-        that looks like it should be a member of /dev/mdX but isn't
-       and will automatically --re-add it
-   -    Now compile with -Wextra to get extra warnings.
-   -    Lots of minor bug fixes, documentation improvements, etcc
-
-Changes Prior to release 3.1.2
-   -    The default metadata has changed again (sorry about that).
-        It is now v1.2 and will hopefully stay that way.  It turned
-        out there were boot-block issues with v1.1 which make it
-        unsuitable for a default, though in many cases it is still
-        suitable to use.
-   -    Stopping a container is not permitted when members are still
-        active
-   -    Add 'homehost' to the valid words for the "AUTO" config file
-        line.  When followed by "-all", this causes mdadm to
-        auto-assemble any array belonging to this host, but not
-        auto-assemble anything else.
-   -    Fix some bugs with "--grow --chunksize=" for changing chunksize.
-   -    VAR_RUN can be easily changed at compile time just like ALT_RUN.
-        This gives distros more flexibility in how to manage the
-        pid and sock files that mdmon needs.
-   -    Various mdmon fixes
-   -    Always make bitmap 4K-aligned if at all possible.
-   -    If mdadm.conf lists arrays which have inter-dependencies,
-        they previously had to be listed in the "right" order.  Now
-        any order should work.
-   -    Fix --force assembly of v1.x arrays which are in the process
-        of recovering.
-   -    Add section on 'scrubbing' to 'md' man page.
-   -    Various command-line-option parsing improvements.
-   -    ... and lots of other bug fixes.
-
-Changes Prior to release 3.1.1
-   -    Multiple fixes for new --grow levels including fixes for
-       serious data corruption problems.
-   -    Change default metadata to v1.1
-   -    Change default chunk size to 512K
-   -    Change default bitmap chunk size to 64Meg
-   -    When --re-add is used, don't fall back to
-       --add if --re-add fails as this can destroy data.
-
-Changes Prior to release 3.1
-   -    Support --grow to change the layout of RAID4/5/6
-   -    Support --grow to change the chunksize of raid 4/5/6
-   -    Support --grow to change level from RAID1 -> RAID5 -> RAID6 and
-        back.
-   -    Support --grow to reduce the number of devices in RAID4/5/6.
-   -    Support restart of these grow options when assembling an array
-       which is partially grown.
-   -    Assorted tests of this code, and of different RAID6 layouts.
-
-Changes Prior to release 3.0.3
-   -    Improvements for creating arrays giving just a name, like 'foo',
-       rather than the full '/dev/md/foo'.
-   -    Improvements for assembling member arrays of containers.
-   -    Improvements to test suite
-   -    Add option to change increment for RebuildNN messages reported
-       by "mdadm --monitor"
-   -    Improvements to mdmon 'hand-over' from initrd to final root.
-   -    Handle merging of devices that have left an IMSM array and are
-       being re-incorporated.
-   -    Add missing space in "--detail --brief" output.
-
-Changes Prior to release 3.0.2
-   -    Fix crash when homehost is not set, as often happens in
-       early boot.
-
-Changes Prior to release 3.0.1
-   -    Fix various segfaults
-   -    Fixes for --examine with containers
-   -    Lots of other little fixes.
-
-Changes Prior to release 3.0
-   -    Support for externally managed metadata, specifically DDF and IMSM.
-   -    Depend on udev to create entries in /dev, rather than creating them
-       ourselves.
-   -    remove --auto-update-home-hosts
-   -    new config file line "auto"
-   -    new "<ignore>" and "any" options for "homehost"
-   -    numerous bug fixes and minor enhancements.
index 6f84e5b0de9944dc05ce6c445a454d376d916615..d033eb68f30c81024d6624eda74eca189ecede1e 100644 (file)
--- a/Create.c
+++ b/Create.c
  */
 
 #include       "mdadm.h"
+#include       "udev.h"
 #include       "md_u.h"
 #include       "md_p.h"
 #include       <ctype.h>
+#include       <fcntl.h>
+#include       <signal.h>
+#include       <sys/signalfd.h>
+#include       <sys/wait.h>
+
+#ifndef FALLOC_FL_ZERO_RANGE
+#define FALLOC_FL_ZERO_RANGE 16
+#endif
 
 static int round_size_and_verify(unsigned long long *size, int chunk)
 {
@@ -39,48 +48,440 @@ static int round_size_and_verify(unsigned long long *size, int chunk)
        return 0;
 }
 
-static int default_layout(struct supertype *st, int level, int verbose)
+/**
+ * default_layout() - Get default layout for level.
+ * @st: metadata requested, could be NULL.
+ * @level: raid level requested.
+ * @verbose: verbose level.
+ *
+ * Try to ask metadata handler first, otherwise use global defaults.
+ * When a global default with a known name is chosen and @verbose is
+ * greater than zero, the choice is reported with pr_err().
+ *
+ * Return: Layout or &UnSet, return value meaning depends on the level used.
+ */
+int default_layout(struct supertype *st, int level, int verbose)
 {
        int layout = UnSet;
+       mapping_t *layout_map = NULL;
+       char *layout_name = NULL;
 
        if (st && st->ss->default_geometry)
                st->ss->default_geometry(st, &level, &layout, NULL);
 
-       if (layout == UnSet)
-               switch(level) {
-               default: /* no layout */
-                       layout = 0;
-                       break;
-               case 0:
-                       layout = RAID0_ORIG_LAYOUT;
-                       break;
-               case 10:
-                       layout = 0x102; /* near=2, far=1 */
-                       if (verbose > 0)
-                               pr_err("layout defaults to n2\n");
-                       break;
-               case 5:
-               case 6:
-                       layout = map_name(r5layout, "default");
-                       if (verbose > 0)
-                               pr_err("layout defaults to %s\n", map_num(r5layout, layout));
-                       break;
-               case LEVEL_FAULTY:
-                       layout = map_name(faultylayout, "default");
+       /* The metadata handler already picked a layout: use it unchanged. */
+       if (layout != UnSet)
+               return layout;
+
+       switch (level) {
+       default: /* no layout */
+               layout = 0;
+               break;
+       case 0:
+               layout = RAID0_ORIG_LAYOUT;
+               break;
+       case 10:
+               layout = 0x102; /* near=2, far=1 */
+               layout_name = "n2";
+               break;
+       case 5:
+       case 6:
+               layout_map = r5layout;
+               break;
+       case LEVEL_FAULTY:
+               layout_map = faultylayout;
+               break;
+       }
+
+       /* Levels with a name table use the entry called "default". */
+       if (layout_map) {
+               layout = map_name(layout_map, "default");
+               layout_name = map_num_s(layout_map, layout);
+       }
+       /* Levels without a layout name (e.g. RAID0) are never announced. */
+       if (layout_name && verbose > 0)
+               pr_err("layout defaults to %s\n", layout_name);
+
+       return layout;
+}
+
+/**
+ * write_zeroes_fork() - Fork a child that zeroes the data area of a device.
+ * @fd: open file descriptor of the device, handed to fallocate().
+ * @s: shape; @s->size (KiB) is the amount to zero, if it is 0 the size
+ *     reported by get_dev_size() is used instead.
+ * @st: metadata; @st->data_offset is the start when @dv has no valid offset.
+ * @dv: device entry providing the name and, optionally, the data offset.
+ *
+ * The child zeroes the range with fallocate(FALLOC_FL_ZERO_RANGE) in
+ * requests of at most 1GB and then exits with 0 on success or 1 as soon
+ * as one request fails.
+ *
+ * Return: pid of the child (in the parent), negative value if the device
+ * size could not be read or fork() failed. The child never returns.
+ */
+static pid_t write_zeroes_fork(int fd, struct shape *s, struct supertype *st,
+                              struct mddev_dev *dv)
+
+{
+       const unsigned long long req_size = 1 << 30;
+       unsigned long long offset_bytes, size_bytes, sz;
+       sigset_t sigset;
+       int ret = 0;
+       pid_t pid;
+
+       size_bytes = KIB_TO_BYTES(s->size);
+
+       /*
+        * If size_bytes is zero, this is a zoned raid array where
+        * each disk is of a different size and uses its full
+        * disk. Thus zero the entire disk.
+        */
+       if (!size_bytes && !get_dev_size(fd, dv->devname, &size_bytes))
+               return -1;
+
+       /* A per-device data offset takes precedence over the metadata one. */
+       if (dv->data_offset != INVALID_SECTORS)
+               offset_bytes = SEC_TO_BYTES(dv->data_offset);
+       else
+               offset_bytes = SEC_TO_BYTES(st->data_offset);
+
+       /* NOTE(review): the second value printed is the length, not the end. */
+       pr_info("zeroing data from %lld to %lld on: %s\n",
+               offset_bytes, size_bytes, dv->devname);
+
+       pid = fork();
+       if (pid < 0) {
+               pr_err("Could not fork to zero disks: %s\n", strerror(errno));
+               return pid;
+       } else if (pid != 0) {
+               return pid;
+       }
+
+       /*
+        * Child from here on: make sure SIGINT is not blocked so that
+        * the zeroing can be interrupted between two requests.
+        */
+       sigemptyset(&sigset);
+       sigaddset(&sigset, SIGINT);
+       sigprocmask(SIG_UNBLOCK, &sigset, NULL);
 
-                       if (verbose > 0)
-                               pr_err("layout defaults to %s\n", map_num(faultylayout, layout));
+       while (size_bytes) {
+               /*
+                * Split requests to the kernel into 1GB chunks seeing the
+                * fallocate() call is not interruptible and blocking a
+                * ctrl-c for several minutes is not desirable.
+                *
+                * 1GB is chosen as a compromise: the user may still have
+                * to wait several seconds if they ctrl-c on devices that
+                * zero slowly, but will reduce the number of requests
+                * required and thus the overhead on devices that perform
+                * better.
+                */
+               sz = size_bytes;
+               if (sz >= req_size)
+                       sz = req_size;
+
+               if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
+                             offset_bytes, sz)) {
+                       pr_err("zeroing %s failed: %s\n", dv->devname,
+                              strerror(errno));
+                       ret = 1;
                        break;
                }
 
-       return layout;
+               offset_bytes += sz;
+               size_bytes -= sz;
+       }
+
+       /* The exit status is what the parent inspects after waitpid(). */
+       exit(ret);
+}
+
+/**
+ * wait_for_zero_forks() - Wait for every zeroing child process to exit.
+ * @zero_pids: array of child pids; entries that are not positive are
+ *             treated as "no child" and reset to 0.
+ * @count: number of entries in @zero_pids.
+ *
+ * SIGINT and SIGCHLD are blocked and received through a signalfd. SIGINT is
+ * only recorded here, the function keeps waiting until all children are
+ * gone. Children are collected with waitpid() and not by counting SIGCHLD:
+ * standard signals are not queued, so one SIGCHLD may stand for several
+ * exited children, and a child that exited before SIGCHLD was blocked
+ * raises no pending signal at all.
+ *
+ * Every collected entry of @zero_pids is set to 0, so calling the function
+ * again is a no-op. The signal mask is left as set here (SIGINT and SIGCHLD
+ * blocked); the caller is expected to restore it.
+ *
+ * Return: 0 if there was nothing to wait for or all children exited with
+ * status 0 without a SIGINT being seen, 1 otherwise.
+ */
+static int wait_for_zero_forks(int *zero_pids, int count)
+{
+       int wstatus, ret = 0, i, sfd, wait_count = 0;
+       struct signalfd_siginfo fdsi;
+       bool interrupted = false;
+       sigset_t sigset;
+       ssize_t s;
+       pid_t pid;
+
+       for (i = 0; i < count; i++) {
+               /* A failed fork is stored as a negative pid: no child. */
+               if (zero_pids[i] < 0)
+                       zero_pids[i] = 0;
+               if (zero_pids[i])
+                       wait_count++;
+       }
+       if (!wait_count)
+               return 0;
+
+       sigemptyset(&sigset);
+       sigaddset(&sigset, SIGINT);
+       sigaddset(&sigset, SIGCHLD);
+       sigprocmask(SIG_BLOCK, &sigset, NULL);
+
+       sfd = signalfd(-1, &sigset, 0);
+       if (sfd < 0) {
+               pr_err("Unable to create signalfd: %s\n", strerror(errno));
+               /* Still reap the children below, but report failure. */
+               ret = 1;
+       }
+
+       while (sfd >= 0 && wait_count) {
+               /*
+                * Collect whatever has exited so far before sleeping, the
+                * matching SIGCHLD may have been merged or lost.
+                */
+               for (i = 0; i < count; i++) {
+                       if (!zero_pids[i])
+                               continue;
+
+                       pid = waitpid(zero_pids[i], &wstatus, WNOHANG);
+                       if (pid == 0)
+                               continue; /* still running */
+                       if (pid < 0 || !WIFEXITED(wstatus) ||
+                           WEXITSTATUS(wstatus))
+                               ret = 1;
+                       zero_pids[i] = 0;
+                       wait_count--;
+               }
+               if (!wait_count)
+                       break;
+
+               s = read(sfd, &fdsi, sizeof(fdsi));
+               if (s < 0 && errno == EINTR)
+                       continue;
+               if (s != sizeof(fdsi)) {
+                       pr_err("Invalid signalfd read: %s\n", strerror(errno));
+                       ret = 1;
+                       break;
+               }
+
+               if (fdsi.ssi_signo == SIGINT) {
+                       printf("\n");
+                       pr_info("Interrupting zeroing processes, please wait...\n");
+                       interrupted = true;
+               }
+               /* SIGCHLD needs no action: the next iteration reaps. */
+       }
+
+       if (sfd >= 0)
+               close(sfd);
+
+       /*
+        * Only the error paths above leave children behind. Wait for them
+        * without the signalfd so none keeps the disks busy in background.
+        */
+       for (i = 0; i < count; i++) {
+               if (!zero_pids[i])
+                       continue;
+
+               do {
+                       pid = waitpid(zero_pids[i], &wstatus, 0);
+               } while (pid < 0 && errno == EINTR);
+               zero_pids[i] = 0;
+               if (pid < 0 || !WIFEXITED(wstatus) || WEXITSTATUS(wstatus))
+                       ret = 1;
+       }
+
+       if (interrupted) {
+               pr_err("zeroing interrupted!\n");
+               return 1;
+       }
+
+       if (ret)
+               pr_err("zeroing failed!\n");
+       else
+               pr_info("zeroing finished\n");
+
+       return ret;
+}
+
+/**
+ * add_disk_to_super() - Register one device in the metadata being created.
+ * @mdfd: file descriptor of the md device, used to stop the array on error.
+ * @s: shape; @s->raiddisks and @s->write_zeroes are read.
+ * @c: context; only @c->verbose is read.
+ * @st: metadata handler.
+ * @dv: device to add.
+ * @info: per-device info; disk state, major and minor are filled in here.
+ * @have_container: non-zero if the array is created inside a container, in
+ *                  which case the device is not opened.
+ * @major_num: bitmap major version, checked against BITMAP_MAJOR_CLUSTERED.
+ * @zero_pid: set to the return value of write_zeroes_fork() when zeroing
+ *            was requested and a device was opened, untouched otherwise.
+ *
+ * Return: 0 on success, 1 on failure.
+ */
+static int add_disk_to_super(int mdfd, struct shape *s, struct context *c,
+               struct supertype *st, struct mddev_dev *dv,
+               struct mdinfo *info, int have_container, int major_num,
+               int *zero_pid)
+{
+       dev_t rdev;
+       int fd;
+
+       /* Journal, active member or spare: derived from disposition/slot. */
+       if (dv->disposition == 'j') {
+               info->disk.raid_disk = MD_DISK_ROLE_JOURNAL;
+               info->disk.state = (1<<MD_DISK_JOURNAL);
+       } else if (info->disk.raid_disk < s->raiddisks) {
+               info->disk.state = (1<<MD_DISK_ACTIVE) |
+                       (1<<MD_DISK_SYNC);
+       } else {
+               info->disk.state = 0;
+       }
+
+       if (dv->writemostly == FlagSet) {
+               if (major_num == BITMAP_MAJOR_CLUSTERED) {
+                       pr_err("Can not set %s --write-mostly with a clustered bitmap\n",dv->devname);
+                       return 1;
+               } else {
+                       info->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
+               }
+
+       }
+
+       if (dv->failfast == FlagSet)
+               info->disk.state |= (1<<MD_DISK_FAILFAST);
+
+       if (have_container) {
+               fd = -1;
+       } else {
+               /*
+                * Exclusive open, except for external metadata with a
+                * container name set.
+                */
+               if (st->ss->external && st->container_devnm[0])
+                       fd = open(dv->devname, O_RDWR);
+               else
+                       fd = open(dv->devname, O_RDWR|O_EXCL);
+
+               if (fd < 0) {
+                       pr_err("failed to open %s after earlier success - aborting\n",
+                              dv->devname);
+                       return 1;
+               }
+               if (!fstat_is_blkdev(fd, dv->devname, &rdev)) {
+                       close(fd);
+                       return 1;
+               }
+               info->disk.major = major(rdev);
+               info->disk.minor = minor(rdev);
+       }
+       if (fd >= 0)
+               remove_partitions(fd);
+       /*
+        * NOTE(review): fd is not closed on the success path; presumably
+        * ->add_to_super() keeps it - confirm against the handlers.
+        */
+       if (st->ss->add_to_super(st, &info->disk, fd, dv->devname,
+                                dv->data_offset)) {
+               ioctl(mdfd, STOP_ARRAY, NULL);
+               /* NOTE(review): fd is -1 here when have_container is set. */
+               close(fd);
+               return 1;
+       }
+       st->ss->getinfo_super(st, info, NULL);
+
+       if (fd >= 0 && s->write_zeroes) {
+               /* The caller collects the child via *zero_pid. */
+               *zero_pid = write_zeroes_fork(fd, s, st, dv);
+               if (*zero_pid <= 0) {
+                       ioctl(mdfd, STOP_ARRAY, NULL);
+                       close(fd);
+                       return 1;
+               }
+       }
+
+       if (have_container && c->verbose > 0)
+               pr_err("Using %s for device %d\n",
+                      map_dev(info->disk.major, info->disk.minor, 0),
+                      info->disk.number);
+
+       if (!have_container) {
+               /* getinfo_super might have lost these ... */
+               info->disk.major = major(rdev);
+               info->disk.minor = minor(rdev);
+       }
+
+       return 0;
+}
+
+/**
+ * update_metadata() - Write the initial metadata and refresh the map file.
+ * @mdfd: file descriptor of the md device being created.
+ * @s: shape; only @s->level is read.
+ * @st: metadata handler, its superblock is freed before returning.
+ * @map: map file entries to update.
+ * @info: array info collected before the devices were added.
+ * @chosen_name: device path recorded in the map for this array.
+ *
+ * Return: 0 on success, 1 if writing the metadata or configuring the
+ * bitmap failed.
+ */
+static int update_metadata(int mdfd, struct shape *s, struct supertype *st,
+                          struct map_ent **map, struct mdinfo *info,
+                          char *chosen_name)
+{
+       struct mdinfo info_new;
+       struct map_ent *me = NULL;
+
+       /* check to see if the uuid has changed due to these
+        * metadata changes, and if so update the member array
+        * and container uuid.  Note ->write_init_super clears
+        * the subarray cursor such that ->getinfo_super once
+        * again returns container info.
+        */
+       st->ss->getinfo_super(st, &info_new, NULL);
+       if (st->ss->external && !is_container(s->level) &&
+           !same_uuid(info_new.uuid, info->uuid, 0)) {
+               map_update(map, fd2devnm(mdfd),
+                          info_new.text_version,
+                          info_new.uuid, chosen_name);
+               /* Remember the container entry, it is refreshed below. */
+               me = map_by_devnm(map, st->container_devnm);
+       }
+
+       if (st->ss->write_init_super(st)) {
+               st->ss->free_super(st);
+               return 1;
+       }
+
+       /*
+        * Before activating the array, perform extra steps
+        * required to configure the internal write-intent
+        * bitmap.
+        */
+       if (info_new.consistency_policy == CONSISTENCY_POLICY_BITMAP &&
+           st->ss->set_bitmap && st->ss->set_bitmap(st, info)) {
+               st->ss->free_super(st);
+               return 1;
+       }
+
+       /* update parent container uuid */
+       if (me) {
+               /*
+                * Work on a copy of the path.
+                * NOTE(review): presumably because map_update() may
+                * modify the entry that me points to - confirm.
+                */
+               char *path = xstrdup(me->path);
+
+               st->ss->getinfo_super(st, &info_new, NULL);
+               map_update(map, st->container_devnm, info_new.text_version,
+                          info_new.uuid, path);
+               free(path);
+       }
+
+       flush_metadata_updates(st);
+       st->ss->free_super(st);
+
+       return 0;
+}
+
+/**
+ * add_disks() - Add all member devices to the metadata and to the array.
+ * @mdfd: file descriptor of the md device being created.
+ * @info: array info used as template for every device.
+ * @s: shape of the array.
+ * @c: context.
+ * @st: metadata handler.
+ * @map: map file entries, passed on to update_metadata().
+ * @devlist: list of devices given by the user.
+ * @total_slots: number of device slots; the walk aborts beyond this.
+ * @have_container: non-zero if the array is created inside a container.
+ * @insert_point: index of the device that is skipped in list order and
+ *                handled after the end of the list.
+ * @major_num: bitmap major version, passed on to add_disk_to_super().
+ * @chosen_name: device path of the array, passed on to update_metadata().
+ *
+ * Two passes are made over the devices. Pass 1 adds each device to the
+ * metadata (optionally starting a zeroing child), then waits for the
+ * zeroing children and writes the metadata. Pass 2 adds each device to the
+ * array with add_disk().
+ *
+ * Return: 0 on success, non-zero value of the first failing step otherwise.
+ */
+static int add_disks(int mdfd, struct mdinfo *info, struct shape *s,
+                    struct context *c, struct supertype *st,
+                    struct map_ent **map, struct mddev_dev *devlist,
+                    int total_slots, int have_container, int insert_point,
+                    int major_num, char *chosen_name)
+{
+       struct mddev_dev *moved_disk = NULL;
+       int pass, raid_disk_num, dnum;
+       int zero_pids[total_slots];
+       struct mddev_dev *dv;
+       struct mdinfo *infos;
+       sigset_t sigset, orig_sigset;
+       int ret = 0;
+
+       /*
+        * Block SIGINT so the main thread will always wait for the
+        * zeroing processes when being interrupted. Otherwise the
+        * zeroing processes will finish their work in the background
+        * keeping the disk busy.
+        */
+       sigemptyset(&sigset);
+       sigaddset(&sigset, SIGINT);
+       sigprocmask(SIG_BLOCK, &sigset, &orig_sigset);
+       memset(zero_pids, 0, sizeof(zero_pids));
+
+       infos = xmalloc(sizeof(*infos) * total_slots);
+       enable_fds(total_slots);
+       for (pass = 1; pass <= 2; pass++) {
+               /*
+                * When the end of the list is reached, continue with
+                * moved_disk (NULL ends the walk).
+                */
+               for (dnum = 0, raid_disk_num = 0, dv = devlist; dv;
+                    dv = (dv->next) ? (dv->next) : moved_disk, dnum++) {
+                       if (dnum >= total_slots)
+                               abort();
+                       if (dnum == insert_point) {
+                               /* Leave this slot empty, revisit dv later. */
+                               raid_disk_num += 1;
+                               moved_disk = dv;
+                               continue;
+                       }
+                       if (strcasecmp(dv->devname, "missing") == 0) {
+                               raid_disk_num += 1;
+                               continue;
+                       }
+                       if (have_container)
+                               moved_disk = NULL;
+                       if (have_container && dnum < total_slots - 1)
+                               /* repeatedly use the container */
+                               moved_disk = dv;
+
+                       switch(pass) {
+                       case 1:
+                               infos[dnum] = *info;
+                               infos[dnum].disk.number = dnum;
+                               infos[dnum].disk.raid_disk = raid_disk_num++;
+
+                               /* A journal device does not use up a slot. */
+                               if (dv->disposition == 'j')
+                                       raid_disk_num--;
+
+                               ret = add_disk_to_super(mdfd, s, c, st, dv,
+                                               &infos[dnum], have_container,
+                                               major_num, &zero_pids[dnum]);
+                               if (ret)
+                                       goto out;
+
+                               break;
+                       case 2:
+                               infos[dnum].errors = 0;
+
+                               ret = add_disk(mdfd, st, info, &infos[dnum]);
+                               if (ret) {
+                                       pr_err("ADD_NEW_DISK for %s failed: %s\n",
+                                              dv->devname, strerror(errno));
+                                       if (errno == EINVAL &&
+                                           info->array.level == 0) {
+                                               pr_err("Possibly your kernel doesn't support RAID0 layouts.\n");
+                                               pr_err("Either upgrade, or use --layout=dangerous\n");
+                                       }
+                                       goto out;
+                               }
+                               break;
+                       }
+                       /* The revisited device was just handled: done. */
+                       if (!have_container &&
+                           dv == moved_disk && dnum != insert_point) break;
+               }
+
+               if (pass == 1) {
+                       /* Zeroing must be complete before metadata is written. */
+                       ret = wait_for_zero_forks(zero_pids, total_slots);
+                       if (ret)
+                               goto out;
+
+                       ret = update_metadata(mdfd, s, st, map, info,
+                                             chosen_name);
+                       if (ret)
+                               goto out;
+               }
+       }
+
+out:
+       /* On failure, do not leave zeroing children running behind us. */
+       if (ret)
+               wait_for_zero_forks(zero_pids, total_slots);
+       free(infos);
+       sigprocmask(SIG_SETMASK, &orig_sigset, NULL);
+       return ret;
 }
 
-int Create(struct supertype *st, char *mddev,
-          char *name, int *uuid,
-          int subdevs, struct mddev_dev *devlist,
-          struct shape *s,
-          struct context *c, unsigned long long data_offset)
+int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
+          struct mddev_dev *devlist, struct shape *s, struct context *c)
 {
        /*
         * Create a new raid array.
@@ -100,9 +501,12 @@ int Create(struct supertype *st, char *mddev,
         */
        int mdfd;
        unsigned long long minsize = 0, maxsize = 0;
+       dev_policy_t *custom_pols = NULL;
        char *mindisc = NULL;
        char *maxdisc = NULL;
-       int dnum, raid_disk_num;
+       char *name = ident->name;
+       int *uuid = ident->uuid_set == 1 ? ident->uuid : NULL;
+       int dnum;
        struct mddev_dev *dv;
        dev_t rdev;
        int fail = 0, warn = 0;
@@ -111,18 +515,16 @@ int Create(struct supertype *st, char *mddev,
        int missing_disks = 0;
        int insert_point = subdevs * 2; /* where to insert a missing drive */
        int total_slots;
-       int pass;
        int rv;
        int bitmap_fd;
        int have_container = 0;
        int container_fd = -1;
        int need_mdmon = 0;
        unsigned long long bitmapsize;
-       struct mdinfo info, *infos;
+       struct mdinfo info;
        int did_default = 0;
        int do_default_layout = 0;
        int do_default_chunk = 0;
-       unsigned long safe_mode_delay = 0;
        char chosen_name[1024];
        struct map_ent *map = NULL;
        unsigned long long newsize;
@@ -153,7 +555,7 @@ int Create(struct supertype *st, char *mddev,
                return 1;
        }
        if (s->raiddisks < 2 && s->level >= 4) {
-               pr_err("at least 2 raid-devices needed for level 4 or 5\n");
+               pr_err("at least 2 raid-devices needed for level %d\n", s->level);
                return 1;
        }
        if (s->level <= 0 && s->sparedisks) {
@@ -191,6 +593,9 @@ int Create(struct supertype *st, char *mddev,
                                first_missing = subdevs * 2;
                                second_missing = subdevs * 2;
                                insert_point = subdevs * 2;
+
+                               if (mddev_test_and_add_drive_policies(st, &custom_pols, fd, 1))
+                                       exit(1);
                        }
                }
                if (fd >= 0)
@@ -243,20 +648,14 @@ int Create(struct supertype *st, char *mddev,
                break;
        case LEVEL_LINEAR:
                /* a chunksize of zero 0s perfectly valid (and preferred) since 2.6.16 */
-               if (get_linux_version() < 2006016 && s->chunk == 0) {
-                       s->chunk = 64;
-                       if (c->verbose > 0)
-                               pr_err("chunk size defaults to 64K\n");
-               }
                break;
        case 1:
        case LEVEL_FAULTY:
        case LEVEL_MULTIPATH:
        case LEVEL_CONTAINER:
                if (s->chunk) {
-                       s->chunk = 0;
-                       if (c->verbose > 0)
-                               pr_err("chunk size ignored for this level\n");
+                       pr_err("specifying chunk size is forbidden for this level\n");
+                       return 1;
                }
                break;
        default:
@@ -274,7 +673,7 @@ int Create(struct supertype *st, char *mddev,
        newsize = s->size * 2;
        if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
                                              &s->chunk, s->size*2,
-                                             data_offset, NULL,
+                                             s->data_offset, NULL,
                                              &newsize, s->consistency_policy,
                                              c->verbose >= 0))
                return 1;
@@ -309,10 +708,10 @@ int Create(struct supertype *st, char *mddev,
        info.array.working_disks = 0;
        dnum = 0;
        for (dv = devlist; dv; dv = dv->next)
-               if (data_offset == VARIABLE_OFFSET)
+               if (s->data_offset == VARIABLE_OFFSET)
                        dv->data_offset = INVALID_SECTORS;
                else
-                       dv->data_offset = data_offset;
+                       dv->data_offset = s->data_offset;
 
        for (dv=devlist; dv && !have_container; dv=dv->next, dnum++) {
                char *dname = dv->devname;
@@ -328,7 +727,7 @@ int Create(struct supertype *st, char *mddev,
                        missing_disks ++;
                        continue;
                }
-               if (data_offset == VARIABLE_OFFSET) {
+               if (s->data_offset == VARIABLE_OFFSET) {
                        doff = strchr(dname, ':');
                        if (doff) {
                                *doff++ = 0;
@@ -336,7 +735,7 @@ int Create(struct supertype *st, char *mddev,
                        } else
                                dv->data_offset = INVALID_SECTORS;
                } else
-                       dv->data_offset = data_offset;
+                       dv->data_offset = s->data_offset;
 
                dfd = open(dname, O_RDONLY);
                if (dfd < 0) {
@@ -348,7 +747,7 @@ int Create(struct supertype *st, char *mddev,
                        close(dfd);
                        exit(2);
                }
-               close(dfd);
+
                info.array.working_disks++;
                if (dnum < s->raiddisks && dv->disposition != 'j')
                        info.array.active_disks++;
@@ -421,6 +820,11 @@ int Create(struct supertype *st, char *mddev,
                        }
                }
 
+               if (drive_test_and_add_policies(st, &custom_pols, dfd, 1))
+                       exit(1);
+
+               close(dfd);
+
                if (dv->disposition == 'j')
                        goto skip_size_check;  /* skip write journal for size check */
 
@@ -473,7 +877,7 @@ int Create(struct supertype *st, char *mddev,
                            st->minor_version >= 1)
                                /* metadata at front */
                                warn |= check_partitions(fd, dname, 0, 0);
-                       else if (s->level == 1 || s->level == LEVEL_CONTAINER ||
+                       else if (s->level == 1 || is_container(s->level) ||
                                 (s->level == 0 && s->raiddisks == 1))
                                /* partitions could be meaningful */
                                warn |= check_partitions(fd, dname, freesize*2, s->size*2);
@@ -495,6 +899,7 @@ int Create(struct supertype *st, char *mddev,
                        close(fd);
                }
        }
+
        if (missing_disks == dnum && !have_container) {
                pr_err("Subdevs can't be all missing\n");
                return 1;
@@ -521,7 +926,7 @@ int Create(struct supertype *st, char *mddev,
                        if (!st->ss->validate_geometry(st, s->level, s->layout,
                                                       s->raiddisks,
                                                       &s->chunk, minsize*2,
-                                                      data_offset,
+                                                      s->data_offset,
                                                       NULL, NULL,
                                                       s->consistency_policy, 0)) {
                                pr_err("devices too large for RAID level %d\n", s->level);
@@ -540,8 +945,10 @@ int Create(struct supertype *st, char *mddev,
        }
 
        if (!s->bitmap_file &&
+           !st->ss->external &&
            s->level >= 1 &&
            st->ss->add_internal_bitmap &&
+           s->journaldisks == 0 &&
            (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
             s->consistency_policy != CONSISTENCY_POLICY_PPL) &&
            (s->write_behind || s->size > 100*1024*1024ULL)) {
@@ -549,7 +956,7 @@ int Create(struct supertype *st, char *mddev,
                        pr_err("automatically enabling write-intent bitmap on large array\n");
                s->bitmap_file = "internal";
        }
-       if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
+       if (s->bitmap_file && str_is_none(s->bitmap_file) == true)
                s->bitmap_file = NULL;
 
        if (s->consistency_policy == CONSISTENCY_POLICY_PPL &&
@@ -558,6 +965,13 @@ int Create(struct supertype *st, char *mddev,
                return 1;
        }
 
+       if (st->ss == &super_imsm && s->level == 10 && s->raiddisks > 4) {
+               /* Print no matter runstop was specifed */
+               pr_err("Warning! VROC UEFI driver does not support RAID10 in requested layout.\n");
+               pr_err("Array won't be suitable as boot device.\n");
+               warn = 1;
+       }
+
        if (!have_container && s->level > 0 && ((maxsize-s->size)*100 > maxsize)) {
                if (c->runstop != 1 || c->verbose >= 0)
                        pr_err("largest drive (%s) exceeds size (%lluK) by more than 1%%\n",
@@ -577,7 +991,7 @@ int Create(struct supertype *st, char *mddev,
 
        if (warn) {
                if (c->runstop!= 1) {
-                       if (!ask("Continue creating array")) {
+                       if (!ask("Continue creating array")) {
                                pr_err("create aborted.\n");
                                return 1;
                        }
@@ -626,7 +1040,7 @@ int Create(struct supertype *st, char *mddev,
 
        /* We need to create the device */
        map_lock(&map);
-       mdfd = create_mddev(mddev, name, c->autof, LOCAL, chosen_name, 1);
+       mdfd = create_mddev(ident->devname, ident->name, c->autof, LOCAL, chosen_name, 1);
        if (mdfd < 0) {
                map_unlock(&map);
                return 1;
@@ -635,16 +1049,14 @@ int Create(struct supertype *st, char *mddev,
         * it could be in conflict with already existing device
         * e.g. container, array
         */
-       if (strncmp(chosen_name, "/dev/md/", 8) == 0 &&
-           map_by_name(&map, chosen_name+8) != NULL) {
-               pr_err("Array name %s is in use already.\n",
-                       chosen_name);
+       if (strncmp(chosen_name, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0 &&
+           map_by_name(&map, chosen_name + DEV_MD_DIR_LEN)) {
+               pr_err("Array name %s is in use already.\n", chosen_name);
                close(mdfd);
                map_unlock(&map);
                udev_unblock();
                return 1;
        }
-       mddev = chosen_name;
 
        memset(&inf, 0, sizeof(inf));
        md_get_array_info(mdfd, &inf);
@@ -662,7 +1074,7 @@ int Create(struct supertype *st, char *mddev,
         * with, but it chooses to trust me instead. Sigh
         */
        info.array.md_minor = 0;
-       if (fstat_is_blkdev(mdfd, mddev, &rdev))
+       if (fstat_is_blkdev(mdfd, chosen_name, &rdev))
                info.array.md_minor = minor(rdev);
        info.array.not_persistent = 0;
 
@@ -714,8 +1126,8 @@ int Create(struct supertype *st, char *mddev,
        info.array.layout = s->layout;
        info.array.chunk_size = s->chunk*1024;
 
-       if (name == NULL || *name == 0) {
-               /* base name on mddev */
+       if (*name == 0) {
+               /* base name on devname */
                /*  /dev/md0 -> 0
                 *  /dev/md_d0 -> d0
                 *  /dev/md_foo -> foo
@@ -725,20 +1137,21 @@ int Create(struct supertype *st, char *mddev,
                 *  /dev/mdhome -> home
                 */
                /* FIXME compare this with rules in create_mddev */
-               name = strrchr(mddev, '/');
+               name = strrchr(chosen_name, '/');
+
                if (name) {
                        name++;
                        if (strncmp(name, "md_", 3) == 0 &&
-                           strlen(name) > 3 && (name-mddev) == 5 /* /dev/ */)
+                           strlen(name) > 3 && (name - chosen_name) == 5 /* /dev/ */)
                                name += 3;
                        else if (strncmp(name, "md", 2) == 0 &&
                                 strlen(name) > 2 && isdigit(name[2]) &&
-                                (name-mddev) == 5 /* /dev/ */)
+                                (name - chosen_name) == 5 /* /dev/ */)
                                name += 2;
                }
        }
        if (!st->ss->init_super(st, &info.array, s, name, c->homehost, uuid,
-                               data_offset))
+                               s->data_offset))
                goto abort_locked;
 
        total_slots = info.array.nr_disks;
@@ -748,25 +1161,30 @@ int Create(struct supertype *st, char *mddev,
                goto abort_locked;
        }
 
-       if (did_default && c->verbose >= 0) {
+       if (did_default) {
                if (is_subarray(info.text_version)) {
-                       char devnm[32];
-                       char *ep;
+                       char devnm[MD_NAME_MAX];
                        struct mdinfo *mdi;
 
-                       strncpy(devnm, info.text_version+1, 32);
-                       devnm[31] = 0;
-                       ep = strchr(devnm, '/');
-                       if (ep)
-                               *ep = 0;
+                       sysfs_get_container_devnm(&info, devnm);
 
-                       mdi = sysfs_read(-1, devnm, GET_VERSION);
+                       mdi = sysfs_read(-1, devnm, GET_VERSION | GET_DEVS);
+                       if (!mdi) {
+                               pr_err("Cannot open sysfs for container %s\n", devnm);
+                               goto abort_locked;
+                       }
+
+                       if (sysfs_test_and_add_drive_policies(st, &custom_pols, mdi, 1))
+                               goto abort_locked;
+
+                       if (c->verbose >= 0)
+                               pr_info("Creating array inside %s container /dev/%s\n",
+                                       mdi->text_version, devnm);
 
-                       pr_err("Creating array inside %s container %s\n",
-                               mdi?mdi->text_version:"managed", devnm);
                        sysfs_free(mdi);
-               } else
-                       pr_err("Defaulting to version %s metadata\n", info.text_version);
+               } else if (c->verbose >= 0) {
+                       pr_info("Defaulting to version %s metadata\n", info.text_version);
+               }
        }
 
        map_update(&map, fd2devnm(mdfd), info.text_version,
@@ -826,8 +1244,7 @@ int Create(struct supertype *st, char *mddev,
        }
        rv = set_array_info(mdfd, st, &info);
        if (rv) {
-               pr_err("failed to set array info for %s: %s\n",
-                       mddev, strerror(errno));
+               pr_err("failed to set array info for %s: %s\n", chosen_name, strerror(errno));
                goto abort_locked;
        }
 
@@ -848,171 +1265,23 @@ int Create(struct supertype *st, char *mddev,
                        goto abort_locked;
                }
                if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
-                       pr_err("Cannot set bitmap file for %s: %s\n",
-                               mddev, strerror(errno));
+                       pr_err("Cannot set bitmap file for %s: %s\n", chosen_name, strerror(errno));
                        goto abort_locked;
                }
        }
 
-       infos = xmalloc(sizeof(*infos) * total_slots);
-       enable_fds(total_slots);
-       for (pass = 1; pass <= 2; pass++) {
-               struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
-
-               for (dnum = 0, raid_disk_num = 0, dv = devlist; dv;
-                    dv = (dv->next) ? (dv->next) : moved_disk, dnum++) {
-                       int fd;
-                       struct mdinfo *inf = &infos[dnum];
-
-                       if (dnum >= total_slots)
-                               abort();
-                       if (dnum == insert_point) {
-                               raid_disk_num += 1;
-                               moved_disk = dv;
-                               continue;
-                       }
-                       if (strcasecmp(dv->devname, "missing") == 0) {
-                               raid_disk_num += 1;
-                               continue;
-                       }
-                       if (have_container)
-                               moved_disk = NULL;
-                       if (have_container && dnum < info.array.raid_disks - 1)
-                               /* repeatedly use the container */
-                               moved_disk = dv;
-
-                       switch(pass) {
-                       case 1:
-                               *inf = info;
-
-                               inf->disk.number = dnum;
-                               inf->disk.raid_disk = raid_disk_num++;
-
-                               if (dv->disposition == 'j') {
-                                       inf->disk.raid_disk = MD_DISK_ROLE_JOURNAL;
-                                       inf->disk.state = (1<<MD_DISK_JOURNAL);
-                                       raid_disk_num--;
-                               } else if (inf->disk.raid_disk < s->raiddisks)
-                                       inf->disk.state = (1<<MD_DISK_ACTIVE) |
-                                               (1<<MD_DISK_SYNC);
-                               else
-                                       inf->disk.state = 0;
-
-                               if (dv->writemostly == FlagSet)
-                                       inf->disk.state |= (1<<MD_DISK_WRITEMOSTLY);
-                               if (dv->failfast == FlagSet)
-                                       inf->disk.state |= (1<<MD_DISK_FAILFAST);
-
-                               if (have_container)
-                                       fd = -1;
-                               else {
-                                       if (st->ss->external &&
-                                           st->container_devnm[0])
-                                               fd = open(dv->devname, O_RDWR);
-                                       else
-                                               fd = open(dv->devname, O_RDWR|O_EXCL);
-
-                                       if (fd < 0) {
-                                               pr_err("failed to open %s after earlier success - aborting\n",
-                                                       dv->devname);
-                                               goto abort_locked;
-                                       }
-                                       if (!fstat_is_blkdev(fd, dv->devname, &rdev))
-                                               return 1;
-                                       inf->disk.major = major(rdev);
-                                       inf->disk.minor = minor(rdev);
-                               }
-                               if (fd >= 0)
-                                       remove_partitions(fd);
-                               if (st->ss->add_to_super(st, &inf->disk,
-                                                        fd, dv->devname,
-                                                        dv->data_offset)) {
-                                       ioctl(mdfd, STOP_ARRAY, NULL);
-                                       goto abort_locked;
-                               }
-                               st->ss->getinfo_super(st, inf, NULL);
-                               safe_mode_delay = inf->safe_mode_delay;
-
-                               if (have_container && c->verbose > 0)
-                                       pr_err("Using %s for device %d\n",
-                                               map_dev(inf->disk.major,
-                                                       inf->disk.minor,
-                                                       0), dnum);
-
-                               if (!have_container) {
-                                       /* getinfo_super might have lost these ... */
-                                       inf->disk.major = major(rdev);
-                                       inf->disk.minor = minor(rdev);
-                               }
-                               break;
-                       case 2:
-                               inf->errors = 0;
-
-                               rv = add_disk(mdfd, st, &info, inf);
-
-                               if (rv) {
-                                       pr_err("ADD_NEW_DISK for %s failed: %s\n",
-                                              dv->devname, strerror(errno));
-                                       if (errno == EINVAL &&
-                                           info.array.level == 0) {
-                                               pr_err("Possibly your kernel doesn't support RAID0 layouts.\n");
-                                               pr_err("Either upgrade, or use --layout=dangerous\n");
-                                       }
-                                       goto abort_locked;
-                               }
-                               break;
-                       }
-                       if (!have_container &&
-                           dv == moved_disk && dnum != insert_point) break;
-               }
-               if (pass == 1) {
-                       struct mdinfo info_new;
-                       struct map_ent *me = NULL;
-
-                       /* check to see if the uuid has changed due to these
-                        * metadata changes, and if so update the member array
-                        * and container uuid.  Note ->write_init_super clears
-                        * the subarray cursor such that ->getinfo_super once
-                        * again returns container info.
-                        */
-                       st->ss->getinfo_super(st, &info_new, NULL);
-                       if (st->ss->external && s->level != LEVEL_CONTAINER &&
-                           !same_uuid(info_new.uuid, info.uuid, 0)) {
-                               map_update(&map, fd2devnm(mdfd),
-                                          info_new.text_version,
-                                          info_new.uuid, chosen_name);
-                               me = map_by_devnm(&map, st->container_devnm);
-                       }
-
-                       if (st->ss->write_init_super(st)) {
-                               st->ss->free_super(st);
-                               goto abort_locked;
-                       }
-
-                       /* update parent container uuid */
-                       if (me) {
-                               char *path = xstrdup(me->path);
-
-                               st->ss->getinfo_super(st, &info_new, NULL);
-                               map_update(&map, st->container_devnm,
-                                          info_new.text_version,
-                                          info_new.uuid, path);
-                               free(path);
-                       }
+       if (add_disks(mdfd, &info, s, c, st, &map, devlist, total_slots,
+                     have_container, insert_point, major_num, chosen_name))
+               goto abort_locked;
 
-                       flush_metadata_updates(st);
-                       st->ss->free_super(st);
-               }
-       }
        map_unlock(&map);
-       free(infos);
 
-       if (s->level == LEVEL_CONTAINER) {
+       if (is_container(s->level)) {
                /* No need to start.  But we should signal udev to
                 * create links */
                sysfs_uevent(&info, "change");
                if (c->verbose >= 0)
-                       pr_err("container %s prepared.\n", mddev);
+                       pr_err("container %s prepared.\n", chosen_name);
                wait_for(chosen_name, mdfd);
        } else if (c->runstop == 1 || subdevs >= s->raiddisks) {
                if (st->ss->external) {
@@ -1032,7 +1301,7 @@ int Create(struct supertype *st, char *mddev,
                                                    "readonly");
                                break;
                        }
-                       sysfs_set_safemode(&info, safe_mode_delay);
+                       sysfs_set_safemode(&info, info.safe_mode_delay);
                        if (err) {
                                pr_err("failed to activate array.\n");
                                ioctl(mdfd, STOP_ARRAY, NULL);
@@ -1070,7 +1339,7 @@ int Create(struct supertype *st, char *mddev,
                        ioctl(mdfd, RESTART_ARRAY_RW, NULL);
                }
                if (c->verbose >= 0)
-                       pr_err("array %s started.\n", mddev);
+                       pr_info("array %s started.\n", chosen_name);
                if (st->ss->external && st->container_devnm[0]) {
                        if (need_mdmon)
                                start_mdmon(st->container_devnm);
@@ -1082,12 +1351,11 @@ int Create(struct supertype *st, char *mddev,
        } else {
                pr_err("not starting array - not enough devices.\n");
        }
-       close(mdfd);
-       /* Give udev a moment to process the Change event caused
-        * by the close.
-        */
-       usleep(100*1000);
        udev_unblock();
+       close(mdfd);
+       sysfs_uevent(&info, "change");
+       dev_policy_free(custom_pols);
+
        return 0;
 
  abort:
@@ -1099,5 +1367,7 @@ int Create(struct supertype *st, char *mddev,
 
        if (mdfd >= 0)
                close(mdfd);
+
+       dev_policy_free(custom_pols);
        return 1;
 }
index 832485fe0fb5a10e9815abcb03c02be39b1cb9f6..55a086d3378ff2172d307c0ad3ecf0bdb01f531a 100644 (file)
--- a/Detail.c
+++ b/Detail.c
@@ -49,6 +49,30 @@ static int add_device(const char *dev, char ***p_devices,
        return n_devices + 1;
 }
 
+/**
+ * detail_fname_from_uuid() - generate uuid string with special super1 handling.
+ * @mp: map entry whose uuid and metadata version string are read.
+ * @buf: buffer the generated uuid string is written to.
+ *
+ * Hack to work around an issue with super1 superblocks. They need swapuuid set in order for
+ * assembly to work, but can't have it set if we want this printout to match all the other uuid
+ * printouts in super1.c, so we force swapping for "1." metadata to match the rest of super1.
+ *
+ * Always convert uuid if host is big endian. Returns the result of __fname_from_uuid().
+ */
+char *detail_fname_from_uuid(struct map_ent *mp, char *buf)
+{
+#if __BYTE_ORDER == BIG_ENDIAN
+       bool swap = true;
+#else
+       bool swap = false;
+#endif
+       if (strncmp(mp->metadata, "1.", 2) == 0)
+               swap = true;
+
+       return __fname_from_uuid(mp->uuid, swap, buf, ':');
+}
+
 int Detail(char *dev, struct context *c)
 {
        /*
@@ -66,11 +90,11 @@ int Detail(char *dev, struct context *c)
        int spares = 0;
        struct stat stb;
        int failed = 0;
-       struct supertype *st;
+       struct supertype *st = NULL;
        char *subarray = NULL;
        int max_disks = MD_SB_DISKS; /* just a default */
        struct mdinfo *info = NULL;
-       struct mdinfo *sra;
+       struct mdinfo *sra = NULL;
        struct mdinfo *subdev;
        char *member = NULL;
        char *container = NULL;
@@ -93,8 +117,7 @@ int Detail(char *dev, struct context *c)
        if (!sra) {
                if (md_get_array_info(fd, &array)) {
                        pr_err("%s does not appear to be an md device\n", dev);
-                       close(fd);
-                       return rv;
+                       goto out;
                }
        }
        external = (sra != NULL && sra->array.major_version == -1 &&
@@ -108,16 +131,13 @@ int Detail(char *dev, struct context *c)
                            sra->devs == NULL) {
                                pr_err("Array associated with md device %s does not exist.\n",
                                       dev);
-                               close(fd);
-                               sysfs_free(sra);
-                               return rv;
+                               goto out;
                        }
                        array = sra->array;
                } else {
                        pr_err("cannot get array detail for %s: %s\n",
                               dev, strerror(errno));
-                       close(fd);
-                       return rv;
+                       goto out;
                }
        }
 
@@ -224,9 +244,15 @@ int Detail(char *dev, struct context *c)
        }
 
        /* Ok, we have some info to print... */
-       str = map_num(pers, array.level);
+       if (inactive && info)
+               str = map_num(pers, info->array.level);
+       else
+               str = map_num(pers, array.level);
 
        if (c->export) {
+               char nbuf[64];
+               struct map_ent *mp = NULL, *map = NULL;
+
                if (array.raid_disks) {
                        if (str)
                                printf("MD_LEVEL=%s\n", str);
@@ -248,38 +274,22 @@ int Detail(char *dev, struct context *c)
                                       array.minor_version);
                }
 
-               if (st && st->sb && info) {
-                       char nbuf[64];
-                       struct map_ent *mp, *map = NULL;
+               if (info)
+                       mp = map_by_uuid(&map, info->uuid);
+               if (!mp)
+                       mp = map_by_devnm(&map, fd2devnm(fd));
 
-                       fname_from_uuid(st, info, nbuf, ':');
+               if (mp) {
+                       detail_fname_from_uuid(mp, nbuf);
                        printf("MD_UUID=%s\n", nbuf + 5);
-                       mp = map_by_uuid(&map, info->uuid);
-                       if (mp && mp->path &&
-                           strncmp(mp->path, "/dev/md/", 8) == 0) {
-                               printf("MD_DEVNAME=");
-                               print_escape(mp->path + 8);
-                               putchar('\n');
-                       }
+                       if (mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
+                               printf("MD_DEVNAME=%s\n", mp->path + DEV_MD_DIR_LEN);
+               }
 
+               map_free(map);
+               if (st && st->sb) {
                        if (st->ss->export_detail_super)
                                st->ss->export_detail_super(st);
-                       map_free(map);
-               } else {
-                       struct map_ent *mp, *map = NULL;
-                       char nbuf[64];
-                       mp = map_by_devnm(&map, fd2devnm(fd));
-                       if (mp) {
-                               __fname_from_uuid(mp->uuid, 0, nbuf, ':');
-                               printf("MD_UUID=%s\n", nbuf+5);
-                       }
-                       if (mp && mp->path &&
-                           strncmp(mp->path, "/dev/md/", 8) == 0) {
-                               printf("MD_DEVNAME=");
-                               print_escape(mp->path+8);
-                               putchar('\n');
-                       }
-                       map_free(map);
                }
                if (!c->no_devices && sra) {
                        struct mdinfo *mdi;
@@ -304,6 +314,7 @@ int Detail(char *dev, struct context *c)
                                if (path)
                                        printf("MD_DEVICE_%s_DEV=%s\n",
                                               sysdev, path);
+                               free(sysdev);
                        }
                }
                goto out;
@@ -352,9 +363,14 @@ int Detail(char *dev, struct context *c)
        avail = xcalloc(array.raid_disks, 1);
 
        for (d = 0; d < array.raid_disks; d++) {
-
-               if ((disks[d*2].state & (1<<MD_DISK_SYNC)) ||
-                   (disks[d*2+1].state & (1<<MD_DISK_SYNC))) {
+               char dv[PATH_MAX], dv_rep[PATH_MAX];
+               snprintf(dv, PATH_MAX, "/sys/dev/block/%d:%d",
+                       disks[d*2].major, disks[d*2].minor);
+               snprintf(dv_rep, PATH_MAX, "/sys/dev/block/%d:%d",
+                       disks[d*2+1].major, disks[d*2+1].minor);
+
+               if ((is_dev_alive(dv) && (disks[d*2].state & (1<<MD_DISK_SYNC))) ||
+                   (is_dev_alive(dv_rep) && (disks[d*2+1].state & (1<<MD_DISK_SYNC)))) {
                        avail_disks ++;
                        avail[d] = 1;
                } else
@@ -468,7 +484,9 @@ int Detail(char *dev, struct context *c)
                if (ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 && bmf.pathname[0]) {
                        printf("     Intent Bitmap : %s\n", bmf.pathname);
                        printf("\n");
-               } else if (array.state & (1<<MD_SB_BITMAP_PRESENT))
+               } else if (array.state & (1<<MD_SB_CLUSTERED))
+                       printf("     Intent Bitmap : Internal(Clustered)\n\n");
+               else if (array.state & (1<<MD_SB_BITMAP_PRESENT))
                        printf("     Intent Bitmap : Internal\n\n");
                atime = array.utime;
                if (atime)
@@ -489,24 +507,45 @@ int Detail(char *dev, struct context *c)
                        if (array.state & (1 << MD_SB_CLEAN)) {
                                if ((array.level == 0) ||
                                    (array.level == LEVEL_LINEAR))
-                                       arrayst = map_num(sysfs_array_states,
-                                                         sra->array_state);
+                                       arrayst = map_num_s(sysfs_array_states,
+                                                              sra->array_state);
                                else
                                        arrayst = "clean";
-                       } else
+                       } else {
                                arrayst = "active";
+                               if (array.state & (1<<MD_SB_CLUSTERED)) {
+                                       for (d = 0; d < max_disks * 2; d++) {
+                                               char *dv;
+                                               mdu_disk_info_t disk = disks[d];
+
+                                               /* only check first valid disk in cluster env */
+                                               if ((disk.state & (MD_DISK_SYNC | MD_DISK_ACTIVE))
+                                                       && (disk.major | disk.minor)) {
+                                                       dv = map_dev_preferred(disk.major, disk.minor, 0,
+                                                                       c->prefer);
+                                                       if (!dv)
+                                                               continue;
+                                                       arrayst = IsBitmapDirty(dv) ? "active" : "clean";
+                                                       break;
+                                               }
+                                       }
+                               }
+                       }
 
-                       printf("             State : %s%s%s%s%s%s \n",
+                       printf("             State : %s%s%s%s%s%s%s \n",
                               arrayst, st,
                               (!e || (e->percent < 0 &&
                                       e->percent != RESYNC_PENDING &&
-                                      e->percent != RESYNC_DELAYED)) ?
+                                      e->percent != RESYNC_DELAYED &&
+                                      e->percent != RESYNC_REMOTE)) ?
                               "" : sync_action[e->resync],
                               larray_size ? "": ", Not Started",
                               (e && e->percent == RESYNC_DELAYED) ?
                               " (DELAYED)": "",
                               (e && e->percent == RESYNC_PENDING) ?
-                              " (PENDING)": "");
+                              " (PENDING)": "",
+                              (e && e->percent == RESYNC_REMOTE) ?
+                              " (REMOTE)": "");
                } else if (inactive && !is_container) {
                        printf("             State : inactive\n");
                }
@@ -517,7 +556,8 @@ int Detail(char *dev, struct context *c)
                               array.working_disks);
                if (array.raid_disks) {
                        printf("    Failed Devices : %d\n", array.failed_disks);
-                       printf("     Spare Devices : %d\n", array.spare_disks);
+                       if (!external)
+                               printf("     Spare Devices : %d\n", array.spare_disks);
                }
                printf("\n");
                if (array.level == 5) {
@@ -580,13 +620,6 @@ int Detail(char *dev, struct context *c)
                }
 
                if ((st && st->sb) && (info && info->reshape_active)) {
-#if 0
-This is pretty boring
-                       printf("     Reshape pos'n : %llu%s\n",
-                              (unsigned long long) info->reshape_progress << 9,
-                              human_size((unsigned long long)
-                                         info->reshape_progress << 9));
-#endif
                        if (info->delta_disks != 0)
                                printf("     Delta Devices : %d, (%d->%d)\n",
                                       info->delta_disks,
@@ -763,7 +796,8 @@ This is pretty boring
                                                       &max_devices, n_devices);
                        else
                                printf("   %s", dv);
-               }
+               } else if (disk.major | disk.minor)
+                       printf("   missing");
                if (!c->brief)
                        printf("\n");
        }
@@ -794,10 +828,12 @@ out:
        close(fd);
        free(subarray);
        free(avail);
-       for (d = 0; d < n_devices; d++)
-               free(devices[d]);
+       if (devices)
+               for (d = 0; d < n_devices; d++)
+                       free(devices[d]);
        free(devices);
        sysfs_free(sra);
+       free(st);
        return rv;
 }
 
diff --git a/Dump.c b/Dump.c
index 38e8f238a156f22b61b0e8dd8f89f62abed1d607..736bcb608496b355e33f77aa92cbf7f31c2ffe9d 100644 (file)
--- a/Dump.c
+++ b/Dump.c
@@ -272,6 +272,11 @@ int Restore_metadata(char *dev, char *dir, struct context *c,
                       fname);
                goto err;
        }
+       if (stat(fname, &stb) != 0) {
+               pr_err("Could not stat %s for --restore.\n",
+                      fname);
+               goto err;
+       }
        if (((unsigned long long)stb.st_size) != size) {
                pr_err("%s is not the same size as %s - cannot restore.\n",
                       fname, dev);
index 7013480d6dd80846cc2f464d3ba23c9e8e21284d..c9605a60dfe476927d3a4dac1ca2912f04c3fa22 100644 (file)
--- a/Examine.c
+++ b/Examine.c
@@ -117,7 +117,7 @@ int Examine(struct mddev_dev *devlist,
                }
 
                if (c->SparcAdjust)
-                       st->ss->update_super(st, NULL, "sparc2.2",
+                       st->ss->update_super(st, NULL, UOPT_SPARC22,
                                             devlist->devname, 0, 0, NULL);
                /* Ok, its good enough to try, though the checksum could be wrong */
 
@@ -130,7 +130,7 @@ int Examine(struct mddev_dev *devlist,
                        char *d;
                        for (ap = arrays; ap; ap = ap->next) {
                                if (st->ss == ap->st->ss &&
-                                   st->ss->compare_super(ap->st, st) == 0)
+                                   st->ss->compare_super(ap->st, st, 0) == 0)
                                        break;
                        }
                        if (!ap) {
@@ -166,7 +166,7 @@ int Examine(struct mddev_dev *devlist,
                        int newline = 0;
 
                        ap->st->ss->brief_examine_super(ap->st, c->verbose > 0);
-                       if (ap->spares)
+                       if (ap->spares && !ap->st->ss->external)
                                newline += printf("   spares=%d", ap->spares);
                        if (c->verbose > 0) {
                                newline += printf("   devices");
diff --git a/Grow.c b/Grow.c
index 764374fc2da4f38a3672389b6ed9dffad5614612..87ed9214ef0249d9dbc851f9afa7cf0183f95dcb 100644 (file)
--- a/Grow.c
+++ b/Grow.c
@@ -26,7 +26,6 @@
 #include       <sys/mman.h>
 #include       <stddef.h>
 #include       <stdint.h>
-#include       <signal.h>
 #include       <sys/wait.h>
 
 #if ! defined(__BIG_ENDIAN) && ! defined(__LITTLE_ENDIAN)
@@ -197,7 +196,12 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
        info.disk.minor = minor(rdev);
        info.disk.raid_disk = d;
        info.disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
-       st->ss->update_super(st, &info, "linear-grow-new", newdev, 0, 0, NULL);
+       if (st->ss->update_super(st, &info, UOPT_SPEC_LINEAR_GROW_NEW, newdev,
+                                0, 0, NULL) != 0) {
+               pr_err("Preparing new metadata failed on %s\n", newdev);
+               close(nfd);
+               return 1;
+       }
 
        if (st->ss->store_super(st, nfd)) {
                pr_err("Cannot store new superblock on %s\n", newdev);
@@ -250,8 +254,12 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
                info.array.active_disks = nd+1;
                info.array.working_disks = nd+1;
 
-               st->ss->update_super(st, &info, "linear-grow-update", dv,
-                                    0, 0, NULL);
+               if (st->ss->update_super(st, &info, UOPT_SPEC_LINEAR_GROW_UPDATE, dv,
+                                    0, 0, NULL) != 0) {
+                       pr_err("Updating metadata failed on %s\n", dv);
+                       close(fd2);
+                       return 1;
+               }
 
                if (st->ss->store_super(st, fd2)) {
                        pr_err("Cannot store new superblock on %s\n", dv);
@@ -301,7 +309,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                return 1;
        }
        if (bmf.pathname[0]) {
-               if (strcmp(s->bitmap_file,"none") == 0) {
+               if (str_is_none(s->bitmap_file) == true) {
                        if (ioctl(fd, SET_BITMAP_FILE, -1) != 0) {
                                pr_err("failed to remove bitmap %s\n",
                                        bmf.pathname);
@@ -317,7 +325,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                return 1;
        }
        if (array.state & (1 << MD_SB_BITMAP_PRESENT)) {
-               if (strcmp(s->bitmap_file, "none")==0) {
+               if (str_is_none(s->bitmap_file) == true) {
                        array.state &= ~(1 << MD_SB_BITMAP_PRESENT);
                        if (md_set_array_info(fd, &array) != 0) {
                                if (array.state & (1 << MD_SB_CLUSTERED))
@@ -332,7 +340,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                return 1;
        }
 
-       if (strcmp(s->bitmap_file, "none") == 0) {
+       if (str_is_none(s->bitmap_file) == true) {
                pr_err("no bitmap found on %s\n", devname);
                return 1;
        }
@@ -421,6 +429,12 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                        dv = map_dev(disk.major, disk.minor, 1);
                        if (!dv)
                                continue;
+                       if ((disk.state & (1 << MD_DISK_WRITEMOSTLY)) &&
+                          (strcmp(s->bitmap_file, "clustered") == 0)) {
+                               pr_err("%s disks marked write-mostly are not supported with clustered bitmap\n",devname);
+                               free(mdi);
+                               return 1;
+                       }
                        fd2 = dev_open(dv, O_RDWR);
                        if (fd2 < 0)
                                continue;
@@ -440,8 +454,10 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
                                pr_err("failed to load super-block.\n");
                        }
                        close(fd2);
-                       if (rv)
+                       if (rv) {
+                               free(mdi);
                                return 1;
+                       }
                }
                if (offset_setable) {
                        st->ss->getinfo_super(st, mdi, NULL);
@@ -529,12 +545,12 @@ int Grow_consistency_policy(char *devname, int fd, struct context *c, struct sha
        char *subarray = NULL;
        int ret = 0;
        char container_dev[PATH_MAX];
-       char buf[20];
+       char buf[SYSFS_MAX_BUF_SIZE];
 
        if (s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
            s->consistency_policy != CONSISTENCY_POLICY_PPL) {
                pr_err("Operation not supported for consistency policy %s\n",
-                      map_num(consistency_policies, s->consistency_policy));
+                      map_num_s(consistency_policies, s->consistency_policy));
                return 1;
        }
 
@@ -565,20 +581,20 @@ int Grow_consistency_policy(char *devname, int fd, struct context *c, struct sha
 
        if (sra->consistency_policy == (unsigned)s->consistency_policy) {
                pr_err("Consistency policy is already %s\n",
-                      map_num(consistency_policies, s->consistency_policy));
+                      map_num_s(consistency_policies, s->consistency_policy));
                ret = 1;
                goto free_info;
        } else if (sra->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
                   sra->consistency_policy != CONSISTENCY_POLICY_PPL) {
                pr_err("Current consistency policy is %s, cannot change to %s\n",
-                      map_num(consistency_policies, sra->consistency_policy),
-                      map_num(consistency_policies, s->consistency_policy));
+                      map_num_s(consistency_policies, sra->consistency_policy),
+                      map_num_s(consistency_policies, s->consistency_policy));
                ret = 1;
                goto free_info;
        }
 
        if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
-               if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0) {
+               if (sysfs_get_str(sra, NULL, "sync_action", buf, sizeof(buf)) <= 0) {
                        ret = 1;
                        goto free_info;
                } else if (strcmp(buf, "reshape\n") == 0) {
@@ -589,12 +605,12 @@ int Grow_consistency_policy(char *devname, int fd, struct context *c, struct sha
        }
 
        if (subarray) {
-               char *update;
+               enum update_opt update;
 
                if (s->consistency_policy == CONSISTENCY_POLICY_PPL)
-                       update = "ppl";
+                       update = UOPT_PPL;
                else
-                       update = "no-ppl";
+                       update = UOPT_NO_PPL;
 
                sprintf(container_dev, "/dev/%s", st->container_devnm);
 
@@ -652,7 +668,7 @@ int Grow_consistency_policy(char *devname, int fd, struct context *c, struct sha
                                        goto free_info;
                                }
 
-                               ret = st->ss->update_super(st, sra, "ppl",
+                               ret = st->ss->update_super(st, sra, UOPT_PPL,
                                                           devname,
                                                           c->verbose, 0, NULL);
                                if (ret) {
@@ -691,8 +707,8 @@ int Grow_consistency_policy(char *devname, int fd, struct context *c, struct sha
        }
 
        ret = sysfs_set_str(sra, NULL, "consistency_policy",
-                           map_num(consistency_policies,
-                                   s->consistency_policy));
+                           map_num_s(consistency_policies,
+                                        s->consistency_policy));
        if (ret)
                pr_err("Failed to change array consistency policy\n");
 
@@ -801,12 +817,12 @@ static int freeze(struct supertype *st)
        else {
                struct mdinfo *sra = sysfs_read(-1, st->devnm, GET_VERSION);
                int err;
-               char buf[20];
+               char buf[SYSFS_MAX_BUF_SIZE];
 
                if (!sra)
                        return -1;
                /* Need to clear any 'read-auto' status */
-               if (sysfs_get_str(sra, NULL, "array_state", buf, 20) > 0 &&
+               if (sysfs_get_str(sra, NULL, "array_state", buf, sizeof(buf)) > 0 &&
                    strncmp(buf, "read-auto", 9) == 0)
                        sysfs_set_str(sra, NULL, "array_state", "clean");
 
@@ -822,10 +838,10 @@ static void unfreeze(struct supertype *st)
                return unfreeze_container(st);
        else {
                struct mdinfo *sra = sysfs_read(-1, st->devnm, GET_VERSION);
-               char buf[20];
+               char buf[SYSFS_MAX_BUF_SIZE];
 
                if (sra &&
-                   sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 &&
+                   sysfs_get_str(sra, NULL, "sync_action", buf, sizeof(buf)) > 0 &&
                    strcmp(buf, "frozen\n") == 0)
                        sysfs_set_str(sra, NULL, "sync_action", "idle");
                sysfs_free(sra);
@@ -835,21 +851,18 @@ static void unfreeze(struct supertype *st)
 static void wait_reshape(struct mdinfo *sra)
 {
        int fd = sysfs_get_fd(sra, NULL, "sync_action");
-       char action[20];
+       char action[SYSFS_MAX_BUF_SIZE];
 
        if (fd < 0)
                return;
 
-       while (sysfs_fd_get_str(fd, action, 20) > 0 &&
+       while (sysfs_fd_get_str(fd, action, sizeof(action)) > 0 &&
               strncmp(action, "reshape", 7) == 0)
                sysfs_wait(fd, NULL);
        close(fd);
 }
 
-static int reshape_super(struct supertype *st, unsigned long long size,
-                        int level, int layout, int chunksize, int raid_disks,
-                        int delta_disks, char *backup_file, char *dev,
-                        int direction, int verbose)
+static int reshape_super(struct supertype *st, struct shape *shape, struct context *c)
 {
        /* nothing extra to check in the native case */
        if (!st->ss->external)
@@ -860,9 +873,65 @@ static int reshape_super(struct supertype *st, unsigned long long size,
                return 1;
        }
 
-       return st->ss->reshape_super(st, size, level, layout, chunksize,
-                                    raid_disks, delta_disks, backup_file, dev,
-                                    direction, verbose);
+       return st->ss->reshape_super(st, shape, c);
+}
+
+/**
+ * reshape_super_size() - Reshape array, size only.
+ *
+ * @st: supertype.
+ * @devname: device name.
+ * @size: component size.
+ * @direction: metadata changes direction.
+ * Returns: 0 on success, 1 otherwise.
+ *
+ * This function is solely used to change the size of the volume.
+ * Setting size is not valid for a container.
+ * Size is the only change that can be rolled back, thus the @direction param.
+ */
+static int reshape_super_size(struct supertype *st, char *devname,
+                             unsigned long long size, change_dir_t direction,
+                             struct context *c)
+{
+       struct shape shape = {0};
+
+       shape.level = UnSet;
+       shape.layout = UnSet;
+       shape.delta_disks = UnSet;
+       shape.dev = devname;
+       shape.size = size;
+       shape.direction = direction;
+
+       return reshape_super(st, &shape, c);
+}
+
+/**
+ * reshape_super_non_size() - Reshape array, non size changes.
+ *
+ * @st: supertype.
+ * @devname: device name.
+ * @info: superblock info.
+ * Returns: 0 on success, 1 otherwise.
+ *
+ * This function is used for any external array changes but size.
+ * It handles both volumes and containers.
+ * For changes other than size, rollback is not possible.
+ */
+static int reshape_super_non_size(struct supertype *st, char *devname,
+                                 struct mdinfo *info, struct context *c)
+{
+       struct shape shape = {0};
+       /* Size already set to zero, not updating size */
+       shape.level = info->new_level;
+       shape.layout = info->new_layout;
+       shape.chunk = info->new_chunk;
+       shape.raiddisks = info->array.raid_disks;
+       shape.delta_disks = info->delta_disks;
+       shape.dev = devname;
+       /* Rollback not possible for non size changes */
+       shape.direction = APPLY_METADATA_CHANGES;
+
+       return reshape_super(st, &shape, c);
 }
 
 static void sync_metadata(struct supertype *st)
@@ -886,7 +955,7 @@ static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int
         * to close a race with the array_state going clean before the
         * next write to raid_disks / stripe_cache_size
         */
-       char safe[50];
+       char safe[SYSFS_MAX_BUF_SIZE];
        int rc;
 
        /* only 'raid_disks' and 'stripe_cache_size' trigger md_allow_write */
@@ -912,7 +981,7 @@ static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int
 }
 
 int start_reshape(struct mdinfo *sra, int already_running,
-                 int before_data_disks, int data_disks)
+                 int before_data_disks, int data_disks, struct supertype *st)
 {
        int err;
        unsigned long long sync_max_to_set;
@@ -926,16 +995,22 @@ int start_reshape(struct mdinfo *sra, int already_running,
        else
                sync_max_to_set = (sra->component_size * data_disks
                                   - sra->reshape_progress) / data_disks;
+
        if (!already_running)
                sysfs_set_num(sra, NULL, "sync_min", sync_max_to_set);
-       err = err ?: sysfs_set_num(sra, NULL, "sync_max", sync_max_to_set);
+
+        if (st->ss->external)
+               err = err ?: sysfs_set_num(sra, NULL, "sync_max", sync_max_to_set);
+       else
+               err = err ?: sysfs_set_str(sra, NULL, "sync_max", "max");
+
        if (!already_running && err == 0) {
                int cnt = 5;
                do {
                        err = sysfs_set_str(sra, NULL, "sync_action",
                                            "reshape");
                        if (err)
-                               sleep(1);
+                               sleep_for(1, 0, true);
                } while (err && errno == EBUSY && cnt-- > 0);
        }
        return err;
@@ -981,8 +1056,8 @@ int remove_disks_for_takeover(struct supertype *st,
                                rv = 1;
                        sysfs_free(arrays);
                        if (rv) {
-                               pr_err("Error. Cannot perform operation on /dev/%s\n", st->devnm);
-                               pr_err("For this operation it MUST be single array in container\n");
+                               pr_err("Error. Cannot perform operation on %s- for this operation "
+                                      "it MUST be single array in container\n", st->devnm);
                                return rv;
                        }
                }
@@ -1045,7 +1120,7 @@ int remove_disks_for_takeover(struct supertype *st,
                remaining = sd->next;
 
                sysfs_set_str(sra, sd, "state", "faulty");
-               sysfs_set_str(sra, sd, "slot", "none");
+               sysfs_set_str(sra, sd, "slot", STR_COMMON_NONE);
                /* for external metadata disks should be removed in mdmon */
                if (!st->ss->external)
                        sysfs_set_str(sra, sd, "state", "remove");
@@ -1686,14 +1761,6 @@ char *analyse_change(char *devname, struct mdinfo *info, struct reshape *re)
                return NULL;
        }
 
-       if (re->after.data_disks == re->before.data_disks &&
-           get_linux_version() < 2006032)
-               return "in-place reshape is not safe before 2.6.32 - sorry.";
-
-       if (re->after.data_disks < re->before.data_disks &&
-           get_linux_version() < 2006030)
-               return "reshape to fewer devices is not supported before 2.6.30 - sorry.";
-
        re->backup_blocks = compute_backup_blocks(
                info->new_chunk, info->array.chunk_size,
                re->after.data_disks, re->before.data_disks);
@@ -1750,13 +1817,70 @@ static int reshape_container(char *container, char *devname,
                             int mdfd,
                             struct supertype *st,
                             struct mdinfo *info,
-                            int force,
-                            char *backup_file, int verbose,
-                            int forked, int restart, int freeze_reshape);
+                            struct context *c,
+                            int forked, int restart);
+
+/**
+ * prepare_external_reshape() - prepares update on external metadata if supported.
+ * @devname: Device name.
+ * @subarray: Subarray.
+ * @st: Supertype.
+ * @container: Container.
+ * @cfd: Container file descriptor.
+ *
+ * Function checks that the requested reshape is supported on external metadata,
+ * and performs an initial check that the container holds the pre-requisite
+ * spare devices (mdmon owns final validation).
+ *
+ * Return: 0 on success, else 1
+ */
+static int prepare_external_reshape(char *devname, char *subarray,
+                                   struct supertype *st, char *container,
+                                   const int cfd)
+{
+       struct mdinfo *cc = NULL;
+       struct mdinfo *content = NULL;
+
+       if (st->ss->load_container(st, cfd, NULL)) {
+               pr_err("Cannot read superblock for %s\n", devname);
+               return 1;
+       }
+
+       if (!st->ss->container_content)
+               return 1;
+
+       cc = st->ss->container_content(st, subarray);
+       for (content = cc; content ; content = content->next) {
+               /*
+                * Check if reshape is allowed based on metadata
+                * indications stored in content->array.state.
+                */
+               if (is_bit_set(&content->array.state, MD_SB_BLOCK_VOLUME) ||
+                   is_bit_set(&content->array.state, MD_SB_BLOCK_CONTAINER_RESHAPE)) {
+                       pr_err("Cannot reshape arrays in container with unsupported metadata: %s(%s)\n",
+                              devname, container);
+                       goto error;
+               }
+               if (content->consistency_policy == CONSISTENCY_POLICY_PPL) {
+                       pr_err("Operation not supported when ppl consistency policy is enabled\n");
+                       goto error;
+               }
+               if (content->consistency_policy == CONSISTENCY_POLICY_BITMAP) {
+                       pr_err("Operation not supported when write-intent bitmap consistency policy is enabled\n");
+                       goto error;
+               }
+       }
+       sysfs_free(cc);
+       if (mdmon_running(container))
+               st->update_tail = &st->updates;
+       return 0;
+error:
+       sysfs_free(cc);
+       return 1;
+}
 
 int Grow_reshape(char *devname, int fd,
                 struct mddev_dev *devlist,
-                unsigned long long data_offset,
                 struct context *c, struct shape *s)
 {
        /* Make some changes in the shape of an array.
@@ -1781,7 +1905,7 @@ int Grow_reshape(char *devname, int fd,
        struct supertype *st;
        char *subarray = NULL;
 
-       int frozen;
+       int frozen = 0;
        int changed = 0;
        char *container = NULL;
        int cfd = -1;
@@ -1790,7 +1914,7 @@ int Grow_reshape(char *devname, int fd,
        int added_disks;
 
        struct mdinfo info;
-       struct mdinfo *sra;
+       struct mdinfo *sra = NULL;
 
        if (md_get_array_info(fd, &array) < 0) {
                pr_err("%s is not an active md array - aborting\n",
@@ -1802,7 +1926,7 @@ int Grow_reshape(char *devname, int fd,
                return 1;
        }
 
-       if (data_offset != INVALID_SECTORS && array.level != 10 &&
+       if (s->data_offset != INVALID_SECTORS && array.level != 10 &&
            (array.level < 4 || array.level > 6)) {
                pr_err("--grow --data-offset not yet supported\n");
                return 1;
@@ -1815,14 +1939,6 @@ int Grow_reshape(char *devname, int fd,
                return 1;
        }
 
-       if (s->raiddisks && s->raiddisks < array.raid_disks &&
-           array.level > 1 && get_linux_version() < 2006032 &&
-           !check_env("MDADM_FORCE_FEWER")) {
-               pr_err("reducing the number of devices is not safe before Linux 2.6.32\n"
-                       "       Please use a newer kernel\n");
-               return 1;
-       }
-
        if (array.level > 1 && s->size > 1 &&
            (unsigned long long) (array.chunk_size / 1024) > s->size) {
                pr_err("component size must be larger than chunk size.\n");
@@ -1838,23 +1954,16 @@ int Grow_reshape(char *devname, int fd,
                pr_err("Cannot increase raid-disks on this array beyond %d\n", st->max_devs);
                return 1;
        }
-       if (s->level == 0 &&
-           (array.state & (1<<MD_SB_BITMAP_PRESENT)) &&
-           !(array.state & (1<<MD_SB_CLUSTERED))) {
-                array.state &= ~(1<<MD_SB_BITMAP_PRESENT);
-                if (md_set_array_info(fd, &array)!= 0) {
-                        pr_err("failed to remove internal bitmap.\n");
-                        return 1;
-                }
-        }
-
-       /* in the external case we need to check that the requested reshape is
-        * supported, and perform an initial check that the container holds the
-        * pre-requisite spare devices (mdmon owns final validation)
-        */
-       if (st->ss->external) {
-               int retval;
+       if (s->level == 0 && (array.state & (1 << MD_SB_BITMAP_PRESENT)) &&
+               !(array.state & (1 << MD_SB_CLUSTERED)) && !st->ss->external) {
+               array.state &= ~(1 << MD_SB_BITMAP_PRESENT);
+               if (md_set_array_info(fd, &array) != 0) {
+                       pr_err("failed to remove internal bitmap.\n");
+                       return 1;
+               }
+       }
 
+       if (st->ss->external) {
                if (subarray) {
                        container = st->container_devnm;
                        cfd = open_dev_excl(st->container_devnm);
@@ -1870,51 +1979,20 @@ int Grow_reshape(char *devname, int fd,
                        return 1;
                }
 
-               retval = st->ss->load_container(st, cfd, NULL);
-
-               if (retval) {
-                       pr_err("Cannot read superblock for %s\n", devname);
+               rv = prepare_external_reshape(devname, subarray, st,
+                                             container, cfd);
+               if (rv > 0) {
                        free(subarray);
-                       return 1;
+                       close(cfd);
+                       goto release;
                }
 
-               /* check if operation is supported for metadata handler */
-               if (st->ss->container_content) {
-                       struct mdinfo *cc = NULL;
-                       struct mdinfo *content = NULL;
-
-                       cc = st->ss->container_content(st, subarray);
-                       for (content = cc; content ; content = content->next) {
-                               int allow_reshape = 1;
-
-                               /* check if reshape is allowed based on metadata
-                                * indications stored in content.array.status
-                                */
-                               if (content->array.state &
-                                   (1 << MD_SB_BLOCK_VOLUME))
-                                       allow_reshape = 0;
-                               if (content->array.state &
-                                   (1 << MD_SB_BLOCK_CONTAINER_RESHAPE))
-                                       allow_reshape = 0;
-                               if (!allow_reshape) {
-                                       pr_err("cannot reshape arrays in container with unsupported metadata: %s(%s)\n",
-                                              devname, container);
-                                       sysfs_free(cc);
-                                       free(subarray);
-                                       return 1;
-                               }
-                               if (content->consistency_policy ==
-                                   CONSISTENCY_POLICY_PPL) {
-                                       pr_err("Operation not supported when ppl consistency policy is enabled\n");
-                                       sysfs_free(cc);
-                                       free(subarray);
-                                       return 1;
-                               }
-                       }
-                       sysfs_free(cc);
+               if (s->raiddisks && subarray) {
+                       pr_err("--raid-devices operation can be performed on a container only\n");
+                       close(cfd);
+                       free(subarray);
+                       return 1;
                }
-               if (mdmon_running(container))
-                       st->update_tail = &st->updates;
        }
 
        added_disks = 0;
@@ -1956,9 +2034,8 @@ int Grow_reshape(char *devname, int fd,
        }
 
        /* ========= set size =============== */
-       if (s->size > 0 &&
-           (s->size == MAX_SIZE || s->size != (unsigned)array.size)) {
-               unsigned long long orig_size = get_component_size(fd)/2;
+       if (s->size > 0 && (s->size == MAX_SIZE || s->size != (unsigned)array.size)) {
+               unsigned long long orig_size = get_component_size(fd) / 2;
                unsigned long long min_csize;
                struct mdinfo *mdi;
                int raid0_takeover = 0;
@@ -1972,9 +2049,13 @@ int Grow_reshape(char *devname, int fd,
                        goto release;
                }
 
-               if (reshape_super(st, s->size, UnSet, UnSet, 0, 0, UnSet, NULL,
-                                 devname, APPLY_METADATA_CHANGES,
-                                 c->verbose > 0)) {
+               if (array.level == 0) {
+                       pr_err("Component size change is not supported for RAID0\n");
+                       rv = 1;
+                       goto release;
+               }
+
+               if (reshape_super_size(st, devname, s->size, APPLY_METADATA_CHANGES, c)) {
                        rv = 1;
                        goto release;
                }
@@ -2053,9 +2134,10 @@ int Grow_reshape(char *devname, int fd,
                        if (!mdmon_running(st->container_devnm))
                                start_mdmon(st->container_devnm);
                        ping_monitor(container);
-                       if (mdmon_running(st->container_devnm) &&
-                                       st->update_tail == NULL)
-                               st->update_tail = &st->updates;
+                       if (mdmon_running(st->container_devnm) == false) {
+                               pr_err("No mdmon found. Grow cannot continue.\n");
+                               goto release;
+                       }
                }
 
                if (s->size == MAX_SIZE)
@@ -2065,11 +2147,7 @@ int Grow_reshape(char *devname, int fd,
                        /* got truncated to 32bit, write to
                         * component_size instead
                         */
-                       if (sra)
-                               rv = sysfs_set_num(sra, NULL,
-                                                  "component_size", s->size);
-                       else
-                               rv = -1;
+                       rv = sysfs_set_num(sra, NULL, "component_size", s->size);
                } else {
                        rv = md_set_array_info(fd, &array);
 
@@ -2095,10 +2173,8 @@ size_change_error:
                        int err = errno;
 
                        /* restore metadata */
-                       if (reshape_super(st, orig_size, UnSet, UnSet, 0, 0,
-                                         UnSet, NULL, devname,
-                                         ROLLBACK_METADATA_CHANGES,
-                                         c->verbose) == 0)
+                       if (reshape_super_size(st, devname, orig_size,
+                                              ROLLBACK_METADATA_CHANGES, c) == 0)
                                sync_metadata(st);
                        pr_err("Cannot set device size for %s: %s\n",
                                devname, strerror(err));
@@ -2113,8 +2189,7 @@ size_change_error:
                         * a backport has been arranged.
                         */
                        if (sra == NULL ||
-                           sysfs_set_str(sra, NULL, "resync_start",
-                                         "none") < 0)
+                           sysfs_set_str(sra, NULL, "resync_start", STR_COMMON_NONE) < 0)
                                pr_err("--assume-clean not supported with --grow on this kernel\n");
                }
                md_get_array_info(fd, &array);
@@ -2130,7 +2205,7 @@ size_change_error:
                                        devname, s->size);
                }
                changed = 1;
-       } else if (array.level != LEVEL_CONTAINER) {
+       } else if (!is_container(array.level)) {
                s->size = get_component_size(fd)/2;
                if (s->size == 0)
                        s->size = array.size;
@@ -2140,7 +2215,7 @@ size_change_error:
        if ((s->level == UnSet || s->level == array.level) &&
            (s->layout_str == NULL) &&
            (s->chunk == 0 || s->chunk == array.chunk_size) &&
-           data_offset == INVALID_SECTORS &&
+           s->data_offset == INVALID_SECTORS &&
            (s->raiddisks == 0 || s->raiddisks == array.raid_disks)) {
                /* Nothing more to do */
                if (!changed && c->verbose >= 0)
@@ -2186,7 +2261,7 @@ size_change_error:
        info.component_size = s->size*2;
        info.new_level = s->level;
        info.new_chunk = s->chunk * 1024;
-       if (info.array.level == LEVEL_CONTAINER) {
+       if (is_container(info.array.level)) {
                info.delta_disks = UnSet;
                info.array.raid_disks = s->raiddisks;
        } else if (s->raiddisks)
@@ -2210,7 +2285,7 @@ size_change_error:
                info.new_layout = UnSet;
                if (info.array.level == 6 && info.new_level == UnSet) {
                        char l[40], *h;
-                       strcpy(l, map_num(r6layout, info.array.layout));
+                       strcpy(l, map_num_s(r6layout, info.array.layout));
                        h = strrchr(l, '-');
                        if (h && strcmp(h, "-6") == 0) {
                                *h = 0;
@@ -2235,7 +2310,7 @@ size_change_error:
                        info.new_layout = info.array.layout;
                else if (info.array.level == 5 && info.new_level == 6) {
                        char l[40];
-                       strcpy(l, map_num(r5layout, info.array.layout));
+                       strcpy(l, map_num_s(r5layout, info.array.layout));
                        strcat(l, "-6");
                        info.new_layout = map_name(r6layout, l);
                } else {
@@ -2299,7 +2374,7 @@ size_change_error:
                                printf("layout for %s set to %d\n",
                                       devname, array.layout);
                }
-       } else if (array.level == LEVEL_CONTAINER) {
+       } else if (is_container(array.level)) {
                /* This change is to be applied to every array in the
                 * container.  This is only needed when the metadata imposes
                 * restraints of the various arrays in the container.
@@ -2308,13 +2383,9 @@ size_change_error:
                 * number of devices (On-Line Capacity Expansion) must be
                 * performed at the level of the container
                 */
-               if (fd > 0) {
-                       close(fd);
-                       fd = -1;
-               }
+               close_fd(&fd);
                rv = reshape_container(container, devname, -1, st, &info,
-                                      c->force, c->backup_file, c->verbose,
-                                      0, 0, 0);
+                                      c, 0, 0);
                frozen = 0;
        } else {
                /* get spare devices from external metadata
@@ -2331,19 +2402,15 @@ size_change_error:
                }
 
                /* Impose these changes on a single array.  First
-                * check that the metadata is OK with the change. */
-
-               if (reshape_super(st, 0, info.new_level,
-                                 info.new_layout, info.new_chunk,
-                                 info.array.raid_disks, info.delta_disks,
-                                 c->backup_file, devname,
-                                 APPLY_METADATA_CHANGES, c->verbose)) {
+                * check that the metadata is OK with the change.
+                */
+               if (reshape_super_non_size(st, devname, &info, c)) {
                        rv = 1;
                        goto release;
                }
                sync_metadata(st);
                rv = reshape_array(container, fd, devname, st, &info, c->force,
-                                  devlist, data_offset, c->backup_file,
+                                  devlist, s->data_offset, c->backup_file,
                                   c->verbose, 0, 0, 0);
                frozen = 0;
        }
@@ -2367,11 +2434,11 @@ release:
 static int verify_reshape_position(struct mdinfo *info, int level)
 {
        int ret_val = 0;
-       char buf[40];
+       char buf[SYSFS_MAX_BUF_SIZE];
        int rv;
 
        /* read sync_max, failure can mean raid0 array */
-       rv = sysfs_get_str(info, NULL, "sync_max", buf, 40);
+       rv = sysfs_get_str(info, NULL, "sync_max", buf, sizeof(buf));
 
        if (rv > 0) {
                char *ep;
@@ -2908,7 +2975,7 @@ static int impose_level(int fd, int level, char *devname, int verbose)
        }
 
        md_get_array_info(fd, &array);
-       if (level == 0 && (array.level >= 4 && array.level <= 6)) {
+       if (level == 0 && is_level456(array.level)) {
                /* To convert to RAID0 we need to fail and
                 * remove any non-data devices. */
                int found = 0;
@@ -2982,47 +3049,6 @@ static void catch_term(int sig)
        sigterm = 1;
 }
 
-static int continue_via_systemd(char *devnm)
-{
-       int skipped, i, pid, status;
-       char pathbuf[1024];
-       /* In a systemd/udev world, it is best to get systemd to
-        * run "mdadm --grow --continue" rather than running in the
-        * background.
-        */
-       switch(fork()) {
-       case  0:
-               /* FIXME yuk. CLOSE_EXEC?? */
-               skipped = 0;
-               for (i = 3; skipped < 20; i++)
-                       if (close(i) < 0)
-                               skipped++;
-                       else
-                               skipped = 0;
-
-               /* Don't want to see error messages from
-                * systemctl.  If the service doesn't exist,
-                * we fork ourselves.
-                */
-               close(2);
-               open("/dev/null", O_WRONLY);
-               snprintf(pathbuf, sizeof(pathbuf),
-                        "mdadm-grow-continue@%s.service", devnm);
-               status = execl("/usr/bin/systemctl", "systemctl", "restart",
-                              pathbuf, NULL);
-               status = execl("/bin/systemctl", "systemctl", "restart",
-                              pathbuf, NULL);
-               exit(1);
-       case -1: /* Just do it ourselves. */
-               break;
-       default: /* parent - good */
-               pid = wait(&status);
-               if (pid >= 0 && status == 0)
-                       return 1;
-       }
-       return 0;
-}
-
 static int reshape_array(char *container, int fd, char *devname,
                         struct supertype *st, struct mdinfo *info,
                         int force, struct mddev_dev *devlist,
@@ -3052,7 +3078,7 @@ static int reshape_array(char *container, int fd, char *devname,
        unsigned long long array_size;
        int done;
        struct mdinfo *sra = NULL;
-       char buf[20];
+       char buf[SYSFS_MAX_BUF_SIZE];
 
        /* when reshaping a RAID0, the component_size might be zero.
         * So try to fix that up.
@@ -3061,6 +3087,8 @@ static int reshape_array(char *container, int fd, char *devname,
                dprintf("Cannot get array information.\n");
                goto release;
        }
+       if (st->update_tail == NULL)
+               st->update_tail = &st->updates;
        if (array.level == 0 && info->component_size == 0) {
                get_dev_size(fd, NULL, &array_size);
                info->component_size = array_size / array.raid_disks;
@@ -3239,7 +3267,7 @@ static int reshape_array(char *container, int fd, char *devname,
         * level and frozen, we can safely add them.
         */
        if (devlist) {
-               if (Manage_subdevs(devname, fd, devlist, verbose, 0, NULL, 0))
+               if (Manage_subdevs(devname, fd, devlist, verbose, 0, UOPT_UNDEFINED, 0))
                        goto release;
        }
 
@@ -3401,6 +3429,7 @@ static int reshape_array(char *container, int fd, char *devname,
                default: /* parent */
                        return 0;
                case 0:
+                       manage_fork_fds(0);
                        map_fork();
                        break;
                }
@@ -3492,7 +3521,7 @@ started:
                goto release;
 
        err = start_reshape(sra, restart, reshape.before.data_disks,
-                           reshape.after.data_disks);
+                           reshape.after.data_disks, st);
        if (err) {
                pr_err("Cannot %s reshape for %s\n",
                       restart ? "continue" : "start", devname);
@@ -3509,8 +3538,9 @@ started:
                return 1;
        }
 
-       if (!forked && !check_env("MDADM_NO_SYSTEMCTL"))
-               if (continue_via_systemd(container ?: sra->sys_name)) {
+       if (!forked)
+               if (continue_via_systemd(container ?: sra->sys_name,
+                                        GROW_SERVICE, NULL)) {
                        free(fdlist);
                        free(offsets);
                        sysfs_free(sra);
@@ -3537,6 +3567,9 @@ started:
                break;
        }
 
+       /* Close unused file descriptor in the forked process */
+       close_fd(&fd);
+
        /* If another array on the same devices is busy, the
         * reshape will wait for them.  This would mean that
         * the first section that we suspend will stay suspended
@@ -3569,14 +3602,14 @@ started:
                        mdstat_wait(30 - (delayed-1) * 25);
        } while (delayed);
        mdstat_close();
-       close(fd);
        if (check_env("MDADM_GROW_VERIFY"))
                fd = open(devname, O_RDONLY | O_DIRECT);
        else
                fd = -1;
        mlockall(MCL_FUTURE);
 
-       signal(SIGTERM, catch_term);
+       if (signal_s(SIGTERM, catch_term) == SIG_ERR)
+               goto release;
 
        if (st->ss->external) {
                /* metadata handler takes it from here */
@@ -3677,23 +3710,15 @@ int reshape_container(char *container, char *devname,
                      int mdfd,
                      struct supertype *st,
                      struct mdinfo *info,
-                     int force,
-                     char *backup_file, int verbose,
-                     int forked, int restart, int freeze_reshape)
+                     struct context *c,
+                     int forked, int restart)
 {
        struct mdinfo *cc = NULL;
        int rv = restart;
        char last_devnm[32] = "";
 
-       /* component_size is not meaningful for a container,
-        * so pass '0' meaning 'no change'
-        */
-       if (!restart &&
-           reshape_super(st, 0, info->new_level,
-                         info->new_layout, info->new_chunk,
-                         info->array.raid_disks, info->delta_disks,
-                         backup_file, devname, APPLY_METADATA_CHANGES,
-                         verbose)) {
+       /* component_size is not meaningful for a container */
+       if (!restart && reshape_super_non_size(st, devname, info, c)) {
                unfreeze(st);
                return 1;
        }
@@ -3704,8 +3729,8 @@ int reshape_container(char *container, char *devname,
         */
        ping_monitor(container);
 
-       if (!forked && !freeze_reshape && !check_env("MDADM_NO_SYSTEMCTL"))
-               if (continue_via_systemd(container))
+       if (!forked && !c->freeze_reshape)
+               if (continue_via_systemd(container, GROW_SERVICE, NULL))
                        return 0;
 
        switch (forked ? 0 : fork()) {
@@ -3714,10 +3739,11 @@ int reshape_container(char *container, char *devname,
                unfreeze(st);
                return 1;
        default: /* parent */
-               if (!freeze_reshape)
+               if (!c->freeze_reshape)
                        printf("%s: multi-array reshape continues in background\n", Name);
                return 0;
        case 0: /* child */
+               manage_fork_fds(0);
                map_fork();
                break;
        }
@@ -3810,12 +3836,12 @@ int reshape_container(char *container, char *devname,
                        flush_mdmon(container);
 
                rv = reshape_array(container, fd, adev, st,
-                                  content, force, NULL, INVALID_SECTORS,
-                                  backup_file, verbose, 1, restart,
-                                  freeze_reshape);
+                                  content, c->force, NULL, INVALID_SECTORS,
+                                  c->backup_file, c->verbose, 1, restart,
+                                  c->freeze_reshape);
                close(fd);
 
-               if (freeze_reshape) {
+               if (c->freeze_reshape) {
                        sysfs_free(cc);
                        exit(0);
                }
@@ -3922,7 +3948,7 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
        unsigned long long array_size = (info->component_size
                                         * reshape->before.data_disks);
        int fd;
-       char buf[20];
+       char buf[SYSFS_MAX_BUF_SIZE];
 
        /* First, we unsuspend any region that is now known to be safe.
         * If suspend_point is on the 'wrong' side of reshape_progress, then
@@ -4100,8 +4126,8 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
                /* Check that sync_action is still 'reshape' to avoid
                 * waiting forever on a dead array
                 */
-               char action[20];
-               if (sysfs_get_str(info, NULL, "sync_action", action, 20) <= 0 ||
+               char action[SYSFS_MAX_BUF_SIZE];
+               if (sysfs_get_str(info, NULL, "sync_action", action, sizeof(action)) <= 0 ||
                    strncmp(action, "reshape", 7) != 0)
                        break;
                /* Some kernels reset 'sync_completed' to zero
@@ -4127,8 +4153,8 @@ int progress_reshape(struct mdinfo *info, struct reshape *reshape,
         */
        if (completed == 0) {
                unsigned long long reshapep;
-               char action[20];
-               if (sysfs_get_str(info, NULL, "sync_action", action, 20) > 0 &&
+               char action[SYSFS_MAX_BUF_SIZE];
+               if (sysfs_get_str(info, NULL, "sync_action", action, sizeof(action)) > 0 &&
                    strncmp(action, "idle", 4) == 0 &&
                    sysfs_get_ll(info, NULL,
                                 "reshape_position", &reshapep) == 0)
@@ -4165,8 +4191,8 @@ check_progress:
         * it was just a device failure that leaves us degraded but
         * functioning.
         */
-       if (sysfs_get_str(info, NULL, "reshape_position", buf,
-                         sizeof(buf)) < 0 || strncmp(buf, "none", 4) != 0) {
+       if (sysfs_get_str(info, NULL, "reshape_position", buf, sizeof(buf)) < 0 ||
+           str_is_none(buf) == false) {
                /* The abort might only be temporary.  Wait up to 10
                 * seconds for fd to contain a valid number again.
                 */
@@ -4246,7 +4272,7 @@ static int grow_backup(struct mdinfo *sra,
                        if (sd->disk.state & (1<<MD_DISK_FAULTY))
                                continue;
                        if (sd->disk.state & (1<<MD_DISK_SYNC)) {
-                               char sbuf[100];
+                               char sbuf[SYSFS_MAX_BUF_SIZE];
 
                                if (sysfs_get_str(sra, sd, "state",
                                                  sbuf, sizeof(sbuf)) < 0 ||
@@ -4421,19 +4447,8 @@ static void validate(int afd, int bfd, unsigned long long offset)
                lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0);
                if ((unsigned long long)read(afd, abuf, len) != len)
                        fail("read first from array failed");
-               if (memcmp(bbuf, abuf, len) != 0) {
-#if 0
-                       int i;
-                       printf("offset=%llu len=%llu\n",
-                              (unsigned long long)__le64_to_cpu(bsb2.arraystart)*512, len);
-                       for (i=0; i<len; i++)
-                               if (bbuf[i] != abuf[i]) {
-                                       printf("first diff byte %d\n", i);
-                                       break;
-                               }
-#endif
+               if (memcmp(bbuf, abuf, len) != 0)
                        fail("data1 compare failed");
-               }
        }
        if (bsb2.length2) {
                unsigned long long len = __le64_to_cpu(bsb2.length2)*512;
@@ -4940,7 +4955,8 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist,
                                continue;
                        st->ss->getinfo_super(st, &dinfo, NULL);
                        dinfo.reshape_progress = info->reshape_progress;
-                       st->ss->update_super(st, &dinfo, "_reshape_progress",
+                       st->ss->update_super(st, &dinfo,
+                                            UOPT_SPEC__RESHAPE_PROGRESS,
                                             NULL,0, 0, NULL);
                        st->ss->store_super(st, fdlist[j]);
                        st->ss->free_super(st);
@@ -4988,8 +5004,7 @@ int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist,
        return 1;
 }
 
-int Grow_continue_command(char *devname, int fd,
-                         char *backup_file, int verbose)
+int Grow_continue_command(char *devname, int fd, struct context *c)
 {
        int ret_val = 0;
        struct supertype *st = NULL;
@@ -5057,7 +5072,7 @@ int Grow_continue_command(char *devname, int fd,
                        }
                        st->ss->getinfo_super(st, content, NULL);
                        if (!content->reshape_active)
-                               sleep(3);
+                               sleep_for(3, 0, true);
                        else
                                break;
                } while (cnt-- > 0);
@@ -5158,9 +5173,7 @@ int Grow_continue_command(char *devname, int fd,
                        start_mdmon(container);
                ping_monitor(container);
 
-               if (mdmon_running(container))
-                       st->update_tail = &st->updates;
-               else {
+               if (mdmon_running(container) == false) {
                        pr_err("No mdmon found. Grow cannot continue.\n");
                        ret_val = 1;
                        goto Grow_continue_command_exit;
@@ -5177,7 +5190,7 @@ int Grow_continue_command(char *devname, int fd,
 
        /* continue reshape
         */
-       ret_val = Grow_continue(fd, st, content, backup_file, 1, 0);
+       ret_val = Grow_continue(fd, st, content, 1, c);
 
 Grow_continue_command_exit:
        if (cfd > -1)
@@ -5191,7 +5204,7 @@ Grow_continue_command_exit:
 }
 
 int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
-                 char *backup_file, int forked, int freeze_reshape)
+                 int forked, struct context *c)
 {
        int ret_val = 2;
 
@@ -5207,14 +5220,12 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info,
                st->ss->load_container(st, cfd, st->container_devnm);
                close(cfd);
                ret_val = reshape_container(st->container_devnm, NULL, mdfd,
-                                           st, info, 0, backup_file, 0,
-                                           forked, 1 | info->reshape_active,
-                                           freeze_reshape);
+                                           st, info, c, forked, 1 | info->reshape_active);
        } else
                ret_val = reshape_array(NULL, mdfd, "array", st, info, 1,
-                                       NULL, INVALID_SECTORS, backup_file,
+                                       NULL, INVALID_SECTORS, c->backup_file,
                                        0, forked, 1 | info->reshape_active,
-                                       freeze_reshape);
+                                       c->freeze_reshape);
 
        return ret_val;
 }
index 98dbcd920f535a44f8764ee91f72fb4fcb6efea1..83db071214ee57ba507e5cfed73f85494cb9950e 100644 (file)
@@ -202,8 +202,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
        if (!match && rv == 2)
                goto out;
 
-       if (match && match->devname &&
-           strcasecmp(match->devname, "<ignore>") == 0) {
+       if (match && match->devname && is_devname_ignore(match->devname) == true) {
                if (c->verbose >= 0)
                        pr_err("array containing %s is explicitly ignored by mdadm.conf\n",
                                devname);
@@ -244,7 +243,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                c->autof = ci->autof;
 
        name_to_use = info.name;
-       if (name_to_use[0] == 0 && info.array.level == LEVEL_CONTAINER) {
+       if (name_to_use[0] == 0 && is_container(info.array.level)) {
                name_to_use = info.text_version;
                trustworthy = METADATA;
        }
@@ -400,7 +399,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                        }
                        st2 = dup_super(st);
                        if (st2->ss->load_super(st2, dfd2, NULL) ||
-                           st->ss->compare_super(st, st2) != 0) {
+                           st->ss->compare_super(st, st2, 1) != 0) {
                                pr_err("metadata mismatch between %s and chosen array %s\n",
                                       devname, chosen_name);
                                close(dfd2);
@@ -460,8 +459,8 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                        info.array.working_disks ++;
 
        }
-       if (strncmp(chosen_name, "/dev/md/", 8) == 0)
-               md_devname = chosen_name+8;
+       if (strncmp(chosen_name, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
+               md_devname = chosen_name + DEV_MD_DIR_LEN;
        else
                md_devname = chosen_name;
        if (c->export) {
@@ -472,7 +471,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
 
        /* 7/ Is there enough devices to possibly start the array? */
        /* 7a/ if not, finish with success. */
-       if (info.array.level == LEVEL_CONTAINER) {
+       if (is_container(info.array.level)) {
                char devnm[32];
                /* Try to assemble within the container */
                sysfs_uevent(sra, "change");
@@ -507,6 +506,9 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
                                    GET_OFFSET | GET_SIZE));
        active_disks = count_active(st, sra, mdfd, &avail, &info);
 
+       if (!avail)
+               goto out_unlock;
+
        journal_device_missing = (info.journal_device_required) && (info.journal_clean == 0);
 
        if (info.consistency_policy == CONSISTENCY_POLICY_PPL)
@@ -668,7 +670,7 @@ static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
                        continue;
 
                if (d->disk.raid_disk > -1)
-                       sysfs_set_str(sra, d, "slot", "none");
+                       sysfs_set_str(sra, d, "slot", STR_COMMON_NONE);
                if (sysfs_set_str(sra, d, "state", "remove") == 0)
                        if (verbose >= 0)
                                pr_err("removing old device %s from %s\n",
@@ -831,6 +833,54 @@ container_members_max_degradation(struct map_ent *map, struct map_ent *me)
        return max_degraded;
 }
 
+/**
+ * incremental_external_test_spare_criteria() - helper to test spare criteria.
+ * @st: supertype, must be not NULL, it is duplicated here.
+ * @container_devnm: devnm of the container.
+ * @disk_fd: file descriptor of the device to be tested.
+ * @verbose: verbose flag.
+ *
+ * The function is used on the new drive verification path to check whether the drive can be added
+ * to an external container. To test spare criteria, metadata must be loaded, so the supertype is
+ * duplicated to avoid modifying the original. The check runs only if the superblock supports
+ * get_spare_criteria(); otherwise success is returned.
+ */
+mdadm_status_t incremental_external_test_spare_criteria(struct supertype *st, char *container_devnm,
+                                                       int disk_fd, int verbose)
+{
+       mdadm_status_t rv = MDADM_STATUS_ERROR;
+       char container_devname[PATH_MAX];
+       struct spare_criteria sc = {0};
+       struct supertype *dup;
+
+       if (!st->ss->get_spare_criteria)
+               return MDADM_STATUS_SUCCESS;
+
+       dup = dup_super(st);
+       snprintf(container_devname, PATH_MAX, "/dev/%s", container_devnm);
+
+       if (dup->ss->get_spare_criteria(dup, container_devname, &sc) != 0) {
+               if (verbose > 1)
+                       pr_err("Failed to get spare criteria for %s\n", container_devname);
+               goto out;
+       }
+
+       if (!disk_fd_matches_criteria(dup, disk_fd, &sc)) {
+               if (verbose > 1)
+                       pr_err("Disk does not match spare criteria for %s\n", container_devname);
+               goto out;
+       }
+
+       rv = MDADM_STATUS_SUCCESS;
+
+out:
+       dev_policy_free(sc.pols);
+       dup->ss->free_super(dup);
+       free(dup);
+
+       return rv;
+}
+
 static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                           struct map_ent *target, int bare,
                           struct supertype *st, int verbose)
@@ -871,8 +921,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                struct supertype *st2;
                struct domainlist *dl = NULL;
                struct mdinfo *sra;
-               unsigned long long devsize, freesize = 0;
-               struct spare_criteria sc = {0, 0};
+               unsigned long long freesize = 0;
 
                if (is_subarray(mp->metadata))
                        continue;
@@ -923,34 +972,19 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                if (sra->array.failed_disks == -1)
                        sra->array.failed_disks = container_members_max_degradation(map, mp);
 
-               get_dev_size(dfd, NULL, &devsize);
                if (sra->component_size == 0) {
-                       /* true for containers, here we must read superblock
-                        * to obtain minimum spare size */
-                       struct supertype *st3 = dup_super(st2);
-                       int mdfd = open_dev(mp->devnm);
-                       if (mdfd < 0) {
-                               free(st3);
+                       /* true for containers */
+                       if (incremental_external_test_spare_criteria(st2, mp->devnm, dfd, verbose))
                                goto next;
-                       }
-                       if (st3->ss->load_container &&
-                           !st3->ss->load_container(st3, mdfd, mp->path)) {
-                               if (st3->ss->get_spare_criteria)
-                                       st3->ss->get_spare_criteria(st3, &sc);
-                               st3->ss->free_super(st3);
-                       }
-                       free(st3);
-                       close(mdfd);
                }
-               if ((sra->component_size > 0 &&
-                    st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout,
+
+               if (sra->component_size > 0 &&
+                   st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout,
                                                sra->array.raid_disks, &sra->array.chunk_size,
                                                sra->component_size,
                                                sra->devs ? sra->devs->data_offset : INVALID_SECTORS,
                                                devname, &freesize, sra->consistency_policy,
-                                               0) &&
-                    freesize < sra->component_size) ||
-                   (sra->component_size == 0 && devsize < sc.min_size)) {
+                                               0) && freesize < sra->component_size) {
                        if (verbose > 1)
                                pr_err("not adding %s to %s as it is too small\n",
                                        devname, mp->path);
@@ -1025,7 +1059,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
                        close(dfd);
                        *dfdp = -1;
                        rv =  Manage_subdevs(chosen->sys_name, mdfd, &devlist,
-                                            -1, 0, NULL, 0);
+                                            -1, 0, UOPT_UNDEFINED, 0);
                        close(mdfd);
                }
                if (verbose > 0) {
@@ -1346,7 +1380,7 @@ restart:
                }
                mdfd = open_dev(me->devnm);
 
-               if (mdfd < 0)
+               if (!is_fd_valid(mdfd))
                        continue;
                if (!isdigit(me->metadata[0])) {
                        /* must be a container */
@@ -1356,7 +1390,7 @@ restart:
 
                        if (st && st->ss->load_container)
                                ret = st->ss->load_container(st, mdfd, NULL);
-                       close(mdfd);
+                       close_fd(&mdfd);
                        if (!ret && st && st->ss->container_content) {
                                if (map_lock(&map))
                                        pr_err("failed to get exclusive lock on mapfile\n");
@@ -1368,7 +1402,7 @@ restart:
                        continue;
                }
                if (md_array_active(mdfd)) {
-                       close(mdfd);
+                       close_fd(&mdfd);
                        continue;
                }
                /* Ok, we can try this one.   Maybe it needs a bitmap */
@@ -1385,9 +1419,9 @@ restart:
                        int bmfd;
 
                        bmfd = open(mddev->bitmap_file, O_RDWR);
-                       if (bmfd >= 0) {
+                       if (is_fd_valid(bmfd)) {
                                added = ioctl(mdfd, SET_BITMAP_FILE, bmfd);
-                               close(bmfd);
+                               close_fd(&bmfd);
                        }
                        if (c->verbose >= 0) {
                                if (added == 0)
@@ -1416,6 +1450,7 @@ restart:
                        }
                        sysfs_free(sra);
                }
+               close_fd(&mdfd);
        }
        map_free(mapl);
        return rv;
@@ -1460,24 +1495,16 @@ static int Incremental_container(struct supertype *st, char *devname,
        int trustworthy;
        struct mddev_ident *match;
        int rv = 0;
-       struct domainlist *domains;
-       struct map_ent *smp;
-       int suuid[4];
-       int sfd;
-       int ra_blocked = 0;
-       int ra_all = 0;
        int result = 0;
 
        st->ss->getinfo_super(st, &info, NULL);
 
-       if ((c->runstop > 0 && info.container_enough >= 0) ||
-           info.container_enough > 0)
-               /* pass */;
-       else {
-               if (c->export) {
+       if (info.container_enough < 0 || (info.container_enough == 0 && c->runstop < 1)) {
+               if (c->export)
                        printf("MD_STARTED=no\n");
-               } else if (c->verbose)
-                       pr_err("not enough devices to start the container\n");
+               else if (c->verbose)
+                       pr_err("Not enough devices to start the container.\n");
+
                return 0;
        }
 
@@ -1504,23 +1531,27 @@ static int Incremental_container(struct supertype *st, char *devname,
                return 0;
        }
        for (ra = list ; ra ; ra = ra->next) {
-               int mdfd;
+               int mdfd = -1;
                char chosen_name[1024];
                struct map_ent *mp;
                struct mddev_ident *match = NULL;
 
-               ra_all++;
                /* do not activate arrays blocked by metadata handler */
                if (ra->array.state & (1 << MD_SB_BLOCK_VOLUME)) {
                        pr_err("Cannot activate array %s in %s.\n",
                                ra->text_version, devname);
-                       ra_blocked++;
                        continue;
                }
                mp = map_by_uuid(&map, ra->uuid);
 
                if (mp) {
                        mdfd = open_dev(mp->devnm);
+                       if (!is_fd_valid(mdfd)) {
+                               pr_err("failed to open %s: %s.\n",
+                                      mp->devnm, strerror(errno));
+                               rv = 2;
+                               goto release;
+                       }
                        if (mp->path)
                                strcpy(chosen_name, mp->path);
                        else
@@ -1565,8 +1596,7 @@ static int Incremental_container(struct supertype *st, char *devname,
                                break;
                        }
 
-                       if (match && match->devname &&
-                           strcasecmp(match->devname, "<ignore>") == 0) {
+                       if (match && match->devname && is_devname_ignore(match->devname) == true) {
                                if (c->verbose > 0)
                                        pr_err("array %s/%s is explicitly ignored by mdadm.conf\n",
                                               match->container, match->member);
@@ -1580,21 +1610,25 @@ static int Incremental_container(struct supertype *st, char *devname,
                                            c->autof,
                                            trustworthy,
                                            chosen_name, 0);
+
+                       if (!is_fd_valid(mdfd)) {
+                               pr_err("create_mddev failed with chosen name %s: %s.\n",
+                                      chosen_name, strerror(errno));
+                               rv = 2;
+                               goto release;
+                       }
                }
-               if (only && (!mp || strcmp(mp->devnm, only) != 0))
-                       continue;
 
-               if (mdfd < 0) {
-                       pr_err("failed to open %s: %s.\n",
-                               chosen_name, strerror(errno));
-                       return 2;
+               if (only && (!mp || strcmp(mp->devnm, only) != 0)) {
+                       close_fd(&mdfd);
+                       continue;
                }
 
                assemble_container_content(st, mdfd, ra, c,
                                           chosen_name, &result);
                map_free(map);
                map = NULL;
-               close(mdfd);
+               close_fd(&mdfd);
        }
        if (c->export && result) {
                char sep = '=';
@@ -1618,109 +1652,24 @@ static int Incremental_container(struct supertype *st, char *devname,
                printf("\n");
        }
 
-       /* don't move spares to container with volume being activated
-          when all volumes are blocked */
-       if (ra_all == ra_blocked)
-               return 0;
-
-       /* Now move all suitable spares from spare container */
-       domains = domain_from_array(list, st->ss->name);
-       memcpy(suuid, uuid_zero, sizeof(int[4]));
-       if (domains &&
-           (smp = map_by_uuid(&map, suuid)) != NULL &&
-           (sfd = open(smp->path, O_RDONLY)) >= 0) {
-               /* spare container found */
-               struct supertype *sst =
-                       super_imsm.match_metadata_desc("imsm");
-               struct mdinfo *sinfo;
-
-               if (!sst->ss->load_container(sst, sfd, NULL)) {
-                       struct spare_criteria sc = {0, 0};
-
-                       if (st->ss->get_spare_criteria)
-                               st->ss->get_spare_criteria(st, &sc);
-
-                       close(sfd);
-                       sinfo = container_choose_spares(sst, &sc,
-                                                       domains, NULL,
-                                                       st->ss->name, 0);
-                       sst->ss->free_super(sst);
-                       if (sinfo){
-                               int count = 0;
-                               struct mdinfo *disks = sinfo->devs;
-                               while (disks) {
-                                       /* move spare from spare
-                                        * container to currently
-                                        * assembled one
-                                        */
-                                       if (move_spare(
-                                                   smp->path,
-                                                   devname,
-                                                   makedev(disks->disk.major,
-                                                           disks->disk.minor)))
-                                               count++;
-                                       disks = disks->next;
-                               }
-                               if (count)
-                                       pr_err("Added %d spare%s to %s\n",
-                                              count, count>1?"s":"", devname);
-                       }
-                       sysfs_free(sinfo);
-               } else
-                       close(sfd);
-       }
-       domain_free(domains);
+release:
        map_free(map);
-       return 0;
-}
-
-static void run_udisks(char *arg1, char *arg2)
-{
-       int pid = fork();
-       int status;
-       if (pid == 0) {
-               execl("/usr/bin/udisks", "udisks", arg1, arg2, NULL);
-               execl("/bin/udisks", "udisks", arg1, arg2, NULL);
-               exit(1);
-       }
-       while (pid > 0 && wait(&status) != pid)
-               ;
-}
-
-static int force_remove(char *devnm, int fd, struct mdinfo *mdi, int verbose)
-{
-       int rv;
-       int devid = devnm2devid(devnm);
-
-       run_udisks("--unmount", map_dev(major(devid), minor(devid), 0));
-       rv = Manage_stop(devnm, fd, verbose, 1);
-       if (rv) {
-               /* At least we can try to trigger a 'remove' */
-               sysfs_uevent(mdi, "remove");
-               if (verbose)
-                       pr_err("Fail to stop %s too.\n", devnm);
-       }
+       sysfs_free(list);
        return rv;
 }
 
 static void remove_from_member_array(struct mdstat_ent *memb,
                                    struct mddev_dev *devlist, int verbose)
 {
-       int rv;
-       struct mdinfo mmdi;
        int subfd = open_dev(memb->devnm);
 
        if (subfd >= 0) {
-               rv = Manage_subdevs(memb->devnm, subfd, devlist, verbose,
-                                   0, NULL, 0);
-               if (rv & 2) {
-                       if (sysfs_init(&mmdi, -1, memb->devnm))
-                               pr_err("unable to initialize sysfs for: %s\n",
-                                      memb->devnm);
-                       else
-                               force_remove(memb->devnm, subfd, &mmdi,
-                                            verbose);
-               }
+               /*
+                * Ignore the return value because it's not necessary
+                * to handle failure condition here.
+                */
+               Manage_subdevs(memb->devnm, subfd, devlist, verbose,
+                              0, UOPT_UNDEFINED, 0);
                close(subfd);
        }
 }
@@ -1742,7 +1691,7 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
        struct mdstat_ent *ent;
        struct mddev_dev devlist;
        struct mdinfo mdi;
-       char buf[32];
+       char buf[SYSFS_MAX_BUF_SIZE];
 
        if (!id_path)
                dprintf("incremental removal without --path <id_path> lacks the possibility to re-add new device in this port\n");
@@ -1762,8 +1711,8 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
                return 1;
        }
        mdfd = open_dev_excl(ent->devnm);
-       if (mdfd > 0) {
-               close(mdfd);
+       if (is_fd_valid(mdfd)) {
+               close_fd(&mdfd);
                if (sysfs_get_str(&mdi, NULL, "array_state",
                                  buf, sizeof(buf)) > 0) {
                        if (strncmp(buf, "active", 6) == 0 ||
@@ -1790,7 +1739,7 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
 
        memset(&devlist, 0, sizeof(devlist));
        devlist.devname = devname;
-       devlist.disposition = 'f';
+       devlist.disposition = 'I';
        /* for a container, we must fail each member array */
        if (ent->metadata_version &&
            strncmp(ent->metadata_version, "external:", 9) == 0) {
@@ -1803,21 +1752,19 @@ int IncrementalRemove(char *devname, char *id_path, int verbose)
                }
                free_mdstat(mdstat);
        } else {
-               rv |= Manage_subdevs(ent->devnm, mdfd, &devlist,
-                                   verbose, 0, NULL, 0);
-               if (rv & 2) {
-               /* Failed due to EBUSY, try to stop the array.
-                * Give udisks a chance to unmount it first.
+               /*
+                * This 'I' incremental remove is a best-effort attempt,
+                * the failure condition can be safely ignored
+                * because of the follow-up 'r' remove.
                 */
-                       rv = force_remove(ent->devnm, mdfd, &mdi, verbose);
-                       goto end;
-               }
+               Manage_subdevs(ent->devnm, mdfd, &devlist,
+                              verbose, 0, UOPT_UNDEFINED, 0);
        }
 
        devlist.disposition = 'r';
        rv = Manage_subdevs(ent->devnm, mdfd, &devlist,
-                           verbose, 0, NULL, 0);
-end:
+                           verbose, 0, UOPT_UNDEFINED, 0);
+
        close(mdfd);
        free_mdstat(ent);
        return rv;
diff --git a/Kill.c b/Kill.c
index bfd0efdc0b1d463bda64756cb51a2f487ba58de3..43c9abed3b42b512448ca49cba943ba4ebe5603c 100644 (file)
--- a/Kill.c
+++ b/Kill.c
@@ -41,6 +41,7 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl)
         *  4 - failed to find a superblock.
         */
 
+       bool free_super = false;
        int fd, rv = 0;
 
        if (force)
@@ -52,8 +53,10 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl)
                                dev);
                return 2;
        }
-       if (st == NULL)
+       if (st == NULL) {
                st = guess_super(fd);
+               free_super = true;
+       }
        if (st == NULL || st->ss->init_super == NULL) {
                if (verbose >= 0)
                        pr_err("Unrecognised md component device - %s\n", dev);
@@ -77,6 +80,10 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl)
                        rv = 0;
                }
        }
+       if (free_super && st) {
+               st->ss->free_super(st);
+               free(st);
+       }
        close(fd);
        return rv;
 }
diff --git a/MAINTAINERS.md b/MAINTAINERS.md
new file mode 100644 (file)
index 0000000..9c79ba8
--- /dev/null
@@ -0,0 +1,44 @@
+# Maintainer tools
+
+Useful tools used in daily routines:
+- [checkpatch](https://docs.kernel.org/dev-tools/checkpatch.html)
+- [kup](https://korg.docs.kernel.org/kup.html)
+- [Auto-publishing](https://korg.docs.kernel.org/kup.html#auto-publishing-with-git-archive-signer)
+- [b4](https://b4.docs.kernel.org/en/latest/)
+
+# Checklist before applying patch
+
+We don't have CI testing yet, so all those steps must be performed manually:
+- Style check with [checkpatch](https://docs.kernel.org/dev-tools/checkpatch.html):
+
+  This is the code style we currently follow. We are not strict about all rules. It must be run
+  as **checkpatch --no-tree**, see README.md.
+
+- [Commit style](https://www.kernel.org/doc/html/v4.10/process/submitting-patches.html):
+
+  It doesn't need to be followed as strictly as in the kernel, but changes should be logically
+  separated. The submitter should at least mention "It is used in next patches" if unused
+  externs/files are added in a patch. We love: *Reported-by:*, *Suggested-by:*, *Fixes:* tags.
+
+- Compilation, ideally on various gcc versions.
+- Mdadm test suite execution.
+- Consider requesting new tests from submitter, especially for new functionalities.
+- Ensure that maintainer *sign-off* is added, before pushing.
+
+# Making a release
+
+Assuming that the maintainer is certain that the release is safe, the following steps must be done:
+
+- Update version strings in the release commit; please refer to previous releases for examples.
+
+- Create a GPG-signed tag and push it to the repo. Use the same format as was used previously, prefixed by
+  **mdadm-**, e.g. **mdadm-3.1.2**, **mdadm-4.1**.
+
+- [Auto-publishing](https://korg.docs.kernel.org/kup.html#auto-publishing-with-git-archive-signer):
+
+  Adapt the script to our release tag model. When ready, push the signed note to the repository.
+  If it is done correctly, then *(sig)* is added to the package automatically generated by
+  kernel.org automation. There is no need to upload the archive manually.
+
+- Update CHANGELOG.md.
+- Write "ANNOUNCE" mail to linux-raid@kernel.org to notify community.
index a33319a890cf1b9335acd4eb4a0ea1dd1467cfc2..adac7905ab576fce4a072b9a074184495f239309 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@
 
 # define "CXFLAGS" to give extra flags to CC.
 # e.g.  make CXFLAGS=-O to optimise
-CXFLAGS ?=-O2
+CXFLAGS ?=-O2 -D_FORTIFY_SOURCE=2
 TCC = tcc
 UCLIBC_GCC = $(shell for nm in i386-uclibc-linux-gcc i386-uclibc-gcc; do which $$nm > /dev/null && { echo $$nm ; exit; } ; done; echo false No uclibc found )
 #DIET_GCC = diet gcc
@@ -50,14 +50,30 @@ ifeq ($(origin CC),default)
 CC := $(CROSS_COMPILE)gcc
 endif
 CXFLAGS ?= -ggdb
-CWFLAGS = -Wall -Werror -Wstrict-prototypes -Wextra -Wno-unused-parameter
+CWFLAGS ?= -Wall -Werror -Wstrict-prototypes -Wextra -Wno-unused-parameter -Wformat -Wformat-security -Werror=format-security -fstack-protector-strong -fPIE -Warray-bounds
 ifdef WARN_UNUSED
-CWFLAGS += -Wp,-D_FORTIFY_SOURCE=2 -O3
+CWFLAGS += -Wp -O3
 endif
 
-FALLTHROUGH := $(shell gcc -v --help 2>&1 | grep "implicit-fallthrough" | wc -l)
-ifneq "$(FALLTHROUGH)"  "0"
-CWFLAGS += -Wimplicit-fallthrough=0
+ifeq ($(origin FALLTHROUGH), undefined)
+       FALLTHROUGH := $(shell gcc -Q --help=warnings 2>&1 | grep "implicit-fallthrough" | wc -l)
+       ifneq "$(FALLTHROUGH)"  "0"
+       CWFLAGS += -Wimplicit-fallthrough=0
+       endif
+endif
+
+ifeq ($(origin FORMATOVERFLOW), undefined)
+       FORMATOVERFLOW := $(shell gcc -Q --help=warnings 2>&1 | grep "format-overflow" | wc -l)
+       ifneq "$(FORMATOVERFLOW)"  "0"
+       CWFLAGS += -Wformat-overflow
+       endif
+endif
+
+ifeq ($(origin STRINGOPOVERFLOW), undefined)
+       STRINGOPOVERFLOW := $(shell gcc -Q --help=warnings 2>&1 | grep "stringop-overflow" | wc -l)
+       ifneq "$(STRINGOPOVERFLOW)"  "0"
+       CWFLAGS += -Wstringop-overflow
+       endif
 endif
 
 ifdef DEBIAN
@@ -105,7 +121,8 @@ VERSION = $(shell [ -d .git ] && git describe HEAD | sed 's/mdadm-//')
 VERS_DATE = $(shell [ -d .git ] && date --iso-8601 --date="`git log -n1 --format=format:%cd --date=iso --date=short`")
 DVERS = $(if $(VERSION),-DVERSION=\"$(VERSION)\",)
 DDATE = $(if $(VERS_DATE),-DVERS_DATE="\"$(VERS_DATE)\"",)
-CFLAGS += $(DVERS) $(DDATE)
+DEXTRAVERSION = $(if $(EXTRAVERSION),-DEXTRAVERSION="\" - $(EXTRAVERSION)\"",)
+CFLAGS += $(DVERS) $(DDATE) $(DEXTRAVERSION)
 
 # The glibc TLS ABI requires applications that call clone(2) to set up
 # TLS data structures, use pthreads until mdmon implements this support
@@ -115,10 +132,17 @@ CFLAGS += -DUSE_PTHREADS
 MON_LDFLAGS += -pthread
 endif
 
+LDFLAGS ?= -pie -Wl,-z,now,-z,noexecstack
+
 # If you want a static binary, you might uncomment these
-# LDFLAGS = -static
+# LDFLAGS += -static
 # STRIP = -s
-LDLIBS=-ldl
+LDLIBS = -ldl
+
+# To explicitly disable libudev, set -DNO_LIBUDEV in CXFLAGS
+ifeq (, $(findstring -DNO_LIBUDEV,  $(CXFLAGS)))
+       LDLIBS += -ludev
+endif
 
 INSTALL = /usr/bin/install
 DESTDIR =
@@ -139,27 +163,27 @@ else
        ECHO=:
 endif
 
-OBJS =  mdadm.o config.o policy.o mdstat.o  ReadMe.o util.o maps.o lib.o \
-       Manage.o Assemble.o Build.o \
-       Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
-       Incremental.o Dump.o \
-       mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
-       super-mbr.o super-gpt.o \
-       restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o xmalloc.o \
-       platform-intel.o probe_roms.o crc32c.o
+OBJS = mdadm.o config.o policy.o mdstat.o  ReadMe.o uuid.o util.o maps.o lib.o udev.o \
+       Manage.o Assemble.o Build.o \
+       Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
+       Incremental.o Dump.o \
+       mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
+       super-mbr.o super-gpt.o \
+       restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o xmalloc.o \
+       platform-intel.o probe_roms.o crc32c.o drive_encryption.o
 
-CHECK_OBJS = restripe.o sysfs.o maps.o lib.o xmalloc.o dlink.o
+CHECK_OBJS = restripe.o uuid.o sysfs.o maps.o lib.o xmalloc.o dlink.o
 
 SRCS =  $(patsubst %.o,%.c,$(OBJS))
 
 INCL = mdadm.h part.h bitmap.h
 
-MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
-       policy.o lib.o \
+MON_OBJS = mdmon.o monitor.o managemon.o uuid.o util.o maps.o mdstat.o sysfs.o config.o mapfile.o mdopen.o\
+       policy.o lib.o udev.o \
        Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \
        super-mbr.o super-gpt.o \
        super-ddf.o sha1.o crc32.o msg.o bitmap.o xmalloc.o \
-       platform-intel.o probe_roms.o crc32c.o
+       platform-intel.o probe_roms.o crc32c.o drive_encryption.o
 
 MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
 
@@ -176,9 +200,9 @@ check_rundir:
                echo "***** or set CHECK_RUN_DIR=0"; exit 1; \
        fi
 
-everything: all mdadm.static swap_super test_stripe raid6check \
+everything: all swap_super test_stripe raid6check \
        mdadm.Os mdadm.O2 man
-everything-test: all mdadm.static swap_super test_stripe \
+everything-test: all swap_super test_stripe \
        mdadm.Os mdadm.O2 man
 # mdadm.uclibc doesn't work on x86-64
 # mdadm.tcc doesn't work..
@@ -203,14 +227,13 @@ mdadm.Os : $(SRCS) $(INCL)
        $(CC) -o mdadm.Os $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -Os $(SRCS) $(LDLIBS)
 
 mdadm.O2 : $(SRCS) $(INCL) mdmon.O2
-       $(CC) -o mdadm.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(SRCS) $(LDLIBS)
+       $(CC) -o mdadm.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -O2 $(SRCS) $(LDLIBS)
 
 mdmon.O2 : $(MON_SRCS) $(INCL) mdmon.h
-       $(CC) -o mdmon.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(MON_SRCS) $(LDLIBS)
+       $(CC) -o mdmon.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -DHAVE_STDINT_H -O2 $(MON_SRCS) $(LDLIBS)
 
-# use '-z now' to guarantee no dynamic linker interactions with the monitor thread
 mdmon : $(MON_OBJS) | check_rundir
-       $(CC) $(CFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -Wl,-z,now -o mdmon $(MON_OBJS) $(LDLIBS)
+       $(CC) $(CFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS)
 msg.o: msg.c msg.h
 
 test_stripe : restripe.c xmalloc.o mdadm.h
@@ -221,7 +244,12 @@ raid6check : raid6check.o mdadm.h $(CHECK_OBJS)
 
 mdadm.8 : mdadm.8.in
        sed -e 's/{DEFAULT_METADATA}/$(DEFAULT_METADATA)/g' \
-       -e 's,{MAP_PATH},$(MAP_PATH),g'  mdadm.8.in > mdadm.8
+       -e 's,{MAP_PATH},$(MAP_PATH),g' -e 's,{CONFFILE},$(CONFFILE),g' \
+       -e 's,{CONFFILE2},$(CONFFILE2),g'  mdadm.8.in > mdadm.8
+
+mdadm.conf.5 : mdadm.conf.5.in
+       sed -e 's,{CONFFILE},$(CONFFILE),g' \
+       -e 's,{CONFFILE2},$(CONFFILE2),g'  mdadm.conf.5.in > mdadm.conf.5
 
 mdadm.man : mdadm.8
        man -l mdadm.8 > mdadm.man
@@ -244,9 +272,7 @@ $(MON_OBJS) : $(INCL) mdmon.h
 sha1.o : sha1.c sha1.h md5.h
        $(CC) $(CFLAGS) -DHAVE_STDINT_H -o sha1.o -c sha1.c
 
-install : mdadm mdmon install-man install-udev
-       $(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm
-       $(INSTALL) -D $(STRIP) -m 755 mdmon $(DESTDIR)$(BINDIR)/mdmon
+install : install-bin install-man install-udev
 
 install-static : mdadm.static install-man
        $(INSTALL) -D $(STRIP) -m 755 mdadm.static $(DESTDIR)$(BINDIR)/mdadm
@@ -296,6 +322,10 @@ install-systemd: systemd/mdmon@.service
        done
        if [ -f /etc/SuSE-release -o -n "$(SUSE)" ] ;then $(INSTALL) -D -m 755 systemd/SUSE-mdadm_env.sh $(DESTDIR)$(LIB_DIR)/mdadm_env.sh ;fi
 
+install-bin: mdadm mdmon
+       $(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm
+       $(INSTALL) -D $(STRIP) -m 755 mdmon $(DESTDIR)$(BINDIR)/mdmon
+
 uninstall:
        rm -f $(DESTDIR)$(MAN8DIR)/mdadm.8 $(DESTDIR)$(MAN8DIR)/mdmon.8 $(DESTDIR)$(MAN4DIR)/md.4 $(DESTDIR)$(MAN5DIR)/mdadm.conf.5 $(DESTDIR)$(BINDIR)/mdadm
 
index b22c3969f05c7a3c7dc29f6bd32501cb22bdbd1c..96e5ee5427a243bd70ca6d11c2e77a54f2362838 100644 (file)
--- a/Manage.c
+++ b/Manage.c
@@ -25,6 +25,7 @@
 #include "mdadm.h"
 #include "md_u.h"
 #include "md_p.h"
+#include "udev.h"
 #include <ctype.h>
 
 int Manage_ro(char *devname, int fd, int readonly)
@@ -177,10 +178,10 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
        struct map_ent *map = NULL;
        struct mdinfo *mdi;
        char devnm[32];
-       char container[32];
+       char container[MD_NAME_MAX] = {0};
        int err;
        int count;
-       char buf[32];
+       char buf[SYSFS_MAX_BUF_SIZE];
        unsigned long long rd1, rd2;
 
        if (will_retry && verbose == 0)
@@ -191,15 +192,9 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
         * to stop is probably a bad idea.
         */
        mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
-       if (mdi && is_subarray(mdi->text_version)) {
-               char *sl;
-               strncpy(container, mdi->text_version+1, sizeof(container));
-               container[sizeof(container)-1] = 0;
-               sl = strchr(container, '/');
-               if (sl)
-                       *sl = 0;
-       } else
-               container[0] = 0;
+       if (mdi && is_subarray(mdi->text_version))
+               sysfs_get_container_devnm(mdi, container);
+
        close(fd);
        count = 5;
        while (((fd = ((devname[0] == '/')
@@ -222,6 +217,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                if (verbose >= 0)
                        pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
                               devname);
+               sysfs_free(mdi);
                return 1;
        }
        /* If this is an mdmon managed array, just write 'inactive'
@@ -244,7 +240,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                                            "array_state",
                                            "inactive")) < 0 &&
                       errno == EBUSY) {
-                       usleep(200000);
+                       sleep_for(0, MSEC_TO_NSEC(200), true);
                        count--;
                }
                if (err) {
@@ -307,10 +303,10 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
         *  - unfreeze reshape
         *  - wait on 'sync_completed' for that point to be reached.
         */
-       if (mdi && (mdi->array.level >= 4 && mdi->array.level <= 6) &&
+       if (mdi && is_level456(mdi->array.level) &&
            sysfs_attribute_available(mdi, NULL, "sync_action") &&
            sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
-           sysfs_get_str(mdi, NULL, "sync_action", buf, 20) > 0 &&
+           sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf)) > 0 &&
            strcmp(buf, "reshape\n") == 0 &&
            sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
                unsigned long long position, curr;
@@ -328,7 +324,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                       sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
                        /* must be in the critical section - wait a bit */
                        delay -= 1;
-                       usleep(100000);
+                       sleep_for(0, MSEC_TO_NSEC(100), true);
                }
 
                if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
@@ -400,12 +396,12 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry)
                        unsigned long long max_completed;
                        sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
                        sysfs_fd_get_str(scfd, buf, sizeof(buf));
-                       if (strncmp(buf, "none", 4) == 0) {
+                       if (str_is_none(buf) == true) {
                                /* Either reshape has aborted, or hasn't
                                 * quite started yet.  Wait a bit and
                                 * check  'sync_action' to see.
                                 */
-                               usleep(10000);
+                               sleep_for(0, MSEC_TO_NSEC(10), true);
                                sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
                                if (strncmp(buf, "reshape", 7) != 0)
                                        break;
@@ -447,7 +443,7 @@ done:
        count = 25; err = 0;
        while (count && fd >= 0 &&
               (err = ioctl(fd, STOP_ARRAY, NULL)) < 0 && errno == EBUSY) {
-               usleep(200000);
+               sleep_for(0, MSEC_TO_NSEC(200), true);
                count --;
        }
        if (fd >= 0 && err) {
@@ -461,18 +457,7 @@ done:
                goto out;
        }
 
-       if (get_linux_version() < 2006028) {
-               /* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
-                * was stopped, so We'll do it here just to be sure.  Drop any
-                * partitions as well...
-                */
-               if (fd >= 0)
-                       ioctl(fd, BLKRRPART, 0);
-               if (mdi)
-                       sysfs_uevent(mdi, "change");
-       }
-
-       if (devnm[0] && use_udev()) {
+       if (devnm[0] && udev_is_available()) {
                struct map_ent *mp = map_by_devnm(&map, devnm);
                remove_devices(devnm, mp ? mp->path : NULL);
        }
@@ -598,9 +583,8 @@ static void add_set(struct mddev_dev *dv, int fd, char set_char)
 
 int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                   struct supertype *dev_st, struct supertype *tst,
-                  unsigned long rdev,
-                  char *update, char *devname, int verbose,
-                  mdu_array_info_t *array)
+                  unsigned long rdev, enum update_opt update,
+                  char *devname, int verbose, mdu_array_info_t *array)
 {
        struct mdinfo mdi;
        int duuid[4];
@@ -622,12 +606,6 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
                 * though.
                 */
                mdu_disk_info_t disc;
-               /* re-add doesn't work for version-1 superblocks
-                * before 2.6.18 :-(
-                */
-               if (array->major_version == 1 &&
-                   get_linux_version() <= 2006018)
-                       goto skip_re_add;
                disc.number = mdi.disk.number;
                if (md_get_disk_info(fd, &disc) != 0 ||
                    disc.major != 0 || disc.minor != 0)
@@ -666,19 +644,19 @@ int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
 
                        if (dv->writemostly == FlagSet)
                                rv = dev_st->ss->update_super(
-                                       dev_st, NULL, "writemostly",
+                                       dev_st, NULL, UOPT_SPEC_WRITEMOSTLY,
                                        devname, verbose, 0, NULL);
                        if (dv->writemostly == FlagClear)
                                rv = dev_st->ss->update_super(
-                                       dev_st, NULL, "readwrite",
+                                       dev_st, NULL, UOPT_SPEC_READWRITE,
                                        devname, verbose, 0, NULL);
                        if (dv->failfast == FlagSet)
                                rv = dev_st->ss->update_super(
-                                       dev_st, NULL, "failfast",
+                                       dev_st, NULL, UOPT_SPEC_FAILFAST,
                                        devname, verbose, 0, NULL);
                        if (dv->failfast == FlagClear)
                                rv = dev_st->ss->update_super(
-                                       dev_st, NULL, "nofailfast",
+                                       dev_st, NULL, UOPT_SPEC_NOFAILFAST,
                                        devname, verbose, 0, NULL);
                        if (update)
                                rv = dev_st->ss->update_super(
@@ -711,16 +689,110 @@ skip_re_add:
        return 0;
 }
 
+/**
+ * manage_add_external() - Add disk to external container.
+ * @st: external supertype pointer, must not be NULL, superblock is released here.
+ * @fd: container file descriptor, must not have O_EXCL mode.
+ * @disk_name: name of the device to add; this function opens it exclusively itself,
+ *             no disk descriptor is passed in.
+ * @disc: disk info.
+ *
+ * Superblock is released here because any open fd with O_EXCL will block sysfs_add_disk().
+ */
+mdadm_status_t manage_add_external(struct supertype *st, int fd, char *disk_name,
+                                  mdu_disk_info_t *disc)
+{
+       mdadm_status_t rv = MDADM_STATUS_ERROR;
+       char container_devpath[MD_NAME_MAX];
+       struct dev_policy *pols = NULL;
+       struct mdinfo new_mdi;
+       struct mdinfo *sra = NULL;
+       int container_fd;
+       int disk_fd = -1;
+
+       snprintf(container_devpath, MD_NAME_MAX, "%s", fd2devnm(fd));
+
+       container_fd = open_dev_excl(container_devpath);
+       if (!is_fd_valid(container_fd)) {
+               pr_err("Failed to get exclusive access to container %s\n", container_devpath);
+               return MDADM_STATUS_ERROR;
+       }
+
+       /* Check if metadata handler is able to accept the drive */
+       if (!st->ss->validate_geometry(st, LEVEL_CONTAINER, 0, 1, NULL, 0, 0, disk_name, NULL,
+                                      0, 1))
+               goto out;
+
+       if (mddev_test_and_add_drive_policies(st, &pols, container_fd, 1))
+               goto out;
+
+       Kill(disk_name, NULL, 0, -1, 0);
+
+       disk_fd = dev_open(disk_name, O_RDWR | O_EXCL | O_DIRECT);
+       if (!is_fd_valid(disk_fd)) {
+               pr_err("Failed to exclusively open %s\n", disk_name);
+               goto out;
+       }
+
+       if (drive_test_and_add_policies(st, &pols, disk_fd, 1))
+               goto out;
+
+       if (st->ss->add_to_super(st, disc, disk_fd, disk_name, INVALID_SECTORS))
+               goto out;
+
+       if (!mdmon_running(st->container_devnm))
+               st->ss->sync_metadata(st);
+
+       sra = sysfs_read(container_fd, NULL, 0);
+       if (!sra) {
+               pr_err("Failed to read sysfs for %s\n", disk_name);
+               goto out;
+       }
+
+       sra->array.level = LEVEL_CONTAINER;
+       /* Need to set data_offset and component_size */
+       st->ss->getinfo_super(st, &new_mdi, NULL);
+       new_mdi.disk.major = disc->major;
+       new_mdi.disk.minor = disc->minor;
+       new_mdi.recovery_start = 0;
+
+       st->ss->free_super(st);
+
+       if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
+               pr_err("Failed to add %s to container %s\n", disk_name, container_devpath);
+               goto out;
+       }
+       ping_monitor(container_devpath);
+       rv = MDADM_STATUS_SUCCESS;
+
+out:
+       close(container_fd);
+       dev_policy_free(pols);
+
+       if (sra)
+               sysfs_free(sra);
+
+       if (rv != MDADM_STATUS_SUCCESS && is_fd_valid(disk_fd))
+               /* Metadata handler records this descriptor, so release it only on failure. */
+               close(disk_fd);
+
+       if (st->sb)
+               st->ss->free_super(st);
+
+       return rv;
+}
+
 int Manage_add(int fd, int tfd, struct mddev_dev *dv,
               struct supertype *tst, mdu_array_info_t *array,
               int force, int verbose, char *devname,
-              char *update, unsigned long rdev, unsigned long long array_size,
-              int raid_slot)
+              enum update_opt update, unsigned long rdev,
+              unsigned long long array_size, int raid_slot)
 {
        unsigned long long ldsize;
        struct supertype *dev_st;
        int j;
        mdu_disk_info_t disc;
+       struct map_ent *map = NULL;
 
        if (!get_dev_size(tfd, dv->devname, &ldsize)) {
                if (dv->disposition == 'M')
@@ -809,19 +881,23 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                 * simply re-add it.
                 */
 
-               if (array->not_persistent == 0) {
+               if (array->not_persistent == 0 && dv->disposition != 'S') {
+                       int rv = 0;
+
                        dev_st = dup_super(tst);
                        dev_st->ss->load_super(dev_st, tfd, NULL);
-                       if (dev_st->sb && dv->disposition != 'S') {
-                               int rv;
 
-                               rv = attempt_re_add(fd, tfd, dv, dev_st, tst,
-                                                   rdev, update, devname,
-                                                   verbose, array);
+                       if (dev_st->sb) {
+                               rv = attempt_re_add(fd, tfd, dv, dev_st, tst, rdev, update,
+                                                   devname, verbose, array);
+
                                dev_st->ss->free_super(dev_st);
-                               if (rv)
-                                       return rv;
                        }
+
+                       free(dev_st);
+
+                       if (rv)
+                               return rv;
                }
                if (dv->disposition == 'M') {
                        if (verbose > 0)
@@ -918,6 +994,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                disc.raid_disk = 0;
        }
 
+       if (map_lock(&map))
+               pr_err("failed to get exclusive lock on mapfile when add disk\n");
+
        if (array->not_persistent==0) {
                int dfd;
                if (dv->disposition == 'j')
@@ -929,9 +1008,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
                if (tst->ss->add_to_super(tst, &disc, dfd,
                                          dv->devname, INVALID_SECTORS))
-                       return -1;
+                       goto unlock;
                if (tst->ss->write_init_super(tst))
-                       return -1;
+                       goto unlock;
        } else if (dv->disposition == 'A') {
                /*  this had better be raid1.
                 * As we are "--re-add"ing we must find a spare slot
@@ -974,65 +1053,8 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
        if (dv->failfast == FlagSet)
                disc.state |= (1 << MD_DISK_FAILFAST);
        if (tst->ss->external) {
-               /* add a disk
-                * to an external metadata container */
-               struct mdinfo new_mdi;
-               struct mdinfo *sra;
-               int container_fd;
-               char devnm[32];
-               int dfd;
-
-               strcpy(devnm, fd2devnm(fd));
-
-               container_fd = open_dev_excl(devnm);
-               if (container_fd < 0) {
-                       pr_err("add failed for %s: could not get exclusive access to container\n",
-                              dv->devname);
-                       tst->ss->free_super(tst);
-                       return -1;
-               }
-
-               Kill(dv->devname, NULL, 0, -1, 0);
-               dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
-               if (mdmon_running(tst->container_devnm))
-                       tst->update_tail = &tst->updates;
-               if (tst->ss->add_to_super(tst, &disc, dfd,
-                                         dv->devname, INVALID_SECTORS)) {
-                       close(dfd);
-                       close(container_fd);
-                       return -1;
-               }
-               if (tst->update_tail)
-                       flush_metadata_updates(tst);
-               else
-                       tst->ss->sync_metadata(tst);
-
-               sra = sysfs_read(container_fd, NULL, 0);
-               if (!sra) {
-                       pr_err("add failed for %s: sysfs_read failed\n",
-                              dv->devname);
-                       close(container_fd);
-                       tst->ss->free_super(tst);
-                       return -1;
-               }
-               sra->array.level = LEVEL_CONTAINER;
-               /* Need to set data_offset and component_size */
-               tst->ss->getinfo_super(tst, &new_mdi, NULL);
-               new_mdi.disk.major = disc.major;
-               new_mdi.disk.minor = disc.minor;
-               new_mdi.recovery_start = 0;
-               /* Make sure fds are closed as they are O_EXCL which
-                * would block add_disk */
-               tst->ss->free_super(tst);
-               if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
-                       pr_err("add new device to external metadata failed for %s\n", dv->devname);
-                       close(container_fd);
-                       sysfs_free(sra);
-                       return -1;
-               }
-               ping_monitor(devnm);
-               sysfs_free(sra);
-               close(container_fd);
+               if (manage_add_external(tst, fd, dv->devname, &disc) != MDADM_STATUS_SUCCESS)
+                       goto unlock;
        } else {
                tst->ss->free_super(tst);
                if (ioctl(fd, ADD_NEW_DISK, &disc)) {
@@ -1042,7 +1064,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
                        else
                                pr_err("add new device failed for %s as %d: %s\n",
                                       dv->devname, j, strerror(errno));
-                       return -1;
+                       goto unlock;
                }
                if (dv->disposition == 'j') {
                        pr_err("Journal added successfully, making %s read-write\n", devname);
@@ -1053,7 +1075,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
        }
        if (verbose >= 0)
                pr_err("added %s\n", dv->devname);
+       map_unlock(&map);
        return 1;
+unlock:
+       map_unlock(&map);
+       return -1;
 }
 
 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
@@ -1102,7 +1128,7 @@ int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
                                ret = sysfs_unique_holder(devnm, rdev);
                                if (ret < 2)
                                        break;
-                               usleep(100 * 1000);     /* 100ms */
+                               sleep_for(0, MSEC_TO_NSEC(100), true);
                        } while (--count > 0);
 
                        if (ret == 0) {
@@ -1282,43 +1308,102 @@ int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
        return -1;
 }
 
+/**
+ * is_remove_safe() - Check if remove is safe.
+ * @array: Array info; level, raid_disks and layout are read.
+ * @fd: Array file descriptor.
+ * @devname: Name of device to remove.
+ * @verbose: When non-zero, a failed sysfs read is reported.
+ *
+ * The function determines if array will be operational after removing
+ * &devname. Only members that hold a slot inside the array and are in
+ * sync are counted as available; the device being removed is left out.
+ *
+ * NOTE(review): devname + 5 presumably skips a "/dev/" prefix - confirm
+ * that callers never pass a shorter, kernel-internal name here.
+ *
+ * Return: True if array will be operational, false otherwise (also when
+ * the sysfs attributes cannot be read).
+ */
+bool is_remove_safe(mdu_array_info_t *array, const int fd, char *devname, const int verbose)
+{
+       dev_t devid = devnm2devid(devname + 5);
+       struct mdinfo *mdi = sysfs_read(fd, NULL, GET_DEVS | GET_DISKS | GET_STATE);
+       struct mdinfo *disk;
+
+       if (!mdi) {
+               if (verbose)
+                       pr_err("Failed to read sysfs attributes for %s\n", devname);
+               return false;
+       }
+
+       char *avail = xcalloc(array->raid_disks, sizeof(char));
+
+       /*
+        * Walk the member list with a separate cursor: mdi must keep pointing
+        * at the head so that sysfs_free() below releases the whole list.
+        */
+       for (disk = mdi->devs; disk; disk = disk->next) {
+               /* Slots outside avail[] cannot be recorded. */
+               if (disk->disk.raid_disk < 0 || disk->disk.raid_disk >= array->raid_disks)
+                       continue;
+               if (!(disk->disk.state & (1 << MD_DISK_SYNC)))
+                       continue;
+               if (makedev(disk->disk.major, disk->disk.minor) == devid)
+                       continue;
+               avail[disk->disk.raid_disk] = 1;
+       }
+       sysfs_free(mdi);
+
+       bool is_enough = enough(array->level, array->raid_disks,
+                               array->layout, 1, avail);
+
+       free(avail);
+       return is_enough;
+}
+
+/**
+ * Manage_subdevs() - Execute operation depending on devmode.
+ *
+ * @devname: name of the device.
+ * @fd: file descriptor.
+ * @devlist: list of sub-devices to manage.
+ * @verbose: verbose level.
+ * @test: test flag.
+ * @update: type of update.
+ * @force: force flag.
+ *
+ * This function executes operation defined by devmode
+ * for each dev from devlist.
+ * Devmode can be:
+ * 'a' - add the device
+ * 'S' - add the device as a spare - don't try re-add
+ * 'j' - add the device as a journal device
+ * 'A' - re-add the device
+ * 'r' - remove the device: HOT_REMOVE_DISK
+ *       device can be 'faulty' or 'detached' in which case all
+ *       matching devices are removed.
+ * 'f' - set the device faulty SET_DISK_FAULTY
+ *       device can be 'detached' in which case any device that
+ *       is inaccessible will be marked faulty.
+ * 'I' - remove device by using incremental fail
+ *       which is executed when device is removed surprisingly.
+ * 'R' - mark this device as wanting replacement.
+ * 'W' - this device is added if necessary and activated as
+ *       a replacement for a previous 'R' device.
+ * -----
+ * 'w' - 'W' will be changed to 'w' when it is paired with
+ *       a 'R' device.  If a 'W' is found while walking the list
+ *       it must be unpaired, and is an error.
+ * 'M' - this is created by a 'missing' target.  It is a slight
+ *       variant on 'A'
+ * 'F' - Another variant of 'A', where the device was faulty
+ *       so must be removed from the array first.
+ * 'c' - confirm the device as found (for clustered environments)
+ *
+ * For 'f' and 'r', the device can also be a kernel-internal
+ * name such as 'sdb'.
+ *
+ * Return: 0 on success, otherwise 1 or 2.
+ */
 int Manage_subdevs(char *devname, int fd,
                   struct mddev_dev *devlist, int verbose, int test,
-                  char *update, int force)
+                  enum update_opt update, int force)
 {
-       /* Do something to each dev.
-        * devmode can be
-        *  'a' - add the device
-        *  'S' - add the device as a spare - don't try re-add
-        *  'j' - add the device as a journal device
-        *  'A' - re-add the device
-        *  'r' - remove the device: HOT_REMOVE_DISK
-        *        device can be 'faulty' or 'detached' in which case all
-        *        matching devices are removed.
-        *  'f' - set the device faulty SET_DISK_FAULTY
-        *        device can be 'detached' in which case any device that
-        *        is inaccessible will be marked faulty.
-        *  'R' - mark this device as wanting replacement.
-        *  'W' - this device is added if necessary and activated as
-        *        a replacement for a previous 'R' device.
-        * -----
-        *  'w' - 'W' will be changed to 'w' when it is paired with
-        *        a 'R' device.  If a 'W' is found while walking the list
-        *        it must be unpaired, and is an error.
-        *  'M' - this is created by a 'missing' target.  It is a slight
-        *        variant on 'A'
-        *  'F' - Another variant of 'A', where the device was faulty
-        *        so must be removed from the array first.
-        *  'c' - confirm the device as found (for clustered environments)
-        *
-        * For 'f' and 'r', the device can also be a kernel-internal
-        * name such as 'sdb'.
-        */
        mdu_array_info_t array;
        unsigned long long array_size;
        struct mddev_dev *dv;
        int tfd = -1;
-       struct supertype *tst;
+       struct supertype *tst = NULL;
        char *subarray = NULL;
        int sysfd = -1;
        int count = 0; /* number of actions taken */
@@ -1449,8 +1534,9 @@ int Manage_subdevs(char *devname, int fd,
                        /* Assume this is a kernel-internal name like 'sda1' */
                        int found = 0;
                        char dname[55];
-                       if (dv->disposition != 'r' && dv->disposition != 'f') {
-                               pr_err("%s only meaningful with -r or -f, not -%c\n",
+                       if (dv->disposition != 'r' && dv->disposition != 'f' &&
+                           dv->disposition != 'I') {
+                               pr_err("%s only meaningful with -r, -f or -I, not -%c\n",
                                        dv->devname, dv->disposition);
                                goto abort;
                        }
@@ -1458,8 +1544,8 @@ int Manage_subdevs(char *devname, int fd,
                        sprintf(dname, "dev-%s", dv->devname);
                        sysfd = sysfs_open(fd2devnm(fd), dname, "block/dev");
                        if (sysfd >= 0) {
-                               char dn[20];
-                               if (sysfs_fd_get_str(sysfd, dn, 20) > 0 &&
+                               char dn[SYSFS_MAX_BUF_SIZE];
+                               if (sysfs_fd_get_str(sysfd, dn, sizeof(dn)) > 0 &&
                                    sscanf(dn, "%d:%d", &mj,&mn) == 2) {
                                        rdev = makedev(mj,mn);
                                        found = 1;
@@ -1595,7 +1681,14 @@ int Manage_subdevs(char *devname, int fd,
                        break;
 
                case 'f': /* set faulty */
-                       /* FIXME check current member */
+                       if (!is_remove_safe(&array, fd, dv->devname, verbose)) {
+                               pr_err("Cannot remove %s from %s, array will be failed.\n",
+                                      dv->devname, devname);
+                               if (sysfd >= 0)
+                                       close(sysfd);
+                               goto abort;
+                       }
+               case 'I': /* incremental fail */
                        if ((sysfd >= 0 && write(sysfd, "faulty", 6) != 6) ||
                            (sysfd < 0 && ioctl(fd, SET_DISK_FAULTY,
                                                rdev))) {
@@ -1647,6 +1740,7 @@ int Manage_subdevs(char *devname, int fd,
                        break;
                }
        }
+       free(tst);
        if (frozen > 0)
                sysfs_set_str(&info, NULL, "sync_action","idle");
        if (test && count == 0)
@@ -1654,6 +1748,7 @@ int Manage_subdevs(char *devname, int fd,
        return 0;
 
 abort:
+       free(tst);
        if (frozen > 0)
                sysfs_set_str(&info, NULL, "sync_action","idle");
        return !test && busy ? 2 : 1;
@@ -1672,10 +1767,14 @@ int autodetect(void)
        return rv;
 }
 
-int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int verbose)
+int Update_subarray(char *dev, char *subarray, enum update_opt update,
+                   struct mddev_ident *ident, int verbose)
 {
        struct supertype supertype, *st = &supertype;
        int fd, rv = 2;
+       struct mdinfo *info = NULL;
+       char *update_verb = map_num(update_options, update);
+       bool allow_active = update == UOPT_PPL || update == UOPT_NO_PPL;
 
        memset(st, 0, sizeof(*st));
 
@@ -1690,25 +1789,41 @@ int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident
                goto free_super;
        }
 
+       if (!allow_active && is_subarray_active(subarray, st->devnm)) {
+               if (verbose >= 0)
+                       pr_err("Subarray %s in %s is active, cannot update %s\n",
+                               subarray, dev, update_verb);
+               goto free_super;
+       }
+
        if (mdmon_running(st->devnm))
                st->update_tail = &st->updates;
 
+       info = st->ss->container_content(st, subarray);
+
+       if (update == UOPT_PPL && !is_level456(info->array.level)) {
+               pr_err("RWH policy ppl is supported only for raid4, raid5 and raid6.\n");
+               goto free_super;
+       }
+
        rv = st->ss->update_subarray(st, subarray, update, ident);
 
        if (rv) {
                if (verbose >= 0)
                        pr_err("Failed to update %s of subarray-%s in %s\n",
-                               update, subarray, dev);
+                               update_verb, subarray, dev);
        } else if (st->update_tail)
                flush_metadata_updates(st);
        else
                st->ss->sync_metadata(st);
 
-       if (rv == 0 && strcmp(update, "name") == 0 && verbose >= 0)
+       if (rv == 0 && update == UOPT_NAME && verbose >= 0)
                pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
                       subarray, dev);
 
- free_super:
+free_super:
+       if (info)
+               free(info);
        st->ss->free_super(st);
        close(fd);
 
@@ -1743,10 +1858,10 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
        sprintf(devname, "%d:%d", major(devid), minor(devid));
 
        devlist.disposition = 'r';
-       if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
+       if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, UOPT_UNDEFINED, 0) == 0) {
                devlist.disposition = 'a';
                if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0,
-                                  NULL, 0) == 0) {
+                                  UOPT_UNDEFINED, 0) == 0) {
                        /* make sure manager is aware of changes */
                        ping_manager(to_devname);
                        ping_manager(from_devname);
@@ -1756,7 +1871,7 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
                }
                else
                        Manage_subdevs(from_devname, fd2, &devlist,
-                                      -1, 0, NULL, 0);
+                                      -1, 0, UOPT_UNDEFINED, 0);
        }
        close(fd1);
        close(fd2);
index b527165b803acc2eb3a9d84132f44a39f64aeba1..9b016bc3924540920962a51c2bd2edfe1c7ca013 100644 (file)
--- a/Monitor.c
+++ b/Monitor.c
  */
 
 #include       "mdadm.h"
+#include       "udev.h"
 #include       "md_p.h"
 #include       "md_u.h"
 #include       <sys/wait.h>
-#include       <signal.h>
 #include       <limits.h>
 #include       <syslog.h>
 
+#define TASK_COMM_LEN 16
+#define EVENT_NAME_MAX 32
+#define AUTOREBUILD_PID_PATH MDMON_DIR "/autorebuild.pid"
+#define FALLBACK_DELAY 5
+
+/**
+ * struct state - external array or container properties.
+ * @devname: has length of %DEV_MD_DIR + device name + terminating byte
+ * @devnm: to sync with mdstat info
+ * @parent_devnm: for subarray, devnm of parent, for others, ""
+ * @subarray: for a container it is a link to first subarray, for a subarray it is a link to next
+ *           subarray in the same container
+ * @parent: for a subarray it is a link to its container
+ */
 struct state {
-       char *devname;
-       char devnm[32]; /* to sync with mdstat info */
+       char devname[MD_NAME_MAX + sizeof(DEV_MD_DIR)];
+       char devnm[MD_NAME_MAX];
        unsigned int utime;
        int err;
        char *spare_group;
@@ -43,34 +57,80 @@ struct state {
        int devstate[MAX_DISKS];
        dev_t devid[MAX_DISKS];
        int percent;
-       char parent_devnm[32]; /* For subarray, devnm of parent.
-                               * For others, ""
-                               */
+       char parent_devnm[MD_NAME_MAX];
        struct supertype *metadata;
-       struct state *subarray;/* for a container it is a link to first subarray
-                               * for a subarray it is a link to next subarray
-                               * in the same container */
-       struct state *parent;  /* for a subarray it is a link to its container
-                               */
+       struct state *subarray;
+       struct state *parent;
        struct state *next;
 };
 
 struct alert_info { /* alert settings, kept in the file-scope object 'info' */
+       char hostname[HOST_NAME_MAX]; /* filled by s_gethostname() in Monitor() */
        char *mailaddr; /* Monitor() argument, or conf_get_mailaddr() when that is NULL */
        char *mailfrom; /* set from conf_get_mailfrom() in Monitor() */
        char *alert_cmd; /* Monitor() argument, or conf_get_program() when that is NULL */
        int dosyslog; /* copied from the dosyslog argument of Monitor() */
+       int test; /* copied from c->test; Monitor() sets it to 0 after a monitoring pass */
+} info;
+
+enum event { /* event identifiers; their names are listed in events_map[] */
+       EVENT_SPARE_ACTIVE = 0,
+       EVENT_NEW_ARRAY,
+       EVENT_MOVE_SPARE,
+       EVENT_TEST_MESSAGE,
+       __SYSLOG_PRIORITY_WARNING, /* has no name in events_map[]; NOTE(review): looks like a priority boundary - confirm */
+       EVENT_REBUILD_STARTED,
+       EVENT_REBUILD,
+       EVENT_REBUILD_FINISHED,
+       EVENT_SPARES_MISSING,
+       __SYSLOG_PRIORITY_CRITICAL, /* has no name in events_map[]; NOTE(review): looks like a priority boundary - confirm */
+       EVENT_DEVICE_DISAPPEARED,
+       EVENT_FAIL,
+       EVENT_FAIL_SPARE,
+       EVENT_DEGRADED_ARRAY,
+       EVENT_UNKNOWN /* paired with the NULL name that ends events_map[] */
 };
-static int make_daemon(char *pidfile);
+
+mapping_t events_map[] = { /* pairs each event name string with its enum event value */
+       {"SpareActive", EVENT_SPARE_ACTIVE},
+       {"NewArray", EVENT_NEW_ARRAY},
+       {"MoveSpare", EVENT_MOVE_SPARE},
+       {"TestMessage", EVENT_TEST_MESSAGE},
+       {"RebuildStarted", EVENT_REBUILD_STARTED},
+       {"Rebuild", EVENT_REBUILD},
+       {"RebuildFinished", EVENT_REBUILD_FINISHED},
+       {"SparesMissing", EVENT_SPARES_MISSING},
+       {"DeviceDisappeared", EVENT_DEVICE_DISAPPEARED},
+       {"Fail", EVENT_FAIL},
+       {"FailSpare", EVENT_FAIL_SPARE},
+       {"DegradedArray", EVENT_DEGRADED_ARRAY},
+       {NULL, EVENT_UNKNOWN} /* last entry; NULL name presumably ends lookups - TODO confirm */
+};
+
+struct event_data { /* NOTE(review): field roles below are inferred from names - confirm at use sites */
+       enum event event_enum;
+       /*
+        * @event_name: Rebuild event name must be in form "RebuildXX", where XX is rebuild progress.
+        * The name is held in a fixed buffer of EVENT_NAME_MAX bytes.
+        */
+       char event_name[EVENT_NAME_MAX];
+       char message[BUFSIZ];
+       const char *description;
+       const char *dev;
+       const char *disc;
+};
+
+static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist);
+static void try_spare_migration(struct state *statelist);
+static void link_containers_with_subarrays(struct state *list);
+static void free_statelist(struct state *statelist);
+static int check_array(struct state *st, struct mdstat_ent *mdstat, int increments, char *prefer);
 static int check_one_sharer(int scan);
-static void alert(char *event, char *dev, char *disc, struct alert_info *info);
-static int check_array(struct state *st, struct mdstat_ent *mdstat,
-                      int test, struct alert_info *info,
-                      int increments, char *prefer);
-static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
-                         int test, struct alert_info *info);
-static void try_spare_migration(struct state *statelist, struct alert_info *info);
 static void link_containers_with_subarrays(struct state *list);
+static int make_daemon(char *pidfile);
+static void try_spare_migration(struct state *statelist);
+static void wait_for_events(int *delay_for_event, int c_delay);
+static void wait_for_events_mdstat(int *delay_for_event, int c_delay);
+static int write_autorebuild_pid(void);
 
 int Monitor(struct mddev_dev *devlist,
            char *mailaddr, char *alert_cmd,
@@ -116,41 +176,59 @@ int Monitor(struct mddev_dev *devlist,
         *  and if we can get_disk_info and find a name
         *  Then we hot-remove and hot-add to the other array
         *
-        * If devlist is NULL, then we can monitor everything because --scan
+        * If devlist is NULL, then we can monitor everything if --scan
         * was given.  We get an initial list from config file and add anything
         * that appears in /proc/mdstat
         */
 
        struct state *statelist = NULL;
-       struct state *st2;
        int finished = 0;
        struct mdstat_ent *mdstat = NULL;
        char *mailfrom;
-       struct alert_info info;
        struct mddev_ident *mdlist;
+       int delay_for_event = c->delay;
 
-       if (!mailaddr) {
-               mailaddr = conf_get_mailaddr();
-               if (mailaddr && ! c->scan)
-                       pr_err("Monitor using email address \"%s\" from config file\n",
-                              mailaddr);
+       if (devlist && c->scan) {
+               pr_err("Devices list and --scan option cannot be combined - not monitoring.\n");
+               return 1;
        }
-       mailfrom = conf_get_mailfrom();
 
-       if (!alert_cmd) {
+       if (!mailaddr)
+               mailaddr = conf_get_mailaddr();
+
+       if (!alert_cmd)
                alert_cmd = conf_get_program();
-               if (alert_cmd && !c->scan)
-                       pr_err("Monitor using program \"%s\" from config file\n",
-                              alert_cmd);
-       }
+
+       mailfrom = conf_get_mailfrom();
+
        if (c->scan && !mailaddr && !alert_cmd && !dosyslog) {
                pr_err("No mail address or alert command - not monitoring.\n");
                return 1;
        }
+
+       if (c->verbose) {
+               pr_err("Monitor is started with delay %ds\n", c->delay);
+               if (mailaddr)
+                       pr_err("Monitor using email address %s\n", mailaddr);
+               if (alert_cmd)
+                       pr_err("Monitor using program %s\n", alert_cmd);
+       }
+
        info.alert_cmd = alert_cmd;
        info.mailaddr = mailaddr;
        info.mailfrom = mailfrom;
        info.dosyslog = dosyslog;
+       info.test = c->test;
+
+       if (s_gethostname(info.hostname, sizeof(info.hostname)) != 0) {
+               pr_err("Cannot get hostname.\n");
+               return 1;
+       }
+
+       if (share){
+               if (check_one_sharer(c->scan) == 2)
+                       return 1;
+       }
 
        if (daemonise) {
                int rv = make_daemon(pidfile);
@@ -159,7 +237,7 @@ int Monitor(struct mddev_dev *devlist,
        }
 
        if (share)
-               if (check_one_sharer(c->scan))
+               if (write_autorebuild_pid() != 0)
                        return 1;
 
        if (devlist == NULL) {
@@ -169,16 +247,14 @@ int Monitor(struct mddev_dev *devlist,
 
                        if (mdlist->devname == NULL)
                                continue;
-                       if (strcasecmp(mdlist->devname, "<ignore>") == 0)
+                       if (is_devname_ignore(mdlist->devname) == true)
                                continue;
+                       if (!is_mddev(mdlist->devname))
+                               continue;
+
                        st = xcalloc(1, sizeof *st);
-                       if (mdlist->devname[0] == '/')
-                               st->devname = xstrdup(mdlist->devname);
-                       else {
-                               st->devname = xmalloc(8+strlen(mdlist->devname)+1);
-                               strcpy(strcpy(st->devname, "/dev/md/"),
-                                      mdlist->devname);
-                       }
+                       snprintf(st->devname, MD_NAME_MAX + sizeof(DEV_MD_DIR), DEV_MD_DIR "%s",
+                                basename(mdlist->devname));
                        st->next = statelist;
                        st->devnm[0] = 0;
                        st->percent = RESYNC_UNKNOWN;
@@ -192,9 +268,14 @@ int Monitor(struct mddev_dev *devlist,
                struct mddev_dev *dv;
 
                for (dv = devlist; dv; dv = dv->next) {
-                       struct state *st = xcalloc(1, sizeof *st);
+                       struct state *st;
+
+                       if (!is_mddev(dv->devname))
+                               continue;
+
+                       st = xcalloc(1, sizeof *st);
                        mdlist = conf_get_ident(dv->devname);
-                       st->devname = xstrdup(dv->devname);
+                       snprintf(st->devname, MD_NAME_MAX + sizeof(DEV_MD_DIR), "%s", dv->devname);
                        st->next = statelist;
                        st->devnm[0] = 0;
                        st->percent = RESYNC_UNKNOWN;
@@ -212,57 +293,108 @@ int Monitor(struct mddev_dev *devlist,
                int new_found = 0;
                struct state *st, **stp;
                int anydegraded = 0;
+               int anyredundant = 0;
 
                if (mdstat)
                        free_mdstat(mdstat);
                mdstat = mdstat_read(oneshot ? 0 : 1, 0);
-               if (!mdstat)
-                       mdstat_close();
 
-               for (st = statelist; st; st = st->next)
-                       if (check_array(st, mdstat, c->test, &info,
-                                       increments, c->prefer))
+               for (st = statelist; st; st = st->next) {
+                       if (check_array(st, mdstat, increments, c->prefer))
                                anydegraded = 1;
+                       /* for external arrays, metadata is filled for
+                        * containers only
+                        */
+                       if (st->metadata && st->metadata->ss->external)
+                               continue;
+                       if (st->err == 0 && !anyredundant)
+                               anyredundant = 1;
+               }
 
                /* now check if there are any new devices found in mdstat */
                if (c->scan)
-                       new_found = add_new_arrays(mdstat, &statelist, c->test,
-                                                  &info);
+                       new_found = add_new_arrays(mdstat, &statelist);
 
                /* If an array has active < raid && spare == 0 && spare_group != NULL
                 * Look for another array with spare > 0 and active == raid and same spare_group
-                *  if found, choose a device and hotremove/hotadd
+                * if found, choose a device and hotremove/hotadd
                 */
                if (share && anydegraded)
-                       try_spare_migration(statelist, &info);
+                       try_spare_migration(statelist);
                if (!new_found) {
                        if (oneshot)
                                break;
-                       else
-                               mdstat_wait(c->delay);
+                       if (!anyredundant) {
+                               pr_err("No array with redundancy detected, stopping\n");
+                               break;
+                       }
+
+                       wait_for_events(&delay_for_event, c->delay);
                }
-               c->test = 0;
+               info.test = 0;
 
                for (stp = &statelist; (st = *stp) != NULL; ) {
                        if (st->from_auto && st->err > 5) {
                                *stp = st->next;
-                               free(st->devname);
-                               free(st->spare_group);
+                               if (st->spare_group)
+                                       free(st->spare_group);
+
                                free(st);
                        } else
                                stp = &st->next;
                }
        }
-       for (st2 = statelist; st2; st2 = statelist) {
-               statelist = st2->next;
-               free(st2);
-       }
+
+       free_statelist(statelist);
 
        if (pidfile)
                unlink(pidfile);
        return 0;
 }
 
+/*
+ * wait_for_events() - Waits for events on md devices.
+ * @delay_for_event: pointer to current event delay
+ * @c_delay: delay from config
+ *
+ * When built with libudev and udev is available, the wait is done through
+ * udev and an error status is reported. In every other case the wait is
+ * delegated to wait_for_events_mdstat().
+ */
+static void wait_for_events(int *delay_for_event, int c_delay)
+{
+#ifdef NO_LIBUDEV
+       wait_for_events_mdstat(delay_for_event, c_delay);
+#else
+       if (!udev_is_available()) {
+               wait_for_events_mdstat(delay_for_event, c_delay);
+               return;
+       }
+
+       if (udev_wait_for_events(*delay_for_event) == UDEV_STATUS_ERROR)
+               pr_err("Error while waiting for udev events.\n");
+#endif
+}
+
+/*
+ * wait_for_events_mdstat() - Waits for events on mdstat.
+ * @delay_for_event: pointer to current event delay
+ * @c_delay: delay from config
+ *
+ * A negative mdstat_wait() result is reported and leaves the delay untouched.
+ * A zero result restores the configured delay; any other result shortens the
+ * next wait to FALLBACK_DELAY when the configured delay is longer than that,
+ * to give a chance to process a new device.
+ * mdstat_close() is called on every path except the error one.
+ */
+static void wait_for_events_mdstat(int *delay_for_event, int c_delay)
+{
+       const int rc = mdstat_wait(*delay_for_event);
+
+       if (rc < 0) {
+               pr_err("Error while waiting for events on mdstat.\n");
+               return;
+       }
+
+       if (rc == 0)
+               *delay_for_event = c_delay;
+       else if (c_delay > FALLBACK_DELAY)
+               *delay_for_event = FALLBACK_DELAY;
+
+       mdstat_close();
+}
+
 static int make_daemon(char *pidfile)
 {
        /* Return:
@@ -276,8 +408,11 @@ static int make_daemon(char *pidfile)
                if (!pidfile)
                        printf("%d\n", pid);
                else {
-                       FILE *pid_file;
-                       pid_file=fopen(pidfile, "w");
+                       FILE *pid_file = NULL;
+                       int fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC,
+                                     0644);
+                       if (fd >= 0)
+                               pid_file = fdopen(fd, "w");
                        if (!pid_file)
                                perror("cannot create pid file");
                        else {
@@ -291,170 +426,317 @@ static int make_daemon(char *pidfile)
                perror("daemonise");
                return 1;
        }
-       close(0);
-       open("/dev/null", O_RDWR);
-       dup2(0, 1);
-       dup2(0, 2);
+       manage_fork_fds(0);
        setsid();
        return -1;
 }
 
+/*
+ * check_one_sharer() - Checks for other mdmon processes running.
+ *
+ * Return:
+ * 0 - no other processes running,
+ * 1 - warning,
+ * 2 - error, or when scan mode is enabled, and one mdmon process already exists
+ */
 static int check_one_sharer(int scan)
 {
-       int pid, rv;
-       FILE *fp;
-       char dir[20];
-       char path[100];
-       struct stat buf;
-       sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
-       fp = fopen(path, "r");
-       if (fp) {
-               if (fscanf(fp, "%d", &pid) != 1)
-                       pid = -1;
-               sprintf(dir, "/proc/%d", pid);
-               rv = stat(dir, &buf);
-               if (rv != -1) {
-                       if (scan) {
-                               pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
-                               fclose(fp);
-                               return 1;
-                       } else {
-                               pr_err("Warning: One autorebuild process already running.\n");
-                       }
-               }
+       int pid;
+       FILE *fp, *comm_fp;
+       char comm_path[PATH_MAX];
+       char comm[TASK_COMM_LEN];
+
+       if (!is_directory(MDMON_DIR)) {
+               pr_err("%s is not a regular directory.\n", MDMON_DIR);
+               return 2;
+       }
+
+       fp = fopen(AUTOREBUILD_PID_PATH, "r");
+       if (!fp) {
+               /* PID file does not exist */
+               if (errno == ENOENT)
+                       return 0;
+
+               pr_err("Cannot open %s file.\n", AUTOREBUILD_PID_PATH);
+               return 2;
+       }
+
+       if (!is_file(AUTOREBUILD_PID_PATH)) {
+               pr_err("%s is not a regular file.\n", AUTOREBUILD_PID_PATH);
                fclose(fp);
+               return 2;
        }
-       if (scan) {
-               if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) {
-                       pr_err("Can't create autorebuild.pid file\n");
-               } else {
-                       fp = fopen(path, "w");
-                       if (!fp)
-                               pr_err("Cannot create autorebuild.pidfile\n");
-                       else {
-                               pid = getpid();
-                               fprintf(fp, "%d\n", pid);
-                               fclose(fp);
-                       }
+
+       if (fscanf(fp, "%d", &pid) != 1) {
+               pr_err("Cannot read pid from %s file.\n", AUTOREBUILD_PID_PATH);
+               fclose(fp);
+               return 2;
+       }
+
+       snprintf(comm_path, sizeof(comm_path), "/proc/%d/comm", pid);
+
+       comm_fp = fopen(comm_path, "r");
+       if (!comm_fp) {
+               dprintf("Warning: Cannot open %s, continuing\n", comm_path);
+               fclose(fp);
+               return 1;
+       }
+
+       if (fscanf(comm_fp, "%15s", comm) == 0) {
+               dprintf("Warning: Cannot read comm from %s, continuing\n", comm_path);
+               fclose(comm_fp);
+               fclose(fp);
+               return 1;
+       }
+
+       if (strncmp(basename(comm), Name, strlen(Name)) == 0) {
+               if (scan) {
+                       pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
+                       fclose(comm_fp);
+                       fclose(fp);
+                       return 2;
                }
+               pr_err("Warning: One autorebuild process already running.\n");
        }
+       fclose(comm_fp);
+       fclose(fp);
        return 0;
 }
 
-static void alert(char *event, char *dev, char *disc, struct alert_info *info)
+/*
+ * write_autorebuild_pid() - Writes pid to autorebuild.pid file.
+ *
+ * Return: 0 on success, 1 on error
+ */
+static int write_autorebuild_pid(void)
 {
-       int priority;
+       FILE *fp;
+       int fd;
 
-       if (!info->alert_cmd && !info->mailaddr && !info->dosyslog) {
-               time_t now = time(0);
+       if (mkdir(MDMON_DIR, 0700) < 0 && errno != EEXIST) {
+               pr_err("%s: %s\n", strerror(errno), MDMON_DIR);
+               return 1;
+       }
 
-               printf("%1.15s: %s on %s %s\n", ctime(&now) + 4,
-                      event, dev, disc?disc:"unknown device");
+       if (!is_directory(MDMON_DIR)) {
+               pr_err("%s is not a regular directory.\n", MDMON_DIR);
+               return 1;
        }
-       if (info->alert_cmd) {
-               int pid = fork();
-               switch(pid) {
-               default:
-                       waitpid(pid, NULL, 0);
-                       break;
-               case -1:
-                       break;
-               case 0:
-                       execl(info->alert_cmd, info->alert_cmd,
-                             event, dev, disc, NULL);
-                       exit(2);
-               }
+
+       fd = open(AUTOREBUILD_PID_PATH, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+
+       if (fd < 0) {
+               pr_err("Error opening %s file.\n", AUTOREBUILD_PID_PATH);
+               return 1;
        }
-       if (info->mailaddr && (strncmp(event, "Fail", 4) == 0 ||
-                              strncmp(event, "Test", 4) == 0 ||
-                              strncmp(event, "Spares", 6) == 0 ||
-                              strncmp(event, "Degrade", 7) == 0)) {
-               FILE *mp = popen(Sendmail, "w");
-               if (mp) {
-                       FILE *mdstat;
-                       char hname[256];
-                       gethostname(hname, sizeof(hname));
-                       signal(SIGPIPE, SIG_IGN);
-                       if (info->mailfrom)
-                               fprintf(mp, "From: %s\n", info->mailfrom);
-                       else
-                               fprintf(mp, "From: %s monitoring <root>\n",
-                                       Name);
-                       fprintf(mp, "To: %s\n", info->mailaddr);
-                       fprintf(mp, "Subject: %s event on %s:%s\n\n",
-                               event, dev, hname);
-
-                       fprintf(mp,
-                               "This is an automatically generated mail message from %s\n", Name);
-                       fprintf(mp, "running on %s\n\n", hname);
-
-                       fprintf(mp,
-                               "A %s event had been detected on md device %s.\n\n", event, dev);
-
-                       if (disc && disc[0] != ' ')
-                               fprintf(mp,
-                                       "It could be related to component device %s.\n\n", disc);
-                       if (disc && disc[0] == ' ')
-                               fprintf(mp, "Extra information:%s.\n\n", disc);
-
-                       fprintf(mp, "Faithfully yours, etc.\n");
-
-                       mdstat = fopen("/proc/mdstat", "r");
-                       if (mdstat) {
-                               char buf[8192];
-                               int n;
-                               fprintf(mp,
-                                       "\nP.S. The /proc/mdstat file currently contains the following:\n\n");
-                               while ((n = fread(buf, 1, sizeof(buf),
-                                                 mdstat)) > 0)
-                                       n = fwrite(buf, 1, n, mp);
-                               fclose(mdstat);
-                       }
-                       pclose(mp);
-               }
+
+       fp = fdopen(fd, "w");
+
+       if (!fp) {
+               pr_err("Error opening fd for %s file.\n", AUTOREBUILD_PID_PATH);
+               return 1;
        }
 
-       /* log the event to syslog maybe */
-       if (info->dosyslog) {
-               /* Log at a different severity depending on the event.
-                *
-                * These are the critical events:  */
-               if (strncmp(event, "Fail", 4) == 0 ||
-                   strncmp(event, "Degrade", 7) == 0 ||
-                   strncmp(event, "DeviceDisappeared", 17) == 0)
-                       priority = LOG_CRIT;
-               /* Good to know about, but are not failures: */
-               else if (strncmp(event, "Rebuild", 7) == 0 ||
-                        strncmp(event, "MoveSpare", 9) == 0 ||
-                        strncmp(event, "Spares", 6) != 0)
-                       priority = LOG_WARNING;
-               /* Everything else: */
-               else
-                       priority = LOG_INFO;
-
-               if (disc && disc[0] != ' ')
-                       syslog(priority,
-                              "%s event detected on md device %s, component device %s", event, dev, disc);
-               else if (disc)
-                       syslog(priority,
-                              "%s event detected on md device %s: %s",
-                              event, dev, disc);
-               else
-                       syslog(priority,
-                              "%s event detected on md device %s",
-                              event, dev);
+       fprintf(fp, "%d\n", getpid());
+
+       fclose(fp);
+       return 0;
+}
+
+#define BASE_MESSAGE "%s event detected on md device %s"
+#define COMPONENT_DEVICE_MESSAGE ", component device %s"
+#define DESCRIPTION_MESSAGE ": %s"
+/*
+ * sprint_event_message() - Writes basic message about detected event to destination ptr.
+ * @dest: message destination, should be at least the size of BUFSIZ
+ * @data: event data
+ *
+ * Return: 0 on success, 1 on error
+ */
+static int sprint_event_message(char *dest, const struct event_data *data)
+{
+       if (!dest || !data)
+               return 1;
+
+       if (data->disc && data->description)
+               snprintf(dest, BUFSIZ, BASE_MESSAGE COMPONENT_DEVICE_MESSAGE DESCRIPTION_MESSAGE,
+                        data->event_name, data->dev, data->disc, data->description);
+       else if (data->disc)
+               snprintf(dest, BUFSIZ, BASE_MESSAGE COMPONENT_DEVICE_MESSAGE,
+                        data->event_name, data->dev, data->disc);
+       else if (data->description)
+               snprintf(dest, BUFSIZ, BASE_MESSAGE DESCRIPTION_MESSAGE,
+                        data->event_name, data->dev, data->description);
+       else
+               snprintf(dest, BUFSIZ, BASE_MESSAGE, data->event_name, data->dev);
+
+       return 0;
+}
+
+/*
+ * get_syslog_event_priority() - Determines event priority.
+ * @event_enum: event to be checked
+ *
+ * Return: LOG_CRIT, LOG_WARNING or LOG_INFO
+ */
+static int get_syslog_event_priority(const enum event event_enum)
+{
+       if (event_enum > __SYSLOG_PRIORITY_CRITICAL)
+               return LOG_CRIT;
+       if (event_enum > __SYSLOG_PRIORITY_WARNING)
+               return LOG_WARNING;
+       return LOG_INFO;
+}
+
+/*
+ * is_email_event() - Determines whether email for event should be sent or not.
+ * @event_enum: event to be checked
+ *
+ * Return: true if email should be sent, false otherwise
+ */
+static bool is_email_event(const enum event event_enum)
+{
+       static const enum event email_events[] = {
+       EVENT_FAIL,
+       EVENT_FAIL_SPARE,
+       EVENT_DEGRADED_ARRAY,
+       EVENT_SPARES_MISSING,
+       EVENT_TEST_MESSAGE
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(email_events); ++i) {
+               if (event_enum == email_events[i])
+                       return true;
+       }
+       return false;
+}
+
+/*
+ * execute_alert_cmd() - Forks and executes command provided as alert_cmd.
+ * @data: event data
+ */
+static void execute_alert_cmd(const struct event_data *data)
+{
+       int pid = fork();
+
+       switch (pid) {
+       default:
+               waitpid(pid, NULL, 0);
+               break;
+       case -1:
+               pr_err("Cannot fork to execute alert command");
+               break;
+       case 0:
+               execl(info.alert_cmd, info.alert_cmd, data->event_name, data->dev, data->disc, NULL);
+               exit(2);
+       }
+}
+
+/*
+ * send_event_email() - Sends an email about event detected by monitor.
+ * @data: event data
+ */
+static void send_event_email(const struct event_data *data)
+{
+       FILE *mp, *mdstat;
+       char buf[BUFSIZ];
+       int n;
+
+       mp = popen(Sendmail, "w");
+       if (!mp) {
+               pr_err("Cannot open pipe stream for sendmail.\n");
+               return;
+       }
+
+       signal(SIGPIPE, SIG_IGN);
+       if (info.mailfrom)
+               fprintf(mp, "From: %s\n", info.mailfrom);
+       else
+               fprintf(mp, "From: %s monitoring <root>\n", Name);
+       fprintf(mp, "To: %s\n", info.mailaddr);
+       fprintf(mp, "Subject: %s event on %s:%s\n\n", data->event_name, data->dev, info.hostname);
+       fprintf(mp, "This is an automatically generated mail message.\n");
+       fprintf(mp, "%s\n", data->message);
+
+       mdstat = fopen("/proc/mdstat", "r");
+       if (!mdstat) {
+               pr_err("Cannot open /proc/mdstat\n");
+               pclose(mp);
+               return;
+       }
+
+       fprintf(mp, "The /proc/mdstat file currently contains the following:\n\n");
+       while ((n = fread(buf, 1, sizeof(buf), mdstat)) > 0)
+               n = fwrite(buf, 1, n, mp);
+       fclose(mdstat);
+       pclose(mp);
+}
+
+/*
+ * log_event_to_syslog() - Logs an event into syslog.
+ * @data: event data
+ */
+static void log_event_to_syslog(const struct event_data *data)
+{
+       int priority;
+
+       priority = get_syslog_event_priority(data->event_enum);
+
+       syslog(priority, "%s\n", data->message);
+}
+
+/*
+ * alert() - Alerts about the monitor event.
+ * @event_enum: event to be sent
+ * @description: event description
+ * @progress: rebuild progress
+ * @dev: md device name
+ * @disc: component device
+ *
+ * If needed function executes alert command, sends an email or logs event to syslog.
+ */
+static void alert(const enum event event_enum, const char *description, const uint8_t progress,
+                 const char *dev, const char *disc)
+{
+       struct event_data data = {.dev = dev, .disc = disc, .description = description};
+
+       if (!dev)
+               return;
+
+       if (event_enum == EVENT_REBUILD) {
+               snprintf(data.event_name, sizeof(data.event_name), "%s%02d",
+                        map_num_s(events_map, EVENT_REBUILD), progress);
+       } else {
+               snprintf(data.event_name, sizeof(data.event_name), "%s", map_num_s(events_map, event_enum));
+       }
+
+       data.event_enum = event_enum;
+
+       if (sprint_event_message(data.message, &data) != 0) {
+               pr_err("Cannot create event message.\n");
+               return;
        }
+       pr_err("%s\n", data.message);
+
+       if (info.alert_cmd)
+               execute_alert_cmd(&data);
+
+       if (info.mailaddr && is_email_event(event_enum))
+               send_event_email(&data);
+
+       if (info.dosyslog)
+               log_event_to_syslog(&data);
 }
 
 static int check_array(struct state *st, struct mdstat_ent *mdstat,
-                      int test, struct alert_info *ainfo,
                       int increments, char *prefer)
 {
        /* Update the state 'st' to reflect any changes shown in mdstat,
         * or found by directly examining the array, and return
         * '1' if the array is degraded, or '0' if it is optimal (or dead).
         */
-       struct { int state, major, minor; } info[MAX_DISKS];
+       struct { int state, major, minor; } disks_info[MAX_DISKS];
        struct mdinfo *sra = NULL;
        mdu_array_info_t array;
        struct mdstat_ent *mse = NULL, *mse2;
@@ -468,8 +750,8 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
        int is_container = 0;
        unsigned long redundancy_only_flags = 0;
 
-       if (test)
-               alert("TestMessage", dev, NULL, ainfo);
+       if (info.test)
+               alert(EVENT_TEST_MESSAGE, NULL, 0, dev, NULL);
 
        retval = 0;
 
@@ -478,7 +760,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
                goto disappeared;
 
        if (st->devnm[0] == 0)
-               strcpy(st->devnm, fd2devnm(fd));
+               snprintf(st->devnm, MD_NAME_MAX, "%s", fd2devnm(fd));
 
        for (mse2 = mdstat; mse2; mse2 = mse2->next)
                if (strcmp(mse2->devnm, st->devnm) == 0) {
@@ -518,7 +800,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
         */
        if (sra->array.level == 0 || sra->array.level == -1) {
                if (!st->err && !st->from_config)
-                       alert("DeviceDisappeared", dev, " Wrong-Level", ainfo);
+                       alert(EVENT_DEVICE_DISAPPEARED, "Wrong-Level", 0, dev, NULL);
                st->err++;
                goto out;
        }
@@ -534,7 +816,8 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
                st->err = 0;
                st->percent = RESYNC_NONE;
                new_array = 1;
-               alert("NewArray", st->devname, NULL, ainfo);
+               if (!is_container)
+                       alert(EVENT_NEW_ARRAY, NULL, 0, st->devname, NULL);
        }
 
        if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
@@ -547,29 +830,20 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
        }
        if (st->utime == 0 && /* new array */
            mse->pattern && strchr(mse->pattern, '_') /* degraded */)
-               alert("DegradedArray", dev, NULL, ainfo);
+               alert(EVENT_DEGRADED_ARRAY, NULL, 0, dev, NULL);
 
        if (st->utime == 0 && /* new array */ st->expected_spares > 0 &&
            sra->array.spare_disks < st->expected_spares)
-               alert("SparesMissing", dev, NULL, ainfo);
+               alert(EVENT_SPARES_MISSING, NULL, 0, dev, NULL);
        if (st->percent < 0 && st->percent != RESYNC_UNKNOWN &&
            mse->percent >= 0)
-               alert("RebuildStarted", dev, NULL, ainfo);
+               alert(EVENT_REBUILD_STARTED, NULL, 0, dev, NULL);
        if (st->percent >= 0 && mse->percent >= 0 &&
            (mse->percent / increments) > (st->percent / increments)) {
-               char percentalert[18];
-               /*
-                * "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
-                */
-
                if((mse->percent / increments) == 0)
-                       snprintf(percentalert, sizeof(percentalert),
-                                "RebuildStarted");
+                       alert(EVENT_REBUILD_STARTED, NULL, 0, dev, NULL);
                else
-                       snprintf(percentalert, sizeof(percentalert),
-                                "Rebuild%02d", mse->percent);
-
-               alert(percentalert, dev, NULL, ainfo);
+                       alert(EVENT_REBUILD, NULL, mse->percent, dev, NULL);
        }
 
        if (mse->percent == RESYNC_NONE && st->percent >= 0) {
@@ -582,9 +856,9 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
                        snprintf(cnt, sizeof(cnt),
                                 " mismatches found: %d (on raid level %d)",
                                 sra->mismatch_cnt, sra->array.level);
-                       alert("RebuildFinished", dev, cnt, ainfo);
+                       alert(EVENT_REBUILD_FINISHED, NULL, 0, dev, cnt);
                } else
-                       alert("RebuildFinished", dev, NULL, ainfo);
+                       alert(EVENT_REBUILD_FINISHED, NULL, 0, dev, NULL);
        }
        st->percent = mse->percent;
 
@@ -593,13 +867,13 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
                mdu_disk_info_t disc;
                disc.number = i;
                if (md_get_disk_info(fd, &disc) >= 0) {
-                       info[i].state = disc.state;
-                       info[i].major = disc.major;
-                       info[i].minor = disc.minor;
+                       disks_info[i].state = disc.state;
+                       disks_info[i].major = disc.major;
+                       disks_info[i].minor = disc.minor;
                        if (disc.major || disc.minor)
                                remaining_disks --;
                } else
-                       info[i].major = info[i].minor = 0;
+                       disks_info[i].major = disks_info[i].minor = 0;
        }
        last_disk = i;
 
@@ -607,7 +881,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
            strncmp(mse->metadata_version, "external:", 9) == 0 &&
            is_subarray(mse->metadata_version+9)) {
                char *sl;
-               strcpy(st->parent_devnm, mse->metadata_version + 10);
+               snprintf(st->parent_devnm, MD_NAME_MAX, "%s", mse->metadata_version + 10);
                sl = strchr(st->parent_devnm, '/');
                if (sl)
                        *sl = 0;
@@ -622,13 +896,13 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
                int change;
                char *dv = NULL;
                disc.number = i;
-               if (i < last_disk && (info[i].major || info[i].minor)) {
-                       newstate = info[i].state;
-                       dv = map_dev_preferred(info[i].major, info[i].minor, 1,
+               if (i < last_disk && (disks_info[i].major || disks_info[i].minor)) {
+                       newstate = disks_info[i].state;
+                       dv = map_dev_preferred(disks_info[i].major, disks_info[i].minor, 1,
                                               prefer);
                        disc.state = newstate;
-                       disc.major = info[i].major;
-                       disc.minor = info[i].minor;
+                       disc.major = disks_info[i].major;
+                       disc.minor = disks_info[i].minor;
                } else
                        newstate = (1 << MD_DISK_REMOVED);
 
@@ -638,14 +912,14 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
                change = newstate ^ st->devstate[i];
                if (st->utime && change && !st->err && !new_array) {
                        if ((st->devstate[i]&change) & (1 << MD_DISK_SYNC))
-                               alert("Fail", dev, dv, ainfo);
+                               alert(EVENT_FAIL, NULL, 0, dev, dv);
                        else if ((newstate & (1 << MD_DISK_FAULTY)) &&
                                 (disc.major || disc.minor) &&
                                 st->devid[i] == makedev(disc.major,
                                                         disc.minor))
-                               alert("FailSpare", dev, dv, ainfo);
+                               alert(EVENT_FAIL_SPARE, NULL, 0, dev, dv);
                        else if ((newstate&change) & (1 << MD_DISK_SYNC))
-                               alert("SpareActive", dev, dv, ainfo);
+                               alert(EVENT_SPARE_ACTIVE, NULL, 0, dev, dv);
                }
                st->devstate[i] = newstate;
                st->devid[i] = makedev(disc.major, disc.minor);
@@ -668,14 +942,13 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
        return retval;
 
  disappeared:
-       if (!st->err)
-               alert("DeviceDisappeared", dev, NULL, ainfo);
+       if (!st->err && !is_container)
+               alert(EVENT_DEVICE_DISAPPEARED, NULL, 0, dev, NULL);
        st->err++;
        goto out;
 }
 
-static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
-                         int test, struct alert_info *info)
+static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist)
 {
        struct mdstat_ent *mse;
        int new_found = 0;
@@ -695,14 +968,13 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
                                continue;
                        }
 
-                       st->devname = xstrdup(name);
+                       snprintf(st->devname, MD_NAME_MAX + sizeof(DEV_MD_DIR), "%s", name);
                        if ((fd = open(st->devname, O_RDONLY)) < 0 ||
                            md_get_array_info(fd, &array) < 0) {
                                /* no such array */
                                if (fd >= 0)
                                        close(fd);
                                put_md_name(st->devname);
-                               free(st->devname);
                                if (st->metadata) {
                                        st->metadata->ss->free_super(st->metadata);
                                        free(st->metadata);
@@ -714,7 +986,7 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
                        st->next = *statelist;
                        st->err = 1;
                        st->from_auto = 1;
-                       strcpy(st->devnm, mse->devnm);
+                       snprintf(st->devnm, MD_NAME_MAX, "%s", mse->devnm);
                        st->percent = RESYNC_UNKNOWN;
                        st->expected_spares = -1;
                        if (mse->metadata_version &&
@@ -722,48 +994,20 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
                                    "external:", 9) == 0 &&
                            is_subarray(mse->metadata_version+9)) {
                                char *sl;
-                               strcpy(st->parent_devnm,
-                                       mse->metadata_version+10);
+                               snprintf(st->parent_devnm, MD_NAME_MAX,
+                                        "%s", mse->metadata_version + 10);
                                sl = strchr(st->parent_devnm, '/');
                                *sl = 0;
                        } else
                                st->parent_devnm[0] = 0;
                        *statelist = st;
-                       if (test)
-                               alert("TestMessage", st->devname, NULL, info);
+                       if (info.test)
+                               alert(EVENT_TEST_MESSAGE, NULL, 0, st->devname, NULL);
                        new_found = 1;
                }
        return new_found;
 }
 
-static int get_required_spare_criteria(struct state *st,
-                                      struct spare_criteria *sc)
-{
-       int fd;
-
-       if (!st->metadata || !st->metadata->ss->get_spare_criteria) {
-               sc->min_size = 0;
-               sc->sector_size = 0;
-               return 0;
-       }
-
-       fd = open(st->devname, O_RDONLY);
-       if (fd < 0)
-               return 1;
-       if (st->metadata->ss->external)
-               st->metadata->ss->load_container(st->metadata, fd, st->devname);
-       else
-               st->metadata->ss->load_super(st->metadata, fd, st->devname);
-       close(fd);
-       if (!st->metadata->sb)
-               return 1;
-
-       st->metadata->ss->get_spare_criteria(st->metadata, sc);
-       st->metadata->ss->free_super(st->metadata);
-
-       return 0;
-}
-
 static int check_donor(struct state *from, struct state *to)
 {
        struct state *sub;
@@ -798,22 +1042,12 @@ static dev_t choose_spare(struct state *from, struct state *to,
        for (d = from->raid; !dev && d < MAX_DISKS; d++) {
                if (from->devid[d] > 0 && from->devstate[d] == 0) {
                        struct dev_policy *pol;
-                       unsigned long long dev_size;
-                       unsigned int dev_sector_size;
 
                        if (to->metadata->ss->external &&
                            test_partition_from_id(from->devid[d]))
                                continue;
 
-                       if (sc->min_size &&
-                           dev_size_from_id(from->devid[d], &dev_size) &&
-                           dev_size < sc->min_size)
-                               continue;
-
-                       if (sc->sector_size &&
-                           dev_sector_size_from_id(from->devid[d],
-                                                   &dev_sector_size) &&
-                           sc->sector_size != dev_sector_size)
+                       if (devid_matches_criteria(to->metadata, from->devid[d], sc) == false)
                                continue;
 
                        pol = devid_policy(from->devid[d]);
@@ -894,16 +1128,16 @@ static dev_t container_choose_spare(struct state *from, struct state *to,
        return dev;
 }
 
-static void try_spare_migration(struct state *statelist, struct alert_info *info)
+static void try_spare_migration(struct state *statelist)
 {
        struct state *from;
        struct state *st;
-       struct spare_criteria sc;
 
        link_containers_with_subarrays(statelist);
        for (st = statelist; st; st = st->next)
                if (st->active < st->raid && st->spare == 0 && !st->err) {
                        struct domainlist *domlist = NULL;
+                       struct spare_criteria sc = {0};
                        int d;
                        struct state *to = st;
 
@@ -916,8 +1150,11 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info
                                /* member of a container */
                                to = to->parent;
 
-                       if (get_required_spare_criteria(to, &sc))
-                               continue;
+                       if (to->metadata->ss->get_spare_criteria)
+                               if (to->metadata->ss->get_spare_criteria(to->metadata, to->devname,
+                                                                        &sc))
+                                       continue;
+
                        if (to->metadata->ss->external) {
                                /* We must make sure there is
                                 * no suitable spare in container already.
@@ -953,12 +1190,12 @@ static void try_spare_migration(struct state *statelist, struct alert_info *info
                                if (devid > 0 &&
                                    move_spare(from->devname, to->devname,
                                               devid)) {
-                                       alert("MoveSpare", to->devname,
-                                             from->devname, info);
+                                       alert(EVENT_MOVE_SPARE, NULL, 0, to->devname, from->devname);
                                        break;
                                }
                        }
                        domain_free(domlist);
+                       dev_policy_free(sc.pols);
                }
 }
 
@@ -988,6 +1225,24 @@ static void link_containers_with_subarrays(struct state *list)
                                }
 }
 
+/**
+ * free_statelist() - Frees statelist.
+ * @statelist: statelist to free
+ */
+static void free_statelist(struct state *statelist)
+{
+       struct state *tmp = NULL;
+
+       while (statelist) {
+               if (statelist->spare_group)
+                       free(statelist->spare_group);
+
+               tmp = statelist;
+               statelist = statelist->next;
+               free(tmp);
+       }
+}
+
 /* Not really Monitor but ... */
 int Wait(char *dev)
 {
@@ -1022,12 +1277,12 @@ int Wait(char *dev)
                         * sync_action does.
                         */
                        struct mdinfo mdi;
-                       char buf[21];
+                       char buf[SYSFS_MAX_BUF_SIZE];
 
                        if (sysfs_init(&mdi, -1, devnm))
                                return 2;
                        if (sysfs_get_str(&mdi, NULL, "sync_action",
-                                         buf, 20) > 0 &&
+                                         buf, sizeof(buf)) > 0 &&
                            strcmp(buf,"idle\n") != 0) {
                                e->percent = RESYNC_UNKNOWN;
                                if (strcmp(buf, "frozen\n") == 0) {
@@ -1106,7 +1361,7 @@ int WaitClean(char *dev, int verbose)
 
        if (rv) {
                int state_fd = sysfs_open(fd2devnm(fd), NULL, "array_state");
-               char buf[20];
+               char buf[SYSFS_MAX_BUF_SIZE];
                int delay = 5000;
 
                /* minimize the safe_mode_delay and prepare to wait up to 5s
diff --git a/Query.c b/Query.c
index 23fbf8aa15eb20eb86fdac3fd6e49846e1fd5b0c..adcd231e051b19fa5fb5b418d5af6f2980a53dc5 100644 (file)
--- a/Query.c
+++ b/Query.c
@@ -93,7 +93,7 @@ int Query(char *dev)
        else {
                printf("%s: %s %s %d devices, %d spare%s. Use mdadm --detail for more detail.\n",
                       dev, human_size_brief(larray_size,IEC),
-                      map_num(pers, level), raid_disks,
+                      map_num_s(pers, level), raid_disks,
                       spare_disks, spare_disks == 1 ? "" : "s");
        }
        st = guess_super(fd);
@@ -131,7 +131,7 @@ int Query(char *dev)
                       dev,
                       info.disk.number, info.array.raid_disks,
                       activity,
-                      map_num(pers, info.array.level),
+                      map_num_s(pers, info.array.level),
                       mddev);
                if (st->ss == &super0)
                        put_md_name(mddev);
diff --git a/README.initramfs b/README.initramfs
deleted file mode 100644 (file)
index c5fa668..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-Assembling md arrays at boot time.
----------------------------------
-December 2005
-
-These notes apply to 2.6 kernels only and, in some cases,
-to 2.6.15 or later.
-
-Md arrays can be assembled at boot time using the 'autodetect' functionality
-which is triggered by storing components of an array in partitions of type
-'fd' - Linux Raid Autodetect.
-They can also be assembled by specifying the component devices in a
-kernel parameter such as
-  md=0,/dev/sda,/dev/sdb
-In this case, /dev/md0 will be assembled (because of the 0) from the listed
-devices.
-
-These mechanisms, while useful, do not provide complete functionality
-and are unlikely to be extended.  The preferred way to assemble md
-arrays at boot time is using 'mdadm'.  To assemble an array which
-contains the root filesystem, mdadm needs to be run before that
-filesystem is mounted, and so needs to be run from an initial-ram-fs.
-It is how this can work that is the primary focus of this document.
-
-It should be noted up front that only the array containing the root
-filesystem should be assembled from the initramfs.  Any other arrays
-should be assembled under the control of files on the main filesystem
-as this enhanced flexibility and maintainability.
-
-A minimal initramfs for assembling md arrays can be created using 3
-files and one directory.  These are:
-
-/bin           Directory
-/bin/mdadm     statically linked mdadm binary
-/bin/busybox   statically linked busybox binary
-/bin/sh        hard link to /bin/busybox
-/init          a shell script which call mdadm appropriately.
-
-An example init script is:
-
-==============================================
-#!/bin/sh
-
-echo 'Auto-assembling boot md array'
-mkdir /proc
-mount -t proc proc /proc
-if [ -n "$rootuuid" ]
-then arg=--uuid=$rootuuid
-elif [ -n "$mdminor" ]
-then arg=--super-minor=$mdminor
-else arg=--super-minor=0
-fi
-echo "Using $arg"
-mdadm -Acpartitions $arg --auto=part /dev/mda
-cd /
-mount /dev/mda1 /root ||  mount /dev/mda /root
-umount /proc
-cd /root
-exec chroot . /sbin/init < /dev/console > /dev/console 2>&1
-=============================================
-
-This could certainly be extended, or merged into a larger init script.
-Though tested and in production use, it is not presented here as
-"The Right Way" to do it, but as a useful example.
-Some key points are:
-
-  /proc needs to be mounted so that /proc/partitions can be accessed
-  by mdadm, and so that /proc/filesystems can be accessed by mount.
-
-  The uuid of the array can be passed in as a kernel parameter
-  (rootuuid).  As the kernel doesn't use this value, it is made available
-  in the environment for /init
-
-  If no uuid is given, we default to md0, (--super-minor=0) which is a
-  commonly used to store the root filesystem.  This may not work in
-  all situations.
-
-  We assemble the array as a partitionable array (/dev/mda) even if we
-  end up using the whole array.  There is no cost in using the partitionable
-  interface, and in this context it is simpler.
-
-  We try mounting both /dev/mda1 and /dev/mda as they are the most like
-  part of the array to contain the root filesystem.
-
-  The --auto flag is given to mdadm so that it will create /dev/md*
-  files automatically.  This is needed as /dev will not contain
-  and md files, and udev will not create them (as udev only created device
-  files after the device exists, and mdadm need the device file to create
-  the device).  Note that the created md files may not exist in /dev
-  of the mounted root filesystem.  This needs to be deal with separately
-  from mdadm - possibly using udev.
-
-  We do not need to create device files for the components which will
-  be assembled into /dev/mda.  mdadm finds the major/minor numbers from
-  /proc/partitions and creates a temporary /dev file if one doesn't already
-  exist.
-
-The script "mkinitramfs" which is included with the mdadm distribution
-can be used to create a minimal initramfs.  It creates a file called
-'init.cpio.gz' which can be specified as an 'initrd' to lilo or grub
-(or whatever boot loader is being used).
-
-
-
-
-Resume from an md array
------------------------
-
-If you want to make use of the suspend-to-disk/resume functionality in Linux,
-and want to have swap on an md array, you will need to assemble the array
-before resume is possible.
-However, because the array is active in the resumed image, you do not want
-anything written to any drives during the resume process, such as superblock
-updates or array resync.
-
-This can be achieved in 2.6.15-rc1 and later kernels using the
-'start_readonly' module parameter.
-Simply include the command
-  echo 1 > /sys/module/md_mod/parameters/start_ro
-before assembling the array with 'mdadm'.
-You can then echo
-  9:0
-or whatever is appropriate to /sys/power/resume to trigger the resume.
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..64f2ece
--- /dev/null
+++ b/README.md
@@ -0,0 +1,83 @@
+**mdadm** is a utility used to create and manage **software RAID** devices implemented through
+the **Multiple devices driver (MD)** in the kernel. It supports the following RAID metadata formats:
+
+* [Linux native RAID](https://raid.wiki.kernel.org/index.php/RAID_superblock_formats):
+
+  Known as **native** or **native RAID**. First and default metadata format. Metadata management
+  is implemented in **MD driver**.
+
+* Matrix Storage Manager Support (no reference, metadata format documentation is proprietary).
+
+  Known as **IMSM**. Metadata format developed and maintained by **Intel®** as a part of **VROC**
+  solution. There are some functional differences between **native** and **imsm**. The most
+  important difference is that the metadata is managed from userspace.
+
+  **CAUTION:** **imsm** is compatible with **Intel RST**; however, it is not officially supported.
+  Use it at your own risk.
+
+* [Common RAID DDF Specification Revision](https://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf)
+
+    **IMPORTANT:** DDF is in **maintenance only** mode. There is no active development around it.
+    Please do not use it in new solutions.
+
+# How to Contribute
+
+**mdadm** is hosted on [kernel.org](https://kernel.org/). You can access the repository
+[here](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git).
+
+It is maintained similarly to the kernel, using a *mailing list*. Patches must be sent through email.
+Please familiarize yourself with the general kernel
+[submitting patches](https://www.kernel.org/doc/html/v4.17/process/submitting-patches.html)
+documentation. Formatting, tags and commit message guidelines apply to **mdadm**.
+
+## Sending patches step-by-step
+
+To maximize the chance of patches being accepted, follow these instructions when submitting:
+
+1. Create possibly logically separated commits and generate patches:
+
+   Use ``git format-patch --cover-letter --signoff -v <nr>`` to create patches:
+   * ``--cover-letter`` can be skipped if it is only one patch;
+   * ``--signoff`` adds sign-off tag;
+   * ``-v <nr>`` indicates review revision number, sender should increment it before resending.
+
+2. Check style of every patch with kernel
+   [checkpatch](https://docs.kernel.org/dev-tools/checkpatch.html) script:
+
+   It is important to keep a consistent coding style, which is why in **mdadm** the
+   [kernel coding style](https://www.kernel.org/doc/html/v4.10/process/coding-style.html)
+   is preferred. ``checkpatch.pl --no-tree <patch_file>`` can be used to verify patches.
+   The following checkpatch issues can be ignored:
+   - New typedefs.
+   - comparing with *True/False*.
+   - kernel *MAINTAINERS* file warning.
+   - *extern* keyword in headers.
+
+3. Send patches using ``git send-email --to=linux-raid@vger.kernel.org <cover-letter> <patch1> <patch2> (...)``
+
+# Maintainers
+
+It is good practice to add **mdadm maintainers** to recipients for patches:
+
+- Jes Sorensen <jes@trained-monkey.org>;
+- Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>;
+
+Adding **MD maintainers** could be reasonable, especially if patches may affect MD driver:
+
+- Song Liu <song@kernel.org>;
+- Yu Kuai <yukuai3@huawei.com>;
+
+# Reviewers
+
+The **mdadm** utility is not part of the kernel tree, so there is no official *Reviewers* list. Everyone
+can comment on the mailing list; the final decision (and merging) belongs to the maintainers.
+
+# Minimal supported kernel version
+
+We do not support kernel versions below **v3.10**. Please be aware that maintainers may remove
+workarounds and fixes for legacy issues.
+
+# License
+
+It is released under the terms of the **GNU General Public License version 2** as published
+by the **Free Software Foundation**.
index eaf104238d9e3fd20fd3275fa3bf596e9dbefc39..d57ebb6c2c3102d2f7cfc1540353a1216c88beb1 100644 (file)
--- a/ReadMe.c
+++ b/ReadMe.c
 #include "mdadm.h"
 
 #ifndef VERSION
-#define VERSION "4.1"
+#define VERSION "4.3"
 #endif
 #ifndef VERS_DATE
-#define VERS_DATE "2018-10-01"
+#define VERS_DATE "2024-02-15"
 #endif
-char Version[] = "mdadm - v" VERSION " - " VERS_DATE "\n";
+#ifndef EXTRAVERSION
+#define EXTRAVERSION ""
+#endif
+char Version[] = "mdadm - v" VERSION " - " VERS_DATE EXTRAVERSION "\n";
 
 /*
  * File: ReadMe.c
@@ -79,6 +82,7 @@ char Version[] = "mdadm - v" VERSION " - " VERS_DATE "\n";
  */
 
 char short_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
+char short_monitor_options[]="-ABCDEFGIQhVXYWZ:vqbc:i:l:p:m:r:n:x:u:c:d:z:U:N:safRSow1tye:k:";
 char short_bitmap_options[]=
                "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:k:";
 char short_bitmap_auto_options[]=
@@ -134,6 +138,7 @@ struct option long_options[] = {
     {"size",     1, 0, 'z'},
     {"auto",     1, 0, Auto}, /* also for --assemble */
     {"assume-clean",0,0, AssumeClean },
+    {"write-zeroes",0,0, WriteZeroes },
     {"metadata",  1, 0, 'e'}, /* superblock format */
     {"bitmap",   1, 0, Bitmap},
     {"bitmap-chunk", 1, 0, BitmapChunk},
@@ -143,7 +148,6 @@ struct option long_options[] = {
     {"nofailfast",0, 0,  NoFailFast},
     {"re-add",    0, 0,  ReAdd},
     {"homehost",  1, 0,  HomeHost},
-    {"symlinks",  1, 0,  Symlinks},
     {"data-offset",1, 0, DataOffset},
     {"nodes",1, 0, Nodes}, /* also for --assemble */
     {"home-cluster",1, 0, ClusterName},
@@ -387,6 +391,7 @@ char Help_create[] =
 "  --write-journal=      : Specify journal device for RAID-4/5/6 array\n"
 "  --consistency-policy= : Specify the policy that determines how the array\n"
 "                     -k : maintains consistency in case of unexpected shutdown.\n"
+"  --write-zeroes        : Write zeroes to the disks before creating. This will bypass initial sync.\n"
 "\n"
 ;
 
@@ -474,7 +479,7 @@ char Help_assemble[] =
 ;
 
 char Help_manage[] =
-"Usage: mdadm arraydevice options component devices...\n"
+"Usage: mdadm [mode] arraydevice [options] <component devices...>\n"
 "\n"
 "This usage is for managing the component devices within an array.\n"
 "The --manage option is not needed and is assumed if the first argument\n"
@@ -610,7 +615,6 @@ char Help_incr[] =
 ;
 
 char Help_config[] =
-"The /etc/mdadm.conf config file:\n\n"
 " The config file contains, apart from blank lines and comment lines that\n"
 " start with a hash(#), array lines, device lines, and various\n"
 " configuration lines.\n"
@@ -633,10 +637,12 @@ char Help_config[] =
 " than a device must match all of them to be considered.\n"
 "\n"
 " Other configuration lines include:\n"
-"  mailaddr, mailfrom, program     used for --monitor mode\n"
-"  create, auto                    used when creating device names in /dev\n"
-"  homehost, policy, part-policy   used to guide policy in various\n"
-"                                  situations\n"
+"  mailaddr, mailfrom, program, monitordelay    used for --monitor mode\n"
+"  create, auto                                 used when creating device names in /dev\n"
+"  homehost, homecluster, policy, part-policy   used to guide policy in various\n"
+"                                               situations\n"
+"\n"
+"For more details see mdadm.conf(5).\n"
 "\n"
 ;
 
@@ -651,3 +657,34 @@ char *mode_help[mode_count] = {
        [GROW]          = Help_grow,
        [INCREMENTAL]   = Help_incr,
 };
+
+/**
+ * fprint_update_options() - Print valid update options depending on the mode.
+ * @outf: File (output stream)
+ * @update_mode: Used to distinguish update and update_subarray
+ */
+void fprint_update_options(FILE *outf, enum update_opt update_mode)
+{
+       int counter = UOPT_NAME, breakpoint = UOPT_HELP;
+       mapping_t *map = update_options;
+
+       if (!outf)
+               return;
+       if (update_mode == UOPT_SUBARRAY_ONLY) {
+               breakpoint = UOPT_SUBARRAY_ONLY;
+               fprintf(outf, "Valid --update options for update-subarray are:\n\t");
+       } else
+               fprintf(outf, "Valid --update options are:\n\t");
+       while (map->num) {
+               if (map->num >= breakpoint)
+                       break;
+               fprintf(outf, "'%s', ", map->name);
+               if (counter % 5 == 0)
+                       fprintf(outf, "\n\t");
+               counter++;
+               map++;
+       }
+       if ((counter - 1) % 5)
+               fprintf(outf, "\n");
+       fprintf(outf, "\r");
+}
diff --git a/TODO b/TODO
deleted file mode 100644 (file)
index 279d20d..0000000
--- a/TODO
+++ /dev/null
@@ -1,213 +0,0 @@
- - add 'name' field to metadata type and use it.
- - use validate_geometry more
- - metadata should be able to check/reject bitmap stuff.
-
-DDF:
-  Three new metadata types:
-    ddf - used only to create a container.
-    ddf-bvd - used to create an array in a container
-    ddf-svd - used to create a secondary array from bvds.
-
-  Usage:
-    mdadm -C /dev/ddf1 /dev/sd[abcdef]
-    mdadm -C /dev/md1 -e ddf /dev/sd[a-f]
-    mdadm -C /dev/md1 -l container /dev/sd[a-f]
-
-        Each of these create a new ddf container using all those
-       devices.  The name 'ddf*' signals that ddf metadata should be used.
-       '-e ddf' only supports one level - 'container'.  'container' is only
-       supported by ddf.
-
-    mdadm -C /dev/md1 -l0 -n4 /dev/ddf1 # or maybe not ???
-    mdadm -C /dev/md1 -l1 -n2 /dev/sda /dev/sdb
-       If exactly one device is given, and it is a container, we select
-       devices from that container.
-       If devices are given that are already in use, they must be in use by
-       a container, and the array is created in the container.
-       If devices given are bvds, we slip under the hood to make
-         the svd arrays.
-
-    mdadm -A /dev/ddf ......
-       base drives make a container.  Anything in that container is started
-        auto-read-only.
-        if /dev/ddf is already assembled, we assemble bvds and svds inside it.
-
-
-2005-dec-20
-  Want an incremental assembly mode to work nicely with udev.
-  Core usage would be something like
-       mdadm --incr-assemble /dev/newdevice
-  This would
-     - examine the device to determine  uuid etc.
-     - look for a match in /etc/mdadm.conf, abort if not found
-     - find that device and collect current contents
-     - perform an 'assemble' analysis to make sure we have the best set of devices.
-     - remove or add devices as appropriate
-     - possibly start the array if it was complete
-
-   Other usages could involve
-     - specify which array to auto-add to.
-       This requires an existing array for uuid matching... is there any point?
-
-     -
-
-
-2004-june-02
-  * Don't print 'errors' flag, it is meaningless. DONE
-  * Handle new superblock format
-  * create device file on demand, particularly partitionable devices. DONE
-      BUT figure a way to create the partition devices.
-              auto=partN
-  * Use Event: interface to listen for events. DONE, untested
-  * Make sure mdadm -As can assemble multi-level RAIDs ok.
-  * --build to build raid1 or multipath arrays 
-       clean or not ???
-  
-----------------------------------------------------------------------------
-* mdadm --monitor to monitor failed multipath paths and re-instate them.
-
-* Maybe make "--help" fit in 80x24 and have a --long-help with more info. DONE
-
-
-* maybe "missing" instead of <bold>missing</> in doco DONE
-* possibly wait for resync to start, or even finish while assembling.- NO
-
-* -Db should have a devices= entry if possible. - DONE
-* when assembling multipath arrays, ignore any error indicators. - DONE
-* rationalise --monitor usage:
-     mdadm --monitor
-  doesn't do as expected. DONE
-
-* --assemble could have a --update option. - DONE
-  following word can be:
-       sparc2.2
-       super-minor
-
-* mdadm /dev/md11, where md11 is raid0 can segfault, particularly when looking in the 
-   [UU_UUU] string ... which doesn't exist !
-It should be more sensible.  DONE
-
-Example:
-
-from  Raimund Sacherer <raimund.sacherer@ngit.at>
-
-mke2fs -m0 -q /dev/ram1 300
-mount -n -t ext2 /dev/ram1 /tmp
-echo DEVICE /dev/[sh]* >> /tmp/mdadm.conf
-mdadm -Esb /dev/[sh]* 2>/dev/null >> /tmp/mdadm.conf
-mdadm -ARsc /tmp/mdadm.conf
-umount /tmp
-
-
-?? Allow -S /dev/md? - current complains subsequent not a/d/r - DONE
-
-* new "Query" mode to subsume --detail and --examine.
-   --query or -Q, takes a device and tells if it is an MD device,
-   and also tells in a raid superblock is found. 
- DONE
-
-* write mdstat.c to parse /proc/mdstat file
-   Build list of arrays:  name, rebuild-percent
-  DONE
-
-* parse /proc/partitions and map major/minor into /dev/* names,
-  and use that for default DEVICE list ????
-
-* --detail --scan to read /proc/mdstat, and then iterate over these,
-    but assume --brief.  --verbose can override
-    check each subdevice to see if it is in conf_get_devs.
-    Warn if not.
-  DONE, but don't warn yet...
-
-* Support multipath ... maybe...
-  maybe DONE
-
-* --follow to syslog 
-
-* --follow to move spares around DONE
-
-* --follow to notice other events: DONE
-     rebuild started
-     spare activated
-     spare removed
-     spare added
-
-------------------------------------
-- --examine --scan scans all drives and build an mdadm.conf file DONE
-
-- check superblock checksum in examine DONE
-- report "chunk" or "rounding" depending on raid level DONE
-- report "linear" instead of "-1" for raid level DONE
-- decode ayout depending on raid level DONE
-- --verbose and --force flags. DONE
-
-- set md_minor, *_disks for Create  - DONE
-- for create raid5, how to choose between 
-   all working, but not insync
-   one missing, one spare, insync  DONE (--force)
-- and for raid1 - some failed drives...  (missing)
-
-- when RUN_ARRAY, make sure *_disks counts are right
-
-- get --detail to extract extra stuff from superblock,
-   like uuid  DONE
-- --detail --brief to give a config file line DONE
-- parse config file. DONE
-- test...
-
-- when --assemble --scan, if an underlying device is an md device, 
-  then try to assemble that device first.
-
-
-- mdadm -S /dev/md0 /dev/md1 gives internal error FIXED
-
-- mdadm --detail --scan print summary of what it can find? DONE
-
-
----------
-Assemble doesn't add spares. - DONE
-Create to allow "missing" name for devices.
-Create to accept "--force" for do exactly what is requested
-- get Assemble to upgrade devices if force flag.
-ARRAY lines in config file to have super_minor=n
-ARRAY lines in config file to have device=pattern, and only accept
-   those devices
-   If UUID given, insist on that
-   If not, but super_minor given, require all found with that minor
-    to have same uuid
-   If only device given, all valid supers on those devices must have 
-    same uuid
-allow /dev/mdX as first argument before any options
-Possible --dry-run option for create and assemble--force
-
-Assemble to check that all devices mentioned in superblock
-  are present.
-
-New mode: --Monitor (or --Follow)
-  Periodically check status of all arrays (listed in config file).
-  Log every event and apparent cause - or differences
-  Email and alert - or run a program - for important events
-  Move spares around if necessary.
-
-  An Array line can have a spare-group= field that indicates that
-   the array shares spares with other arrays with the same
-   spare-group name.
-   If an array has a failed and no spares, then check all other
-     arrays in the spare group.  If one has no failures and a spare,
-     then consider that spare.
-    Choose the smallest considered spare that is large enough.
-    If there is one, then hot-remove it from it's home, and
-    hot-add it to the array in question.
-
-  --mail-to address  
-  --alert-handler program
-  
-  Will also extract information from /proc/mdstat if present,
-  and consider 20% marks in rebuild as events.
-
-  Events are:
-     drive fails  - causes mail to be sent
-     rebuild started
-     spare activated
-     spare removed
-     spare added
index e38cb9650390235d481aeba3b3fa38b724e37910..9a7ffe3bb1bf209d2246ef539800272444ec63ce 100644 (file)
--- a/bitmap.c
+++ b/bitmap.c
@@ -180,13 +180,14 @@ out:
 }
 
 static int
-bitmap_file_open(char *filename, struct supertype **stp, int node_num)
+bitmap_file_open(char *filename, struct supertype **stp, int node_num, int fd)
 {
-       int fd;
        struct stat stb;
        struct supertype *st = *stp;
 
-       fd = open(filename, O_RDONLY|O_DIRECT);
+       /* won't re-open filename when (fd >= 0) */
+       if (fd < 0)
+               fd = open(filename, O_RDONLY|O_DIRECT);
        if (fd < 0) {
                pr_err("failed to open bitmap file %s: %s\n",
                       filename, strerror(errno));
@@ -249,7 +250,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
        int fd, i;
        __u32 uuid32[4];
 
-       fd = bitmap_file_open(filename, &st, 0);
+       fd = bitmap_file_open(filename, &st, 0, -1);
        if (fd < 0)
                return rv;
 
@@ -263,7 +264,6 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
                pr_err("Reporting bitmap that would be used if this array were used\n");
                pr_err("as a member of some other array\n");
        }
-       close(fd);
        printf("        Filename : %s\n", filename);
        printf("           Magic : %08x\n", sb->magic);
        if (sb->magic != BITMAP_MAGIC) {
@@ -332,15 +332,13 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
                for (i = 0; i < (int)sb->nodes; i++) {
                        st = NULL;
                        free(info);
-                       fd = bitmap_file_open(filename, &st, i);
+                       fd = bitmap_file_open(filename, &st, i, fd);
                        if (fd < 0) {
                                printf("   Unable to open bitmap file on node: %i\n", i);
-
                                continue;
                        }
                        info = bitmap_fd_read(fd, brief);
                        if (!info) {
-                               close(fd);
                                printf("   Unable to read bitmap on node: %i\n", i);
                                continue;
                        }
@@ -359,13 +357,72 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
                        printf("          Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
                               info->total_bits, info->dirty_bits,
                               100.0 * info->dirty_bits / (info->total_bits?:1));
-                        close(fd);
                }
        }
 
 free_info:
+       close(fd);
+       free(info);
+       return rv;
+}
+
+int IsBitmapDirty(char *filename)
+{
+       /*
+        * Read the bitmap file.
+        * Reading stops immediately when any error is encountered.
+        *
+        * Return: 1(dirty), 0 (clean), -1(error)
+        */
+
+       int fd = -1, rv = 0, i;
+       struct supertype *st = NULL;
+       bitmap_info_t *info = NULL;
+       bitmap_super_t *sb = NULL;
+
+       fd = bitmap_file_open(filename, &st, 0, fd);
+       free(st);
+       if (fd < 0)
+               goto out;
+
+       info = bitmap_fd_read(fd, 0);
+       if (!info) {
+               close(fd);
+               goto out;
+       }
+
+       sb = &info->sb;
+       for (i = 0; i < (int)sb->nodes; i++) {
+               st = NULL;
+               free(info);
+               info = NULL;
+
+               fd = bitmap_file_open(filename, &st, i, fd);
+               free(st);
+               if (fd < 0)
+                       goto out;
+
+               info = bitmap_fd_read(fd, 0);
+               if (!info) {
+                       close(fd);
+                       goto out;
+               }
+
+               sb = &info->sb;
+               if (sb->magic != BITMAP_MAGIC) { /* invalid bitmap magic */
+                       free(info);
+                       close(fd);
+                       goto out;
+               }
+
+               if (info->dirty_bits)
+                       rv = 1;
+       }
+       close(fd);
        free(info);
        return rv;
+out:
+       return -1;
 }
 
 int CreateBitmap(char *filename, int force, char uuid[16],
index 7592b2d7aea94a623270fb1029aefd4d54539593..b46d71cb3825ad289338a464c01a9a51d6a5e616 100644 (file)
--- a/config.c
+++ b/config.c
@@ -81,7 +81,7 @@ char DefaultAltConfDir[] = CONFFILE2 ".d";
 
 enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev,
                Homehost, HomeCluster, AutoMode, Policy, PartPolicy, Sysfs,
-               LTEnd };
+               MonitorDelay, EncryptionNoVerify, LTEnd };
 char *keywords[] = {
        [Devices]  = "devices",
        [Array]    = "array",
@@ -95,6 +95,8 @@ char *keywords[] = {
        [Policy]   = "policy",
        [PartPolicy]="part-policy",
        [Sysfs]    = "sysfs",
+       [MonitorDelay] = "monitordelay",
+       [EncryptionNoVerify] = "ENCRYPTION_NO_VERIFY",
        [LTEnd]    = NULL
 };
 
@@ -118,6 +120,196 @@ int match_keyword(char *word)
        return -1;
 }
 
+/**
+ * is_devname_ignore() - check if &devname is a special "<ignore>" keyword.
+ */
+bool is_devname_ignore(const char *devname)
+{
+       static const char keyword[] = "<ignore>";
+
+       if (strcasecmp(devname, keyword) == 0)
+               return true;
+       return false;
+}
+
+/**
+ * ident_log() - generate and write message to the user.
+ * @param_name: name of the property.
+ * @value: value of the property.
+ * @reason: meaningful description.
+ * @cmdline: context dependent actions, see below.
+ *
+ * The function is made to provide similar error handling for both config and cmdline. The behavior
+ * is configurable via @cmdline. Message has the following format:
+ * "Value "@value" cannot be set as @param_name. Reason: @reason."
+ *
+ * If cmdline is on:
+ * - message is written to stderr.
+ * otherwise:
+ * - message is written to stdout.
+ * - "Value ignored" is added at the end of the message.
+ */
+static void ident_log(const char *param_name, const char *value, const char *reason,
+                     const bool cmdline)
+{
+       if (cmdline == true)
+               pr_err("Value \"%s\" cannot be set as %s. Reason: %s.\n", value, param_name,
+                      reason);
+       else
+               pr_info("Value \"%s\" cannot be set as %s. Reason: %s. Value ignored.\n", value,
+                       param_name, reason);
+}
+
+/**
+ * ident_init() - Set defaults.
+ * @ident: ident pointer, not NULL.
+ */
+inline void ident_init(struct mddev_ident *ident)
+{
+       assert(ident);
+
+       ident->assembled = false;
+       ident->autof = 0;
+       ident->bitmap_fd = -1;
+       ident->bitmap_file = NULL;
+       ident->container = NULL;
+       ident->devices = NULL;
+       ident->devname = NULL;
+       ident->level = UnSet;
+       ident->member = NULL;
+       ident->name[0] = 0;
+       ident->next = NULL;
+       ident->raid_disks = UnSet;
+       ident->spare_group = NULL;
+       ident->spare_disks = 0;
+       ident->st = NULL;
+       ident->super_minor = UnSet;
+       ident->uuid[0] = 0;
+       ident->uuid_set = 0;
+}
+
+/**
+ * _ident_set_devname()- verify devname and set it in &mddev_ident.
+ * @ident: pointer to &mddev_ident.
+ * @devname: devname to be set.
+ * @cmdline: context dependent actions. If set, ignore keyword is not allowed.
+ *
+ * @devname can have following forms:
+ *     '<ignore>' keyword (if allowed)
+ *     /dev/md{number}
+ *     /dev/md_d{number} (legacy)
+ *     /dev/md_{name}
+ *     /dev/md/{name}
+ *     {name}
+ *
+ * {name} must follow name's criteria and be POSIX compatible.
+ * If criteria passed, duplicate memory and set devname in @ident.
+ *
+ * Return: %MDADM_STATUS_SUCCESS or %MDADM_STATUS_ERROR.
+ */
+mdadm_status_t _ident_set_devname(struct mddev_ident *ident, const char *devname,
+                                 const bool cmdline)
+{
+       assert(ident);
+       assert(devname);
+
+       static const char named_dev_pref[] = DEV_NUM_PREF "_";
+       static const int named_dev_pref_size = sizeof(named_dev_pref) - 1;
+       const char *prop_name = "devname";
+       const char *name;
+
+       if (ident->devname) {
+               ident_log(prop_name, devname, "Already defined", cmdline);
+               return MDADM_STATUS_ERROR;
+       }
+
+       if (is_devname_ignore(devname) == true) {
+               if (!cmdline)
+                       goto pass;
+
+               ident_log(prop_name, devname, "Special keyword is invalid in this context",
+                         cmdline);
+               return MDADM_STATUS_ERROR;
+       }
+
+       if (is_devname_md_numbered(devname) == true || is_devname_md_d_numbered(devname) == true)
+               goto pass;
+
+       if (strncmp(devname, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
+               name = devname + DEV_MD_DIR_LEN;
+       else if (strncmp(devname, named_dev_pref, named_dev_pref_size) == 0)
+               name = devname + named_dev_pref_size;
+       else
+               name = devname;
+
+       if (is_name_posix_compatible(name) == false) {
+               ident_log(prop_name, name, "Not POSIX compatible", cmdline);
+               return MDADM_STATUS_ERROR;
+       }
+
+       if (is_string_lq(name, MD_NAME_MAX + 1) == false) {
+               ident_log(prop_name, devname, "Invalid length", cmdline);
+               return MDADM_STATUS_ERROR;
+       }
+pass:
+       ident->devname = xstrdup(devname);
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * _ident_set_name()- set name in &mddev_ident.
+ * @ident: pointer to &mddev_ident.
+ * @name: name to be set.
+ * @cmdline: context dependent actions.
+ *
+ * If criteria passed, set name in @ident.
+ * Note: name is not used by config file, it is for cmdline only.
+ *
+ * Return: %MDADM_STATUS_SUCCESS or %MDADM_STATUS_ERROR.
+ */
+static mdadm_status_t _ident_set_name(struct mddev_ident *ident, const char *name,
+                                     const bool cmdline)
+{
+       assert(name);
+       assert(ident);
+
+       const char *prop_name = "name";
+
+       if (ident->name[0]) {
+               ident_log(prop_name, name, "Already defined", cmdline);
+               return MDADM_STATUS_ERROR;
+       }
+
+       if (is_string_lq(name, MD_NAME_MAX + 1) == false) {
+               ident_log(prop_name, name, "Too long or empty", cmdline);
+               return MDADM_STATUS_ERROR;
+       }
+
+       if (is_name_posix_compatible(name) == false) {
+               ident_log(prop_name, name, "Not POSIX compatible", cmdline);
+               return MDADM_STATUS_ERROR;
+       }
+
+       snprintf(ident->name, MD_NAME_MAX + 1, "%s", name);
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * ident_set_devname()- exported, for cmdline.
+ */
+mdadm_status_t ident_set_devname(struct mddev_ident *ident, const char *name)
+{
+       return _ident_set_devname(ident, name, true);
+}
+
+/**
+ * ident_set_name()- exported, for cmdline.
+ */
+mdadm_status_t ident_set_name(struct mddev_ident *ident, const char *name)
+{
+       return _ident_set_name(ident, name, true);
+}
+
 struct conf_dev {
        struct conf_dev *next;
        char *name;
@@ -193,7 +385,6 @@ struct mddev_dev *load_containers(void)
 
 struct createinfo createinfo = {
        .autof = 2, /* by default, create devices with standard names */
-       .symlinks = 1,
        .names = 0, /* By default, stick with numbered md devices. */
        .bblist = 1, /* Use a bad block list by default */
 #ifdef DEBIAN
@@ -309,11 +500,7 @@ static void createline(char *line)
                        if (!createinfo.supertype)
                                pr_err("metadata format %s unknown, ignoring\n",
                                        w+9);
-               } else if (strncasecmp(w, "symlinks=yes", 12) == 0)
-                       createinfo.symlinks = 1;
-               else if  (strncasecmp(w, "symlinks=no", 11) == 0)
-                       createinfo.symlinks = 0;
-               else if (strncasecmp(w, "names=yes", 12) == 0)
+               } else if (strncasecmp(w, "names=yes", 12) == 0)
                        createinfo.names = 1;
                else if  (strncasecmp(w, "names=no", 11) == 0)
                        createinfo.names = 0;
@@ -349,17 +536,6 @@ void devline(char *line)
 struct mddev_ident *mddevlist = NULL;
 struct mddev_ident **mddevlp = &mddevlist;
 
-static int is_number(char *w)
-{
-       /* check if there are 1 or more digits and nothing else */
-       int digits = 0;
-       while (*w && isdigit(*w)) {
-               digits++;
-               w++;
-       }
-       return (digits && ! *w);
-}
-
 void arrayline(char *line)
 {
        char *w;
@@ -367,50 +543,11 @@ void arrayline(char *line)
        struct mddev_ident mis;
        struct mddev_ident *mi;
 
-       mis.uuid_set = 0;
-       mis.super_minor = UnSet;
-       mis.level = UnSet;
-       mis.raid_disks = UnSet;
-       mis.spare_disks = 0;
-       mis.devices = NULL;
-       mis.devname = NULL;
-       mis.spare_group = NULL;
-       mis.autof = 0;
-       mis.next = NULL;
-       mis.st = NULL;
-       mis.bitmap_fd = -1;
-       mis.bitmap_file = NULL;
-       mis.name[0] = 0;
-       mis.container = NULL;
-       mis.member = NULL;
+       ident_init(&mis);
 
        for (w = dl_next(line); w != line; w = dl_next(w)) {
                if (w[0] == '/' || strchr(w, '=') == NULL) {
-                       /* This names the device, or is '<ignore>'.
-                        * The rules match those in create_mddev.
-                        * 'w' must be:
-                        *  /dev/md/{anything}
-                        *  /dev/mdNN
-                        *  /dev/md_dNN
-                        *  <ignore>
-                        *  or anything that doesn't start '/' or '<'
-                        */
-                       if (strcasecmp(w, "<ignore>") == 0 ||
-                           strncmp(w, "/dev/md/", 8) == 0 ||
-                           (w[0] != '/' && w[0] != '<') ||
-                           (strncmp(w, "/dev/md", 7) == 0 &&
-                            is_number(w + 7)) ||
-                           (strncmp(w, "/dev/md_d", 9) == 0 &&
-                            is_number(w + 9))) {
-                               /* This is acceptable */;
-                               if (mis.devname)
-                                       pr_err("only give one device per ARRAY line: %s and %s\n",
-                                               mis.devname, w);
-                               else
-                                       mis.devname = w;
-                       }else {
-                               pr_err("%s is an invalid name for an md device - ignored.\n", w);
-                       }
+                       _ident_set_devname(&mis, w, false);
                } else if (strncasecmp(w, "uuid=", 5) == 0) {
                        if (mis.uuid_set)
                                pr_err("only specify uuid once, %s ignored.\n",
@@ -436,14 +573,8 @@ void arrayline(char *line)
                                        mis.super_minor = minor;
                        }
                } else if (strncasecmp(w, "name=", 5) == 0) {
-                       if (mis.name[0])
-                               pr_err("only specify name once, %s ignored.\n",
-                                       w);
-                       else if (strlen(w + 5) > 32)
-                               pr_err("name too long, ignoring %s\n", w);
-                       else
-                               strcpy(mis.name, w + 5);
-
+                       /* Ignore name in confile */
+                       continue;
                } else if (strncasecmp(w, "bitmap=", 7) == 0) {
                        if (mis.bitmap_file)
                                pr_err("only specify bitmap file once. %s ignored\n",
@@ -562,7 +693,7 @@ void homehostline(char *line)
        char *w;
 
        for (w = dl_next(line); w != line; w = dl_next(w)) {
-               if (strcasecmp(w, "<ignore>") == 0)
+               if (is_devname_ignore(w) == true)
                        require_homehost = 0;
                else if (home_host == NULL) {
                        if (strcasecmp(w, "<none>") == 0)
@@ -588,6 +719,30 @@ void homeclusterline(char *line)
        }
 }
 
+static int monitor_delay;
+void monitordelayline(char *line)
+{
+       char *w;
+
+       for (w = dl_next(line); w != line; w = dl_next(w)) {
+               if (monitor_delay == 0)
+                       monitor_delay = strtol(w, NULL, 10);
+       }
+}
+
+static bool sata_opal_encryption_no_verify;
+void encryption_no_verify_line(char *line)
+{
+       char *word;
+
+       for (word = dl_next(line); word != line; word = dl_next(word)) {
+               if (strcasecmp(word, "sata_opal") == 0)
+                       sata_opal_encryption_no_verify = true;
+               else
+                       pr_err("unrecognised word on ENCRYPTION_NO_VERIFY line: %s\n", word);
+       }
+}
+
 char auto_yes[] = "yes";
 char auto_no[] = "no";
 char auto_homehost[] = "homehost";
@@ -769,6 +924,12 @@ void conf_file(FILE *f)
                case Sysfs:
                        sysfsline(line);
                        break;
+               case MonitorDelay:
+                       monitordelayline(line);
+                       break;
+               case EncryptionNoVerify:
+                       encryption_no_verify_line(line);
+                       break;
                default:
                        pr_err("Unknown keyword %s\n", line);
                }
@@ -856,7 +1017,7 @@ void load_conffile(void)
                dl_add(list, dl_strdup("partitions"));
                devline(list);
                free_line(list);
-       } else if (strcmp(conffile, "none") != 0) {
+       } else if (str_is_none(conffile) == false) {
                f = fopen(conffile, "r");
                /* Debian chose to relocate mdadm.conf into /etc/mdadm/.
                 * To allow Debian users to compile from clean source and still
@@ -925,6 +1086,18 @@ char *conf_get_homecluster(void)
        return home_cluster;
 }
 
+int conf_get_monitor_delay(void)
+{
+       load_conffile();
+       return monitor_delay;
+}
+
+bool conf_get_sata_opal_encryption_no_verify(void)
+{
+       load_conffile();
+       return sata_opal_encryption_no_verify;
+}
+
 struct createinfo *conf_get_create_info(void)
 {
        load_conffile();
@@ -1073,13 +1246,13 @@ int devname_matches(char *name, char *match)
         *  mdNN with NN
         * then just strcmp
         */
-       if (strncmp(name, "/dev/md/", 8) == 0)
-               name += 8;
+       if (strncmp(name, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
+               name += DEV_MD_DIR_LEN;
        else if (strncmp(name, "/dev/", 5) == 0)
                name += 5;
 
-       if (strncmp(match, "/dev/md/", 8) == 0)
-               match += 8;
+       if (strncmp(match, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0)
+               match += DEV_MD_DIR_LEN;
        else if (strncmp(match, "/dev/", 5) == 0)
                match += 5;
 
@@ -1131,13 +1304,7 @@ struct mddev_ident *conf_match(struct supertype *st,
                                       array_list->devname);
                        continue;
                }
-               if (array_list->name[0] &&
-                   strcasecmp(array_list->name, info->name) != 0) {
-                       if (verbose >= 2 && array_list->devname)
-                               pr_err("Name differs from %s.\n",
-                                      array_list->devname);
-                       continue;
-               }
+
                if (array_list->devices && devname &&
                    !match_oneof(array_list->devices, devname)) {
                        if (verbose >= 2 && array_list->devname)
diff --git a/dlink.c b/dlink.c
index 3efa94b751ab233ee50ae7d03228490065fe6b7d..69aa7aa3930f348ad352050011287d10f700bce2 100644 (file)
--- a/dlink.c
+++ b/dlink.c
@@ -63,7 +63,7 @@ char *dl_strndup(char *s, int l)
     if (s == NULL)
        return NULL;
     n = dl_newv(char, l+1);
-    strncpy(n, s, l);
+    strncpy(n, s, l+1);
     n[l] = 0;
     return n;
 }
similarity index 99%
rename from external-reshape-design.txt
rename to documentation/external-reshape-design.txt
index 10c57ccb64c96ef0b2b785f74fe6a1a244f105c3..e4cf4e1634a50c87080d54dda980cfef5f09bbab 100644 (file)
@@ -277,4 +277,4 @@ sync_action
 
 ...
 
-[1]: Linux kernel design patterns - part 3, Neil Brown http://lwn.net/Articles/336262/
+[1]: Linux kernel design patterns - part 3, Neil Brown https://lwn.net/Articles/336262/
diff --git a/drive_encryption.c b/drive_encryption.c
new file mode 100644 (file)
index 0000000..27da962
--- /dev/null
@@ -0,0 +1,724 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Read encryption information for Opal and ATA devices.
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *     Author: Blazej Kucman <blazej.kucman@intel.com>
+ */
+
+#include "mdadm.h"
+
+#include <asm/types.h>
+#include <linux/nvme_ioctl.h>
+#include <scsi/sg.h>
+#include <scsi/scsi.h>
+#include "drive_encryption.h"
+
+#define DEFAULT_SECTOR_SIZE (512)
+
+/*
+ * Opal defines
+ * TCG Storage Opal SSC 2.01 chapter 3.3.3
+ * NVM ExpressTM Revision 1.4c, chapter 5
+ */
+#define TCG_SECP_01 (0x01)
+#define TCG_SECP_00 (0x00)
+#define OPAL_DISCOVERY_COMID (0x0001)
+#define OPAL_LOCKING_FEATURE (0x0002)
+#define OPAL_IO_BUFFER_LEN 2048
+#define OPAL_DISCOVERY_FEATURE_HEADER_LEN (4)
+
+/*
+ * NVMe defines
+ * NVM ExpressTM Revision 1.4c, chapter 5
+ */
+#define NVME_SECURITY_RECV (0x82)
+#define NVME_IDENTIFY (0x06)
+#define NVME_IDENTIFY_RESPONSE_LEN 4096
+#define NVME_OACS_BYTE_POSITION (256)
+#define NVME_IDENTIFY_CONTROLLER_DATA (1)
+
+/*
+ * ATA defines
+ * ATA/ATAPI Command Set ATA8-ACS
+ * SCSI / ATA Translation - 3 (SAT-3)
+ * SCSI Primary Commands - 4 (SPC-4)
+ * AT Attachment-8 - ATA Serial Transport (ATA8-AST)
+ * ATA Command Pass-Through
+ */
+#define ATA_IDENTIFY (0xec)
+#define ATA_TRUSTED_RECEIVE (0x5c)
+#define ATA_SECURITY_WORD_POSITION (128)
+#define HDIO_DRIVE_CMD (0x031f)
+#define ATA_TRUSTED_COMPUTING_POS (48)
+#define ATA_PASS_THROUGH_12 (0xa1)
+#define ATA_IDENTIFY_RESPONSE_LEN (512)
+#define ATA_PIO_DATA_IN (4)
+#define SG_CHECK_CONDITION (0x02)
+#define ATA_STATUS_RETURN_DESCRIPTOR (0x09)
+#define ATA_PT_INFORMATION_AVAILABLE_ASCQ (0x1d)
+#define ATA_PT_INFORMATION_AVAILABLE_ASC (0x00)
+#define ATA_INQUIRY_LENGTH (0x0c)
+#define SG_INTERFACE_ID 'S'
+#define SG_IO_TIMEOUT (60000)
+#define SG_SENSE_SIZE (32)
+#define SENSE_DATA_CURRENT_FIXED (0x70)
+#define SENSE_DATA_CURRENT_DESC (0x72)
+#define SENSE_CURRENT_RES_DESC_POS (8)
+#define SG_DRIVER_SENSE        (0x08)
+
+typedef enum drive_feature_support_status {
+       /* Drive feature is supported. */
+       DRIVE_FEAT_SUP_ST = 0,
+       /* Drive feature is not supported. */
+       DRIVE_FEAT_NOT_SUP_ST,
+       /* Drive feature support check failed. */
+       DRIVE_FEAT_CHECK_FAILED_ST
+} drive_feat_sup_st;
+
+/* TCG Storage Opal SSC 2.01 chapter 3.1.1.3 */
+typedef struct opal_locking_feature {
+       /* feature header */
+       __u16 feature_code;
+       __u8 reserved : 4;
+       __u8 version : 4;
+       __u8 description_length;
+       /* feature description */
+       __u8 locking_supported : 1;
+       __u8 locking_enabled : 1;
+       __u8 locked : 1;
+       __u8 media_encryption : 1;
+       __u8 mbr_enabled : 1;
+       __u8 mbr_done : 1;
+       __u8 mbr_shadowing_not_supported : 1;
+       __u8 hw_reset_for_dor_supported : 1;
+       __u8 reserved1[11];
+} __attribute__((__packed__)) opal_locking_feature_t;
+
+/* TCG Storage Opal SSC 2.01 chapter 3.1.1.1 */
+typedef struct opal_level0_header {
+       __u32 length;
+       __u32 version;
+       __u64 reserved;
+       __u8 vendor_specific[32];
+} opal_level0_header_t;
+
+/**
+ * NVM ExpressTM Revision 1.4c, Figure 249. Only the OACS field (bytes 257:256), which is needed
+ * in the current use case, is specified; the layout is exactly NVME_IDENTIFY_RESPONSE_LEN bytes.
+ */
+typedef struct nvme_identify_ctrl {
+       __u8 reserved[256];
+       __u16 oacs;
+       __u8 reserved2[3838];
+} nvme_identify_ctrl_t;
+
+/* SCSI Primary Commands - 4 (SPC-4), Table 512 */
+typedef struct supported_security_protocols {
+       __u8  reserved[6];
+       __u16 list_length;
+       __u8  list[504];
+} supported_security_protocols_t;
+
+/* ATA/ATAPI Command Set - 3 (ACS-3), Table 45 */
+typedef struct ata_security_status {
+       __u16 security_supported : 1;
+       __u16 security_enabled : 1;
+       __u16 security_locked : 1;
+       __u16 security_frozen : 1;
+       __u16 security_count_expired : 1;
+       __u16 enhanced_security_erase_supported : 1;
+       __u16 reserved1 : 2;
+       __u16 security_level : 1;
+       __u16 reserved2 : 7;
+} __attribute__((__packed__)) ata_security_status_t;
+
+/* ATA/ATAPI Command Set - 3 (ACS-3), Table 45 */
+typedef struct ata_trusted_computing {
+       __u16 tc_feature :1;
+       __u16 reserved : 13;
+       __u16 var1 : 1;
+       __u16 var2 : 1;
+} __attribute__((__packed__)) ata_trusted_computing_t;
+
+mapping_t encryption_ability_map[] = {
+       { "None", ENC_ABILITY_NONE },
+       { "Other", ENC_ABILITY_OTHER },
+       { "SED", ENC_ABILITY_SED },
+       { NULL, UnSet }
+};
+
+mapping_t encryption_status_map[] = {
+       { "Unencrypted", ENC_STATUS_UNENCRYPTED },
+       { "Locked", ENC_STATUS_LOCKED },
+       { "Unlocked", ENC_STATUS_UNLOCKED },
+       { NULL, UnSet }
+};
+
+/**
+ * get_encryption_ability_string() - get encryption ability name string.
+ * @ability: encryption ability enum.
+ *
+ * Return: encryption ability string.
+ */
+const char *get_encryption_ability_string(enum encryption_ability ability)
+{
+       return map_num_s(encryption_ability_map, ability);
+}
+
+/**
+ * get_encryption_status_string() - get encryption status name string.
+ * @status: encryption status enum.
+ *
+ * Return: encryption status string.
+ */
+const char *get_encryption_status_string(enum encryption_status status)
+{
+       return map_num_s(encryption_status_map, status);
+}
+
+/**
+ * get_opal_locking_feature_description() - get opal locking feature description.
+ * @response: response from Opal Discovery Level 0, searched up to %OPAL_IO_BUFFER_LEN bytes.
+ *
+ * Based on the documentation TCG Storage Opal SSC 2.01 chapter 3.1.1,
+ * a Locking feature is searched for in Opal Level 0 Discovery response.
+ *
+ * Return: if locking feature is found, pointer to struct %opal_locking_feature_t, NULL otherwise.
+ */
+static opal_locking_feature_t *get_opal_locking_feature_description(__u8 *response)
+{
+       opal_level0_header_t *response_header = (opal_level0_header_t *)response;
+       __u32 features_length = __be32_to_cpu(response_header->length);
+       size_t current_position = sizeof(*response_header);
+       opal_locking_feature_t *feature;
+
+       /* features_length is device-reported: keep each descriptor inside the I/O buffer. */
+       while (current_position < features_length &&
+              current_position + sizeof(opal_locking_feature_t) <= OPAL_IO_BUFFER_LEN) {
+               feature = (opal_locking_feature_t *)(response + current_position);
+               if (__be16_to_cpu(feature->feature_code) == OPAL_LOCKING_FEATURE)
+                       return feature;
+
+               current_position += feature->description_length + OPAL_DISCOVERY_FEATURE_HEADER_LEN;
+       }
+
+       return NULL;
+}
+
+/**
+ * nvme_security_recv_ioctl() - nvme security receive ioctl.
+ * @disk_fd: a disk file descriptor.
+ * @sec_protocol: security protocol, placed in bits 31:24 of command dword 10.
+ * @comm_id: command id, placed in bits 23:08 of command dword 10.
+ * @response_buffer: response buffer to fill out, handed to the kernel as a 64-bit address.
+ * @buf_size: response buffer size.
+ * @verbose: verbose flag.
+ *
+ * Based on the documentations TCG Storage Opal SSC 2.01 chapter 3.3.3 and
+ * NVM ExpressTM Revision 1.4c, chapter 5.25,
+ * read security receive command via ioctl().
+ * On success, @response_buffer is completed.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise.
+ */
+static mdadm_status_t
+nvme_security_recv_ioctl(int disk_fd, __u8 sec_protocol, __u16 comm_id, void *response_buffer,
+                        size_t buf_size, const int verbose)
+{
+       struct nvme_admin_cmd nvme_cmd = {0};
+       int status;
+
+       nvme_cmd.opcode = NVME_SECURITY_RECV;
+       /* Shift as __u32: the promoted int would overflow for sec_protocol >= 0x80. */
+       nvme_cmd.cdw10 = (__u32)sec_protocol << 24 | (__u32)comm_id << 8;
+       nvme_cmd.cdw11 = buf_size;
+       nvme_cmd.data_len = buf_size;
+       nvme_cmd.addr = (__u64)(unsigned long)response_buffer;
+       status = ioctl(disk_fd, NVME_IOCTL_ADMIN_CMD, &nvme_cmd);
+       if (status != 0) {
+               pr_vrb("Failed to read NVMe security receive ioctl() for device /dev/%s, status: %d\n",
+                      fd2kname(disk_fd), status);
+               return MDADM_STATUS_ERROR;
+       }
+
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * nvme_identify_ioctl() - NVMe identify ioctl.
+ * @disk_fd: a disk file descriptor.
+ * @response_buffer: response buffer to fill out.
+ * @buf_size: response buffer size.
+ * @verbose: verbose flag.
+ *
+ * Based on the documentations TCG Storage Opal SSC 2.01 chapter 3.3.3 and
+ * NVM ExpressTM Revision 1.4c, chapter 5.15,
+ * read NVMe identify via ioctl().
+ * On success, @response_buffer will be completed.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise.
+ */
+static mdadm_status_t
+nvme_identify_ioctl(int disk_fd, void *response_buffer, size_t buf_size, const int verbose)
+{
+       struct nvme_admin_cmd nvme_cmd = {0};
+       int status;
+
+       nvme_cmd.opcode = NVME_IDENTIFY;
+       nvme_cmd.cdw10 = NVME_IDENTIFY_CONTROLLER_DATA;
+       nvme_cmd.data_len = buf_size;
+       /* Go through unsigned long, so pointers narrower than 64 bits convert cleanly. */
+       nvme_cmd.addr = (__u64)(unsigned long)response_buffer;
+       status = ioctl(disk_fd, NVME_IOCTL_ADMIN_CMD, &nvme_cmd);
+       if (status != 0) {
+               pr_vrb("Failed to read NVMe identify ioctl() for device /dev/%s, status: %d\n",
+                      fd2kname(disk_fd), status);
+               return MDADM_STATUS_ERROR;
+       }
+
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * is_sec_prot_01h_supported() - check if security protocol 01h supported.
+ * @security_protocols: struct with response from disk (NVMe, SATA) describing supported
+ * security protocols.
+ *
+ * Return: true if TCG_SECP_01 found, false otherwise.
+ */
+static bool is_sec_prot_01h_supported(supported_security_protocols_t *security_protocols)
+{
+       size_t list_length = be16toh(security_protocols->list_length);
+       size_t index;
+
+       /* list_length is device-reported: never scan past the end of list[]. */
+       for (index = 0; index < list_length && index < sizeof(security_protocols->list); index++) {
+               if (security_protocols->list[index] == TCG_SECP_01)
+                       return true;
+       }
+       return false;
+}
+
+/**
+ * is_sec_prot_01h_supported_nvme() - check if security protocol 01h supported for given NVMe disk.
+ * @disk_fd: a disk file descriptor.
+ * @verbose: verbose flag.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported,
+ * %DRIVE_FEAT_CHECK_FAILED_ST if failed to check.
+ */
+static drive_feat_sup_st is_sec_prot_01h_supported_nvme(int disk_fd, const int verbose)
+{
+       supported_security_protocols_t security_protocols = {0};
+
+       /* security_protocol: TCG_SECP_00, comm_id: not applicable */
+       if (nvme_security_recv_ioctl(disk_fd, TCG_SECP_00, 0x0, &security_protocols,
+                                    sizeof(security_protocols), verbose))
+               return DRIVE_FEAT_CHECK_FAILED_ST;
+
+       if (is_sec_prot_01h_supported(&security_protocols))
+               return DRIVE_FEAT_SUP_ST;
+
+       return DRIVE_FEAT_NOT_SUP_ST;
+}
+
+/**
+ * is_nvme_sec_send_recv_supported() - check if Security Send and Security Receive is supported.
+ * @disk_fd: a disk file descriptor.
+ * @verbose: verbose flag.
+ *
+ * Check if "Optional Admin Command Support" bit 0 is set in NVMe identify.
+ * Bit 0 set to 1 means controller supports the Security Send and Security Receive commands.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if security send/receive supported,
+ * %DRIVE_FEAT_NOT_SUP_ST if not supported, %DRIVE_FEAT_CHECK_FAILED_ST if check failed.
+ */
+static drive_feat_sup_st is_nvme_sec_send_recv_supported(int disk_fd, const int verbose)
+{
+       nvme_identify_ctrl_t nvme_identify = {0};
+       int status = 0;
+
+       status = nvme_identify_ioctl(disk_fd, &nvme_identify, sizeof(nvme_identify), verbose);
+       if (status)
+               return DRIVE_FEAT_CHECK_FAILED_ST;
+
+       if ((__le16_to_cpu(nvme_identify.oacs) & 0x1) == 0x1)
+               return DRIVE_FEAT_SUP_ST;
+
+       return DRIVE_FEAT_NOT_SUP_ST;
+}
+
+/**
+ * get_opal_encryption_information() - get Opal encryption information.
+ * @buffer: buffer with Opal Level 0 Discovery response.
+ * @information: struct to fill out, describing encryption status of disk.
+ *
+ * If Locking feature frame is in response from Opal Level 0 discovery, &encryption_information_t
+ * structure is completed with status and ability, otherwise @information is left unchanged.
+ * For possible encryption statuses and abilities,
+ * please refer to enums &encryption_status and &encryption_ability.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise.
+ */
+static mdadm_status_t get_opal_encryption_information(__u8 *buffer,
+                                                     encryption_information_t *information)
+{
+       opal_locking_feature_t *opal_locking_feature =
+                                       get_opal_locking_feature_description(buffer);
+
+       if (!opal_locking_feature)
+               return MDADM_STATUS_ERROR;
+
+       if (opal_locking_feature->locking_supported == 1) {
+               information->ability = ENC_ABILITY_SED;
+
+               if (opal_locking_feature->locking_enabled == 0)
+                       information->status = ENC_STATUS_UNENCRYPTED;
+               else if (opal_locking_feature->locked == 1)
+                       information->status = ENC_STATUS_LOCKED;
+               else
+                       information->status = ENC_STATUS_UNLOCKED;
+       } else {
+               information->ability = ENC_ABILITY_NONE;
+               information->status = ENC_STATUS_UNENCRYPTED;
+       }
+
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * get_nvme_opal_encryption_information() - get NVMe Opal encryption information.
+ * @disk_fd: a disk file descriptor.
+ * @information: struct to fill out, describing encryption status of disk.
+ * @verbose: verbose flag.
+ *
+ * In case the disk supports Opal Level 0 discovery, &encryption_information_t structure
+ * is completed with status and ability based on ioctl response,
+ * otherwise the ability is set to %ENC_ABILITY_NONE and &status to %ENC_STATUS_UNENCRYPTED.
+ * As the current use case does not need the knowledge of Opal support, if there is no support,
+ * %MDADM_STATUS_SUCCESS will be returned, with the values described above.
+ * For possible encryption statuses and abilities,
+ * please refer to enums &encryption_status and &encryption_ability.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise.
+ */
+mdadm_status_t
+get_nvme_opal_encryption_information(int disk_fd, encryption_information_t *information,
+                                    const int verbose)
+{
+       __u8 buffer[OPAL_IO_BUFFER_LEN];
+       int sec_send_recv_supported = 0;
+       int protocol_01h_supported = 0;
+       mdadm_status_t status;
+
+       information->ability = ENC_ABILITY_NONE;
+       information->status = ENC_STATUS_UNENCRYPTED;
+
+       sec_send_recv_supported = is_nvme_sec_send_recv_supported(disk_fd, verbose);
+       if (sec_send_recv_supported == DRIVE_FEAT_CHECK_FAILED_ST)
+               return MDADM_STATUS_ERROR;
+
+       /* Opal not supported */
+       if (sec_send_recv_supported == DRIVE_FEAT_NOT_SUP_ST)
+               return MDADM_STATUS_SUCCESS;
+
+       /**
+        * sec_send_recv_supported determines that it should be possible to read
+        * supported security protocols
+        */
+       protocol_01h_supported = is_sec_prot_01h_supported_nvme(disk_fd, verbose);
+       if (protocol_01h_supported == DRIVE_FEAT_CHECK_FAILED_ST)
+               return MDADM_STATUS_ERROR;
+
+       /* Opal not supported */
+       if (sec_send_recv_supported == DRIVE_FEAT_SUP_ST &&
+           protocol_01h_supported == DRIVE_FEAT_NOT_SUP_ST)
+               return MDADM_STATUS_SUCCESS;
+
+       if (nvme_security_recv_ioctl(disk_fd, TCG_SECP_01, OPAL_DISCOVERY_COMID, (void *)&buffer,
+                                    OPAL_IO_BUFFER_LEN, verbose))
+               return MDADM_STATUS_ERROR;
+
+       status = get_opal_encryption_information((__u8 *)&buffer, information);
+       if (status)
+               pr_vrb("Locking feature description not found in Level 0 discovery response. Device /dev/%s.\n",
+                      fd2kname(disk_fd));
+
+       if (information->ability == ENC_ABILITY_NONE)
+               assert(information->status == ENC_STATUS_UNENCRYPTED);
+
+       return status;
+}
+
+/**
+ * ata_pass_through12_ioctl() - ata pass through12 ioctl.
+ * @disk_fd: a disk file descriptor.
+ * @ata_command: ata command.
+ * @sec_protocol: security protocol.
+ * @comm_id: additional command id.
+ * @response_buffer: response buffer to fill out.
+ * @buf_size: response buffer size.
+ * @verbose: verbose flag.
+ *
+ * Based on the documentations ATA Command Pass-Through, chapter 13.2.2 and
+ * ATA Translation - 3 (SAT-3), send read ata pass through 12 command via ioctl().
+ * On success, @response_buffer will be completed.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail.
+ */
+static mdadm_status_t
+ata_pass_through12_ioctl(int disk_fd, __u8 ata_command,  __u8 sec_protocol, __u16 comm_id,
+                        void *response_buffer, size_t buf_size, const int verbose)
+{
+       __u8 cdb[ATA_INQUIRY_LENGTH] = {0};
+       __u8 sense[SG_SENSE_SIZE] = {0};
+       __u8 *sense_desc = NULL;
+       sg_io_hdr_t sg = {0};
+
+       /*
+        * ATA Command Pass-Through, chapter 13.2.2
+        * SCSI Primary Commands - 4 (SPC-4)
+        * ATA Translation - 3 (SAT-3)
+        */
+       cdb[0] = ATA_PASS_THROUGH_12;
+       /* protocol, bits 1-4 */
+       cdb[1] = ATA_PIO_DATA_IN << 1;
+       /* Bits: CK_COND=1, T_DIR = 1, BYTE_BLOCK = 1, Length in Sector Count = 2 */
+       cdb[2] = 0x2E;
+       cdb[3] = sec_protocol;
+       /* Sector count */
+       cdb[4] = buf_size / DEFAULT_SECTOR_SIZE;
+       cdb[6] = (comm_id) & 0xFF;
+       cdb[7] = (comm_id >> 8) & 0xFF;
+       cdb[9] = ata_command;
+
+       sg.interface_id = SG_INTERFACE_ID;
+       sg.cmd_len = sizeof(cdb);
+       sg.mx_sb_len = sizeof(sense);
+       sg.dxfer_direction = SG_DXFER_FROM_DEV;
+       sg.dxfer_len = buf_size;
+       sg.dxferp = response_buffer;
+       sg.cmdp = cdb;
+       sg.sbp = sense;
+       sg.timeout = SG_IO_TIMEOUT;
+       sg.usr_ptr = NULL;
+
+       if (ioctl(disk_fd, SG_IO, &sg) < 0) {
+               pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+               return MDADM_STATUS_ERROR;
+       }
+
+       if ((sg.status && sg.status != SG_CHECK_CONDITION) || sg.host_status ||
+           (sg.driver_status && sg.driver_status != SG_DRIVER_SENSE)) {
+               pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+               pr_vrb("SG_IO error: ATA_12 Status: %d Host Status: %d, Driver Status: %d\n",
+                      sg.status, sg.host_status, sg.driver_status);
+               return MDADM_STATUS_ERROR;
+       }
+
+       /* verify expected sense response code */
+       if (!(sense[0] == SENSE_DATA_CURRENT_DESC || sense[0] == SENSE_DATA_CURRENT_FIXED)) {
+               pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+               return MDADM_STATUS_ERROR;
+       }
+
+       sense_desc = sense + SENSE_CURRENT_RES_DESC_POS;
+       /* verify sense data current response with descriptor format */
+       if (sense[0] == SENSE_DATA_CURRENT_DESC &&
+           !(sense_desc[0] == ATA_STATUS_RETURN_DESCRIPTOR &&
+           sense_desc[1] == ATA_INQUIRY_LENGTH)) {
+               pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s. Sense data ASC: %d, ASCQ: %d.\n",
+                      fd2kname(disk_fd), sense[2], sense[3]);
+               return MDADM_STATUS_ERROR;
+       }
+
+       /* verify sense data current response with fixed format */
+       if (sense[0] == SENSE_DATA_CURRENT_FIXED &&
+           !(sense[12] == ATA_PT_INFORMATION_AVAILABLE_ASC &&
+           sense[13] == ATA_PT_INFORMATION_AVAILABLE_ASCQ)) {
+               pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s. Sense data ASC: %d, ASCQ: %d.\n",
+                      fd2kname(disk_fd), sense[12], sense[13]);
+               return MDADM_STATUS_ERROR;
+       }
+
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * is_sec_prot_01h_supported_ata() - check if security protocol 01h supported for given SATA disk.
+ * @disk_fd: a disk file descriptor.
+ * @verbose: verbose flag.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported,
+ * %DRIVE_FEAT_CHECK_FAILED_ST if failed.
+ */
+static drive_feat_sup_st is_sec_prot_01h_supported_ata(int disk_fd, const int verbose)
+{
+       /* Zeroed, as the ioctl helper does not check how many bytes were really transferred. */
+       supported_security_protocols_t security_protocols = {0};
+       mdadm_status_t result = ata_pass_through12_ioctl(disk_fd, ATA_TRUSTED_RECEIVE, TCG_SECP_00,
+                                                        0x0, &security_protocols,
+                                                        sizeof(security_protocols), verbose);
+       if (result)
+               return DRIVE_FEAT_CHECK_FAILED_ST;
+
+       if (is_sec_prot_01h_supported(&security_protocols))
+               return DRIVE_FEAT_SUP_ST;
+
+       return DRIVE_FEAT_NOT_SUP_ST;
+}
+
+/**
+ * is_ata_trusted_computing_supported() - check if ata trusted computing supported.
+ * @buffer: buffer with ATA identify response, not NULL.
+ *
+ * Return: true if trusted computing bit set, false otherwise.
+ */
+bool is_ata_trusted_computing_supported(__u16 *buffer)
+{
+       /* Added due to warnings from the compiler about a possible uninitialized variable below. */
+       assert(buffer);
+
+       __u16 security_tc_frame = __le16_to_cpu(buffer[ATA_TRUSTED_COMPUTING_POS]);
+
+       /*
+        * Test bit 0 with a mask: bit-field layout is implementation-defined, so overlaying
+        * &ata_trusted_computing_t on the host-order word is not portable.
+        */
+       return (security_tc_frame & 0x1) != 0;
+}
+
+/**
+ * get_ata_standard_security_status() - get ATA disk encryption information from ATA identify.
+ * @buffer: buffer with response from ATA identify, not NULL.
+ * @information: struct to fill out, describing encryption status of disk.
+ *
+ * Based on bits 0-2 of the Security status word from ATA identify (tested by mask, as
+ * bit-field layout is implementation-defined), the function completes encryption information.
+ * For possible encryption statuses and abilities,
+ * please refer to enums &encryption_status and &encryption_ability.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail.
+ */
+static mdadm_status_t get_ata_standard_security_status(__u16 *buffer,
+                                                      struct encryption_information *information)
+{
+       /* Added due to warnings from the compiler about a possible uninitialized variable below. */
+       assert(buffer);
+
+       __u16 security_status_frame = __le16_to_cpu(buffer[ATA_SECURITY_WORD_POSITION]);
+       bool security_supported = (security_status_frame & 0x1) != 0;
+       bool security_enabled = (security_status_frame & 0x2) != 0;
+       bool security_locked = (security_status_frame & 0x4) != 0;
+
+       if (!security_supported) {
+               information->ability = ENC_ABILITY_NONE;
+               information->status = ENC_STATUS_UNENCRYPTED;
+               return MDADM_STATUS_SUCCESS;
+       }
+
+       information->ability = ENC_ABILITY_OTHER;
+       if (!security_enabled)
+               information->status = ENC_STATUS_UNENCRYPTED;
+       else if (security_locked)
+               information->status = ENC_STATUS_LOCKED;
+       else
+               information->status = ENC_STATUS_UNLOCKED;
+
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * is_ata_opal() - check if SATA disk supports Opal.
+ * @disk_fd: a disk file descriptor.
+ * @buffer_identify: buffer with ATA identify response.
+ * @verbose: verbose flag.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported,
+ * %DRIVE_FEAT_CHECK_FAILED_ST if failed to check.
+ */
+static drive_feat_sup_st is_ata_opal(int disk_fd, __u16 *buffer_identify, const int verbose)
+{
+       bool tc_status = is_ata_trusted_computing_supported(buffer_identify);
+       drive_feat_sup_st tcg_sec_prot_status;
+
+       if (!tc_status)
+               return DRIVE_FEAT_NOT_SUP_ST;
+
+       tcg_sec_prot_status = is_sec_prot_01h_supported_ata(disk_fd, verbose);
+
+       if (tcg_sec_prot_status == DRIVE_FEAT_CHECK_FAILED_ST) {
+               pr_vrb("Failed to verify if security protocol 01h supported. Device /dev/%s.\n",
+                      fd2kname(disk_fd));
+               return DRIVE_FEAT_CHECK_FAILED_ST;
+       }
+
+       if (tc_status && tcg_sec_prot_status == DRIVE_FEAT_SUP_ST)
+               return DRIVE_FEAT_SUP_ST;
+
+       return DRIVE_FEAT_NOT_SUP_ST;
+}
+
+/**
+ * get_ata_encryption_information() - get ATA disk encryption information.
+ * @disk_fd: a disk file descriptor.
+ * @information: struct to fill out, describing encryption status of disk.
+ * @verbose: verbose flag.
+ *
+ * The function reads information about encryption. If the disk supports Opal,
+ * the information is filled in based on Opal Level 0 discovery; otherwise, it is
+ * based on the ATA security status frame from the ATA identification response.
+ * For possible encryption statuses and abilities,
+ * please refer to enums &encryption_status and &encryption_ability.
+ *
+ * Based on the documents ATA/ATAPI Command Set ATA8-ACS and
+ * AT Attachment-8 - ATA Serial Transport (ATA8-AST).
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail.
+ */
+mdadm_status_t
+get_ata_encryption_information(int disk_fd, struct encryption_information *information,
+                              const int verbose)
+{
+       __u8 buffer_opal_level0_discovery[OPAL_IO_BUFFER_LEN] = {0};
+       __u16 buffer_identify[ATA_IDENTIFY_RESPONSE_LEN] = {0};
+       drive_feat_sup_st ata_opal_status;
+       mdadm_status_t status;
+
+       /* Get disk ATA identification */
+       status = ata_pass_through12_ioctl(disk_fd, ATA_IDENTIFY, 0x0, 0x0, buffer_identify,
+                                         sizeof(buffer_identify), verbose);
+       if (status == MDADM_STATUS_ERROR)
+               return MDADM_STATUS_ERROR;
+
+       /* Possible OPAL support, further checks require tpm_enabled. */
+       if (is_ata_trusted_computing_supported(buffer_identify)) {
+               /* OPAL SATA encryption checking disabled. */
+               if (conf_get_sata_opal_encryption_no_verify())
+                       return MDADM_STATUS_SUCCESS;
+
+               if (!sysfs_is_libata_allow_tpm_enabled(verbose)) {
+                       pr_vrb("Detected SATA drive /dev/%s with Trusted Computing support.\n",
+                              fd2kname(disk_fd));
+                       pr_vrb("Cannot verify encryption state. Requires libata.tpm_enabled=1.\n");
+                       return MDADM_STATUS_ERROR;
+               }
+       }
+
+       ata_opal_status = is_ata_opal(disk_fd, buffer_identify, verbose);
+       if (ata_opal_status == DRIVE_FEAT_CHECK_FAILED_ST)
+               return MDADM_STATUS_ERROR;
+
+       if (ata_opal_status == DRIVE_FEAT_NOT_SUP_ST)
+               return get_ata_standard_security_status(buffer_identify, information);
+
+       /* SATA Opal */
+       status = ata_pass_through12_ioctl(disk_fd, ATA_TRUSTED_RECEIVE, TCG_SECP_01,
+                                         OPAL_DISCOVERY_COMID, buffer_opal_level0_discovery,
+                                         OPAL_IO_BUFFER_LEN, verbose);
+       if (status != MDADM_STATUS_SUCCESS)
+               return MDADM_STATUS_ERROR;
+
+       return get_opal_encryption_information(buffer_opal_level0_discovery, information);
+}
diff --git a/drive_encryption.h b/drive_encryption.h
new file mode 100644 (file)
index 0000000..0cb8ff1
--- /dev/null
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Read encryption information for Opal and ATA devices.
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *     Author: Blazej Kucman <blazej.kucman@intel.com>
+ */
+
+typedef enum encryption_status {
+       /* The drive is not currently encrypted. */
+       ENC_STATUS_UNENCRYPTED = 0,
+       /* The drive is encrypted and the data is not accessible. */
+       ENC_STATUS_LOCKED,
+       /* The drive is encrypted but the data is accessible in unencrypted form. */
+       ENC_STATUS_UNLOCKED
+} encryption_status_t;
+
+typedef enum encryption_ability {
+       ENC_ABILITY_NONE = 0,
+       ENC_ABILITY_OTHER,
+       /* Self encrypted drive */
+       ENC_ABILITY_SED
+} encryption_ability_t;
+
+typedef struct encryption_information {
+       encryption_ability_t ability;
+       encryption_status_t status;
+} encryption_information_t;
+
+mdadm_status_t
+get_nvme_opal_encryption_information(int disk_fd, struct encryption_information *information,
+                                    const int verbose);
+mdadm_status_t
+get_ata_encryption_information(int disk_fd, struct encryption_information *information,
+                              const int verbose);
+const char *get_encryption_ability_string(enum encryption_ability ability);
+const char *get_encryption_status_string(enum encryption_status status);
diff --git a/inventory b/inventory
deleted file mode 100755 (executable)
index 8d9c104..0000000
--- a/inventory
+++ /dev/null
@@ -1,275 +0,0 @@
-
-.gitignore
-ANNOUNCE-3.0
-ANNOUNCE-3.0.1
-ANNOUNCE-3.0.2
-ANNOUNCE-3.0.3
-ANNOUNCE-3.1
-ANNOUNCE-3.1.1
-ANNOUNCE-3.1.2
-ANNOUNCE-3.1.3
-ANNOUNCE-3.1.4
-ANNOUNCE-3.1.5
-ANNOUNCE-3.2
-ANNOUNCE-3.2.1
-ANNOUNCE-3.2.2
-ANNOUNCE-3.2.3
-ANNOUNCE-3.2.4
-ANNOUNCE-3.2.5
-ANNOUNCE-3.2.6
-ANNOUNCE-3.3
-ANNOUNCE-3.3.1
-ANNOUNCE-3.3.2
-ANNOUNCE-3.3.3
-ANNOUNCE-3.3.4
-ANNOUNCE-3.4
-ANNOUNCE-4.0
-ANNOUNCE-4.1
-Assemble.c
-Build.c
-COPYING
-ChangeLog
-Create.c
-Detail.c
-Dump.c
-Examine.c
-Grow.c
-INSTALL
-Incremental.c
-Kill.c
-Makefile
-Manage.c
-Monitor.c
-Query.c
-README.initramfs
-ReadMe.c
-TODO
-bitmap.c
-bitmap.h
-clustermd_tests/
-clustermd_tests/00r10_Create
-clustermd_tests/00r1_Create
-clustermd_tests/01r10_Grow_bitmap-switch
-clustermd_tests/01r10_Grow_resize
-clustermd_tests/01r1_Grow_add
-clustermd_tests/01r1_Grow_bitmap-switch
-clustermd_tests/01r1_Grow_resize
-clustermd_tests/02r10_Manage_add
-clustermd_tests/02r10_Manage_add-spare
-clustermd_tests/02r10_Manage_re-add
-clustermd_tests/02r1_Manage_add
-clustermd_tests/02r1_Manage_add-spare
-clustermd_tests/02r1_Manage_re-add
-clustermd_tests/03r10_switch-recovery
-clustermd_tests/03r10_switch-resync
-clustermd_tests/03r1_switch-recovery
-clustermd_tests/03r1_switch-resync
-clustermd_tests/cluster_conf
-clustermd_tests/func.sh
-config.c
-coverity-gcc-hack.h
-crc32.c
-crc32.h
-crc32c.c
-dlink.c
-dlink.h
-external-reshape-design.txt
-inventory
-lib.c
-makedist
-managemon.c
-mapfile.c
-maps.c
-md.4
-md5.h
-md_p.h
-md_u.h
-mdadm.8.in
-mdadm.c
-mdadm.conf-example
-mdadm.conf.5
-mdadm.h
-mdadm.spec
-mdmon-design.txt
-mdmon.8
-mdmon.c
-mdmon.h
-mdopen.c
-mdstat.c
-misc/
-misc/mdcheck
-misc/syslog-events
-mkinitramfs
-monitor.c
-msg.c
-msg.h
-part.h
-platform-intel.c
-platform-intel.h
-policy.c
-probe_roms.c
-probe_roms.h
-pwgr.c
-raid5extend.c
-raid6check.8
-raid6check.c
-restripe.c
-sg_io.c
-sha1.c
-sha1.h
-super-ddf.c
-super-gpt.c
-super-intel.c
-super-mbr.c
-super0.c
-super1.c
-swap_super.c
-sysfs.c
-systemd/
-systemd/SUSE-mdadm_env.sh
-systemd/mdadm-grow-continue@.service
-systemd/mdadm-last-resort@.service
-systemd/mdadm-last-resort@.timer
-systemd/mdadm.shutdown
-systemd/mdmon@.service
-systemd/mdmonitor.service
-test
-tests/
-tests/00linear
-tests/00multipath
-tests/00names
-tests/00raid0
-tests/00raid1
-tests/00raid10
-tests/00raid4
-tests/00raid5
-tests/00raid6
-tests/00readonly
-tests/01r1fail
-tests/01r5fail
-tests/01r5integ
-tests/01raid6integ
-tests/01replace
-tests/02lineargrow
-tests/02r1add
-tests/02r1grow
-tests/02r5grow
-tests/02r6grow
-tests/03assem-incr
-tests/03r0assem
-tests/03r5assem
-tests/03r5assem-failed
-tests/03r5assemV1
-tests/04r0update
-tests/04r1update
-tests/04r5swap
-tests/04update-metadata
-tests/04update-uuid
-tests/05r1-add-internalbitmap
-tests/05r1-add-internalbitmap-v1a
-tests/05r1-add-internalbitmap-v1b
-tests/05r1-add-internalbitmap-v1c
-tests/05r1-bitmapfile
-tests/05r1-grow-external
-tests/05r1-grow-internal
-tests/05r1-grow-internal-1
-tests/05r1-internalbitmap
-tests/05r1-internalbitmap-v1a
-tests/05r1-internalbitmap-v1b
-tests/05r1-internalbitmap-v1c
-tests/05r1-n3-bitmapfile
-tests/05r1-re-add
-tests/05r1-re-add-nosuper
-tests/05r1-remove-internalbitmap
-tests/05r1-remove-internalbitmap-v1a
-tests/05r1-remove-internalbitmap-v1b
-tests/05r1-remove-internalbitmap-v1c
-tests/05r5-bitmapfile
-tests/05r5-internalbitmap
-tests/05r6-bitmapfile
-tests/05r6tor0
-tests/06name
-tests/06sysfs
-tests/06wrmostly
-tests/07autoassemble
-tests/07autodetect
-tests/07changelevelintr
-tests/07changelevels
-tests/07layouts
-tests/07reshape5intr
-tests/07revert-grow
-tests/07revert-inplace
-tests/07revert-shrink
-tests/07testreshape5
-tests/09imsm-assemble
-tests/09imsm-create-fail-rebuild
-tests/09imsm-overlap
-tests/10ddf-assemble-missing
-tests/10ddf-create
-tests/10ddf-create-fail-rebuild
-tests/10ddf-fail-create-race
-tests/10ddf-fail-readd
-tests/10ddf-fail-readd-readonly
-tests/10ddf-fail-spare
-tests/10ddf-fail-stop-readd
-tests/10ddf-fail-twice
-tests/10ddf-fail-two-spares
-tests/10ddf-geometry
-tests/10ddf-incremental-wrong-order
-tests/10ddf-sudden-degraded
-tests/11spare-migration
-tests/12imsm-r0_2d-grow-r0_3d
-tests/12imsm-r0_2d-grow-r0_4d
-tests/12imsm-r0_2d-grow-r0_5d
-tests/12imsm-r0_3d-grow-r0_4d
-tests/12imsm-r5_3d-grow-r5_4d
-tests/12imsm-r5_3d-grow-r5_5d
-tests/13imsm-r0_r0_2d-grow-r0_r0_4d
-tests/13imsm-r0_r0_2d-grow-r0_r0_5d
-tests/13imsm-r0_r0_3d-grow-r0_r0_4d
-tests/13imsm-r0_r5_3d-grow-r0_r5_4d
-tests/13imsm-r0_r5_3d-grow-r0_r5_5d
-tests/13imsm-r5_r0_3d-grow-r5_r0_4d
-tests/13imsm-r5_r0_3d-grow-r5_r0_5d
-tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d
-tests/14imsm-r0_3d_no_spares-migrate-r5_3d
-tests/14imsm-r0_r0_2d-takeover-r10_4d
-tests/14imsm-r10_4d-grow-r10_5d
-tests/14imsm-r10_r5_4d-takeover-r0_2d
-tests/14imsm-r1_2d-grow-r1_3d
-tests/14imsm-r1_2d-takeover-r0_2d
-tests/14imsm-r5_3d-grow-r5_5d-no-spares
-tests/14imsm-r5_3d-migrate-r4_3d
-tests/15imsm-r0_3d_64k-migrate-r0_3d_256k
-tests/15imsm-r5_3d_4k-migrate-r5_3d_256k
-tests/15imsm-r5_3d_64k-migrate-r5_3d_256k
-tests/15imsm-r5_6d_4k-migrate-r5_6d_256k
-tests/15imsm-r5_r0_3d_64k-migrate-r5_r0_3d_256k
-tests/16imsm-r0_3d-migrate-r5_4d
-tests/16imsm-r0_5d-migrate-r5_6d
-tests/16imsm-r5_3d-migrate-r0_3d
-tests/16imsm-r5_5d-migrate-r0_5d
-tests/18imsm-1d-takeover-r0_1d
-tests/18imsm-1d-takeover-r1_2d
-tests/18imsm-r0_2d-takeover-r10_4d
-tests/18imsm-r10_4d-takeover-r0_2d
-tests/18imsm-r1_2d-takeover-r0_1d
-tests/19raid6auto-repair
-tests/19raid6check
-tests/19raid6repair
-tests/19repair-does-not-destroy
-tests/20raid5journal
-tests/21raid5cache
-tests/ToTest
-tests/env-ddf-template
-tests/env-imsm-template
-tests/func.sh
-tests/imsm-grow-template
-tests/utils
-udev-md-clustered-confirm-device.rules
-udev-md-raid-arrays.rules
-udev-md-raid-assembly.rules
-udev-md-raid-creating.rules
-udev-md-raid-safe-timeouts.rules
-util.c
-xmalloc.c
diff --git a/lib.c b/lib.c
index 60890b95baf888762e0a1cf9c919e1314abf1285..2b09293cfaded961d73fd318c737163f7138f2b4 100644 (file)
--- a/lib.c
+++ b/lib.c
 #include       "mdadm.h"
 #include       "dlink.h"
 #include       <ctype.h>
+#include       <limits.h>
+
+/**
+ * is_string_lq() - Check if string length with NUL byte is lower than or equal to requested.
+ * @str: string to check.
+ * @max_len: max length.
+ *
+ * @str must be non-empty and its length, including termination byte, must not exceed @max_len.
+ */
+bool is_string_lq(const char * const str, size_t max_len)
+{
+       assert(str);
+
+       size_t _len = strnlen(str, max_len);
+
+       if (_len > 0 && _len < max_len)
+               return true;
+       return false;
+}
+
+bool is_dev_alive(char *path)
+{
+       if (!path)
+               return false;
+
+       if (access(path, R_OK) == 0)
+               return true;
+
+       return false;
+}
 
 /* This fill contains various 'library' style function.  They
  * have no dependency on anything outside this file.
@@ -152,6 +182,18 @@ char *stat2devnm(struct stat *st)
        return devid2devnm(st->st_rdev);
 }
 
+bool stat_is_md_dev(struct stat *st)
+{
+       if ((S_IFMT & st->st_mode) != S_IFBLK)
+               return false;
+       if (major(st->st_rdev) == MD_MAJOR)
+               return true;
+       if (major(st->st_rdev) == (unsigned)get_mdp_major())
+               return true;
+
+       return false;
+}
+
 char *fd2devnm(int fd)
 {
        struct stat stb;
@@ -162,35 +204,6 @@ char *fd2devnm(int fd)
        return NULL;
 }
 
-/* When we create a new array, we don't want the content to
- * be immediately examined by udev - it is probably meaningless.
- * So create /run/mdadm/creating-mdXXX and expect that a udev
- * rule will noticed this and act accordingly.
- */
-static char block_path[] = "/run/mdadm/creating-%s";
-static char *unblock_path = NULL;
-void udev_block(char *devnm)
-{
-       int fd;
-       char *path = NULL;
-
-       xasprintf(&path, block_path, devnm);
-       fd = open(path, O_CREAT|O_RDWR, 0600);
-       if (fd >= 0) {
-               close(fd);
-               unblock_path = path;
-       } else
-               free(path);
-}
-
-void udev_unblock(void)
-{
-       if (unblock_path)
-               unlink(unblock_path);
-       free(unblock_path);
-       unblock_path = NULL;
-}
-
 /*
  * convert a major/minor pair for a block device into a name in /dev, if possible.
  * On the first call, walk /dev collecting name.
@@ -289,7 +302,7 @@ char *map_dev_preferred(int major, int minor, int create,
 
        for (p = devlist; p; p = p->next)
                if (p->major == major && p->minor == minor) {
-                       if (strncmp(p->name, "/dev/md/",8) == 0 ||
+                       if (strncmp(p->name, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0 ||
                            (prefer && strstr(p->name, prefer))) {
                                if (preferred == NULL ||
                                    strlen(p->name) < strlen(preferred))
@@ -441,24 +454,50 @@ void print_quoted(char *str)
        putchar(q);
 }
 
-void print_escape(char *str)
+/**
+ * is_alphanum() - Check if character is a letter or digit.
+ * @c: char to analyze.
+ *
+ * Similar to isalnum() but additional locales are excluded.
+ *
+ * Return: %true on success, %false otherwise.
+ */
+bool is_alphanum(const char c)
 {
-       /* print str, but change space and tab to '_'
-        * as is suitable for device names
-        */
-       for (; *str; str++) {
-               switch (*str) {
-               case ' ':
-               case '\t':
-                       putchar('_');
-                       break;
-               case '/':
-                       putchar('-');
-                       break;
-               default:
-                       putchar(*str);
-               }
+       if (isupper(c) || islower(c) || isdigit(c) != 0)
+               return true;
+       return false;
+}
+
+/**
+ * is_name_posix_compatible() - Check if name is POSIX compatible.
+ * @name: name to check.
+ *
+ *  POSIX portable file name character set contains ASCII letters,
+ *  digits, '_', '.', and '-'. Also forbid leading '-'.
+ *  The length of the name cannot exceed NAME_MAX - 1 (leaving room for the terminating NUL byte).
+ *
+ * Return: %true on success, %false otherwise.
+ */
+bool is_name_posix_compatible(const char * const name)
+{
+       assert(name);
+
+       char allowed_symbols[] = "-_.";
+       const char *n = name;
+
+       if (!is_string_lq(name, NAME_MAX))
+               return false;
+
+       if (*n == '-')
+               return false;
+
+       while (*n != '\0') {
+               if (!is_alphanum(*n) && !strchr(allowed_symbols, *n))
+                       return false;
+               n++;
        }
+       return true;
 }
 
 int check_env(char *name)
@@ -471,19 +510,6 @@ int check_env(char *name)
        return 0;
 }
 
-int use_udev(void)
-{
-       static int use = -1;
-       struct stat stb;
-
-       if (use < 0) {
-               use = ((stat("/dev/.udev", &stb) == 0 ||
-                       stat("/run/udev", &stb) == 0) &&
-                      check_env("MDADM_NO_UDEV") == 0);
-       }
-       return use;
-}
-
 unsigned long GCD(unsigned long a, unsigned long b)
 {
        while (a != b) {
@@ -534,3 +560,49 @@ void free_line(char *line)
        }
        dl_free(line);
 }
+
+/**
+ * parse_num() - Parse int from string.
+ * @dest: Pointer to destination.
+ * @num: Pointer to string that is going to be parsed.
+ *
+ * If string contains anything after a number, error code is returned.
+ * The same happens when number is bigger than INT_MAX or smaller than 0.
+ * Writes to destination only if successfully read the number.
+ *
+ * Return: 0 on success, 1 otherwise.
+ */
+int parse_num(int *dest, const char *num)
+{
+       char *c = NULL;
+       long temp;
+
+       if (!num)
+               return 1;
+
+       errno = 0;
+       temp = strtol(num, &c, 10);
+       if (temp < 0 || temp > INT_MAX || *c || errno != 0 || num == c)
+               return 1;
+       *dest = temp;
+       return 0;
+}
+
+/**
+ * s_gethostname() - get hostname safely. Ensures a null-terminated string.
+ *
+ * @buf: buffer for hostname.
+ * @buf_len: buffer length.
+ *
+ * Return: gethostname() result.
+ */
+int s_gethostname(char *buf, int buf_len)
+{
+       assert(buf);
+
+       int ret = gethostname(buf, buf_len);
+
+       buf[buf_len - 1] = 0;
+
+       return ret;
+}
diff --git a/makedist b/makedist
deleted file mode 100755 (executable)
index 0c4b39e..0000000
--- a/makedist
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/bin/sh
-# avoid silly sorting
-export LANG=C
-arg=$1
-target=~/public_html/source/mdadm
-if [ " $arg" = " test" ]
-then
-  target=/tmp/mdadm-test
-  rm -rf $target
-  mkdir -p $target
-fi
-if [ -d $target ]
-then :
-else echo $target is not a directory
-     exit 2
-fi
-set `grep '^#define VERSION' ReadMe.c `
-version=`echo $3 | sed -e 's/"//g'`
-grep "^.TH MDADM 8 .. v$version" mdadm.8.in > /dev/null 2>&1 ||
- {
-   echo mdadm.8.in does not mention version $version.
-   exit 1
- }
-grep "^.TH MDMON 8 .. v$version" mdmon.8 > /dev/null 2>&1 ||
- {
-   echo mdmon.8 does not mention version $version.
-   exit 1
- }
-rpmv=`echo $version | tr - _`
-grep "^Version: *$rpmv$" mdadm.spec > /dev/null 2>&1 ||
- {
-   echo mdadm.spec does not mention version $version.
-   exit 1
- }
-if [ -f ANNOUNCE-$version ]
-then :
-else
-   echo ANNOUNCE-$version does not exist
-   exit 1
-fi
-if grep "^ANNOUNCE-$version\$" inventory
-then :
-else { cat inventory ; echo ANNOUNCE-$version ; } | sort -o inventory
-fi
-
-echo version = $version
-base=mdadm-$rpmv.tar.gz
-if [ " $arg" != " diff" ]
-then
-  if [ -f $target/$base ]
-  then
-    echo $target/$base exists.
-    exit 1
-  fi
-  trap "rm $target/$base; exit" 1 2 3
-  git archive --prefix=mdadm-$rpmv/ HEAD | gzip --best > $target/$base
-  chmod a+r $target/$base
-  ls -l $target/$base
-  if tar tzf $target/$base | sed 's,[^/]*/,,' | sort | diff -u inventory -
-  then : correct files found
-  else echo "Extra files, or inventory is out-of-date"
-       rm $target/$base
-       exit 1
-  fi
-  rpmbuild -ta $target/$base || exit 1
-  find ~/rpmbuild/RPMS -name "*mdadm-$version-*" \
-     -exec cp {} $target/RPM \;
-  cp ANNOUNCE-$version $target/ANNOUNCE
-  cp ChangeLog $target/ChangeLog
-  if [ " $arg" != " test" ]
-  then
-    echo -n "Confirm signing this release? "
-    read a
-    if [ " $a" != " y" ]; then echo OK - bye. ; exit 1; fi
-    if zcat $target/$base | gpg -ba > $target/$base.sign && gpg -ba $target/ANNOUNCE
-    then
-      kup put $target/$base  $target/$base.sign \
-         /pub/linux/utils/raid/mdadm/mdadm-$version.tar.gz
-      kup put $target/ANNOUNCE $target/ANNOUNCE.asc /pub/linux/utils/raid/mdadm/ANNOUNCE
-    else
-      echo signing failed
-      exit 1
-    fi
-  fi
-else
-  if [ ! -f $target/$base ]
-  then
-    echo $target/$base does not exist.
-    exit 1
-  fi
-  ( cd .. ; ln -s mdadm.v2 mdadm-$version ; tar chf - --exclude=.git --exclude="TAGS" --exclude='*,v' --exclude='*~' --exclude='*.o' --exclude mdadm --exclude=mdadm'.[^ch0-9]' --exclude=RCS mdadm-$version ; rm mdadm-$version ) | gzip --best > /var/tmp/mdadm-new.tgz
-  mkdir /var/tmp/mdadm-old ; zcat $target/$base | ( cd /var/tmp/mdadm-old ; tar xf - )
-  mkdir /var/tmp/mdadm-new ; zcat /var/tmp/mdadm-new.tgz | ( cd /var/tmp/mdadm-new ; tar xf - )
-  diff -ru /var/tmp/mdadm-old /var/tmp/mdadm-new
-  rm -rf /var/tmp/mdadm-old /var/tmp/mdadm-new /var/tmp/mdadm-new.tgz
-fi
index 200cf83e3436a9b9890097b052b4fc379fb7e334..358459e79435984196eec66661718a9181a0c9d0 100644 (file)
 #include       "mdmon.h"
 #include       <sys/syscall.h>
 #include       <sys/socket.h>
-#include       <signal.h>
 
 static void close_aa(struct active_array *aa)
 {
@@ -208,7 +207,7 @@ static void replace_array(struct supertype *container,
        remove_old();
        while (pending_discard) {
                while (discard_this == NULL)
-                       sleep(1);
+                       sleep_for(1, 0, true);
                remove_old();
        }
        pending_discard = old;
@@ -455,7 +454,7 @@ static void manage_member(struct mdstat_ent *mdstat,
         * trying to find and assign a spare.
         * We do that whenever the monitor tells us too.
         */
-       char buf[64];
+       char buf[SYSFS_MAX_BUF_SIZE];
        int frozen;
        struct supertype *container = a->container;
        struct mdinfo *mdi;
@@ -569,7 +568,7 @@ static void manage_member(struct mdstat_ent *mdstat,
                updates = NULL;
                while (update_queue_pending || update_queue) {
                        check_update_queue(container);
-                       usleep(15*1000);
+                       sleep_for(0, MSEC_TO_NSEC(15), true);
                }
                replace_array(container, a, newa);
                if (sysfs_set_str(&a->info, NULL,
@@ -661,18 +660,17 @@ static void manage_new(struct mdstat_ent *mdstat,
         * the monitor.
         */
 
-       struct active_array *new;
-       struct mdinfo *mdi, *di;
-       char *inst;
-       int i;
+       struct active_array *new = NULL;
+       struct mdinfo *mdi = NULL, *di;
+       int i, inst;
        int failed = 0;
-       char buf[40];
+       char buf[SYSFS_MAX_BUF_SIZE];
 
        /* check if array is ready to be monitored */
        if (!mdstat->active || !mdstat->level)
                return;
-       if (strcmp(mdstat->level, "raid0") == 0 ||
-           strcmp(mdstat->level, "linear") == 0)
+       if (strncmp(mdstat->level, "raid0", strlen("raid0")) == 0 ||
+           strncmp(mdstat->level, "linear", strlen("linear")) == 0)
                return;
 
        mdi = sysfs_read(-1, mdstat->devnm,
@@ -691,7 +689,8 @@ static void manage_new(struct mdstat_ent *mdstat,
 
        new->container = container;
 
-       inst = to_subarray(mdstat, container->devnm);
+       if (parse_num(&inst, to_subarray(mdstat, container->devnm)) != 0)
+               goto error;
 
        new->info.array = mdi->array;
        new->info.component_size = mdi->component_size;
@@ -724,7 +723,7 @@ static void manage_new(struct mdstat_ent *mdstat,
        new->safe_mode_delay_fd = sysfs_open2(new->info.sys_name, NULL,
                                              "safe_mode_delay");
 
-       dprintf("inst: %s action: %d state: %d\n", inst,
+       dprintf("inst: %d action: %d state: %d\n", inst,
                new->action_fd, new->info.state_fd);
 
        if (mdi->safe_mode_delay >= 50)
@@ -739,7 +738,7 @@ static void manage_new(struct mdstat_ent *mdstat,
         * read this information for new arrays only (empty victim)
         */
        if ((victim == NULL) &&
-           (sysfs_get_str(mdi, NULL, "sync_action", buf, 40) > 0) &&
+           (sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf)) > 0) &&
            (strncmp(buf, "reshape", 7) == 0)) {
                if (sysfs_get_ll(mdi, NULL, "reshape_position",
                        &new->last_checkpoint) != 0)
@@ -759,15 +758,13 @@ static void manage_new(struct mdstat_ent *mdstat,
        }
 
        sysfs_free(mdi);
+       mdi = NULL;
 
        /* if everything checks out tell the metadata handler we want to
         * manage this instance
         */
        if (!aa_ready(new) || container->ss->open_new(container, new, inst) < 0) {
-               pr_err("failed to monitor %s\n",
-                       mdstat->metadata_version);
-               new->container = NULL;
-               free_aa(new);
+               goto error;
        } else {
                replace_array(container, victim, new);
                if (failed) {
@@ -775,6 +772,16 @@ static void manage_new(struct mdstat_ent *mdstat,
                        manage_member(mdstat, new);
                }
        }
+       return;
+
+error:
+       pr_err("failed to monitor %s\n", mdstat->metadata_version);
+       if (new) {
+               new->container = NULL;
+               free_aa(new);
+       }
+       if (mdi)
+               sysfs_free(mdi);
 }
 
 void manage(struct mdstat_ent *mdstat, struct supertype *container)
@@ -815,7 +822,7 @@ static void handle_message(struct supertype *container, struct metadata_update *
        if (msg->len <= 0)
                while (update_queue_pending || update_queue) {
                        check_update_queue(container);
-                       usleep(15*1000);
+                       sleep_for(0, MSEC_TO_NSEC(15), true);
                }
 
        if (msg->len == 0) { /* ping_monitor */
@@ -829,7 +836,7 @@ static void handle_message(struct supertype *container, struct metadata_update *
                wakeup_monitor();
 
                while (monitor_loop_cnt - cnt < 0)
-                       usleep(10 * 1000);
+                       sleep_for(0, MSEC_TO_NSEC(10), true);
        } else if (msg->len == -1) { /* ping_manager */
                struct mdstat_ent *mdstat = mdstat_read(1, 0);
 
index 8d7acb3cc389ca0ce49c1dabb99bfe2782875993..f1f3ee2cdc77bbe030a269c88d9a83e73b67c425 100644 (file)
--- a/mapfile.c
+++ b/mapfile.c
@@ -215,7 +215,7 @@ void map_free(struct map_ent *map)
 }
 
 int map_update(struct map_ent **mpp, char *devnm, char *metadata,
-              int *uuid, char *path)
+              int uuid[4], char *path)
 {
        struct map_ent *map, *mp;
        int rv;
@@ -292,6 +292,10 @@ struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4])
 struct map_ent *map_by_devnm(struct map_ent **map, char *devnm)
 {
        struct map_ent *mp;
+
+       if (!devnm)
+               return NULL;
+
        if (!*map)
                map_read(map);
 
@@ -316,9 +320,9 @@ struct map_ent *map_by_name(struct map_ent **map, char *name)
        for (mp = *map ; mp ; mp = mp->next) {
                if (!mp->path)
                        continue;
-               if (strncmp(mp->path, "/dev/md/", 8) != 0)
+               if (strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) != 0)
                        continue;
-               if (strcmp(mp->path+8, name) != 0)
+               if (strcmp(mp->path + DEV_MD_DIR_LEN, name) != 0)
                        continue;
                if (!mddev_busy(mp->devnm)) {
                        mp->bad = 1;
@@ -359,8 +363,7 @@ void RebuildMap(void)
        char *homehost = conf_get_homehost(&require_homehost);
 
        if (homehost == NULL || strcmp(homehost, "<system>")==0) {
-               if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
-                       sys_hostname[sizeof(sys_hostname)-1] = 0;
+               if (s_gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
                        homehost = sys_hostname;
                }
        }
@@ -409,7 +412,7 @@ void RebuildMap(void)
                        devid = devnm2devid(md->devnm);
                        path = map_dev(major(devid), minor(devid), 0);
                        if (path == NULL ||
-                           strncmp(path, "/dev/md/", 8) != 0) {
+                           strncmp(path, DEV_MD_DIR, DEV_MD_DIR_LEN) != 0) {
                                /* We would really like a name that provides
                                 * an MD_DEVNAME for udev.
                                 * The name needs to be unique both in /dev/md/
@@ -430,7 +433,7 @@ void RebuildMap(void)
                                if (match && match->devname && match->devname[0] == '/') {
                                        path = match->devname;
                                        if (path[0] != '/') {
-                                               strcpy(namebuf, "/dev/md/");
+                                               strcpy(namebuf, DEV_MD_DIR);
                                                strcat(namebuf, path);
                                                path = namebuf;
                                        }
@@ -474,10 +477,10 @@ void RebuildMap(void)
 
                                        while (conflict) {
                                                if (unum >= 0)
-                                                       sprintf(namebuf, "/dev/md/%s%s%d",
+                                                       sprintf(namebuf, DEV_MD_DIR "%s%s%d",
                                                                name, sep, unum);
                                                else
-                                                       sprintf(namebuf, "/dev/md/%s",
+                                                       sprintf(namebuf, DEV_MD_DIR "%s",
                                                                name);
                                                unum++;
                                                if (lstat(namebuf, &stb) != 0 &&
diff --git a/maps.c b/maps.c
index a4fd27977c3685db7e23795a7582c016fb038d70..17f8b54dc40ffd3c657c41472c516c6a405840e1 100644 (file)
--- a/maps.c
+++ b/maps.c
@@ -137,14 +137,14 @@ mapping_t faultylayout[] = {
 
        { "clear", ClearErrors},
        { "flush", ClearFaults},
-       { "none", ClearErrors},
+       { STR_COMMON_NONE, ClearErrors},
        { "default", ClearErrors},
        { NULL, UnSet }
 };
 
 mapping_t consistency_policies[] = {
        { "unknown", CONSISTENCY_POLICY_UNKNOWN},
-       { "none", CONSISTENCY_POLICY_NONE},
+       { STR_COMMON_NONE, CONSISTENCY_POLICY_NONE},
        { "resync", CONSISTENCY_POLICY_RESYNC},
        { "bitmap", CONSISTENCY_POLICY_BITMAP},
        { "journal", CONSISTENCY_POLICY_JOURNAL},
@@ -165,7 +165,62 @@ mapping_t sysfs_array_states[] = {
        { "broken", ARRAY_BROKEN },
        { NULL, ARRAY_UNKNOWN_STATE }
 };
+/**
+ * mapping_t update_options - stores supported update options.
+ */
+mapping_t update_options[] = {
+       { "name", UOPT_NAME },
+       { "ppl", UOPT_PPL },
+       { "no-ppl", UOPT_NO_PPL },
+       { "bitmap", UOPT_BITMAP },
+       { "no-bitmap", UOPT_NO_BITMAP },
+       { "sparc2.2", UOPT_SPARC22 },
+       { "super-minor", UOPT_SUPER_MINOR },
+       { "summaries", UOPT_SUMMARIES },
+       { "resync", UOPT_RESYNC },
+       { "uuid", UOPT_UUID },
+       { "homehost", UOPT_HOMEHOST },
+       { "home-cluster", UOPT_HOME_CLUSTER },
+       { "nodes", UOPT_NODES },
+       { "devicesize", UOPT_DEVICESIZE },
+       { "bbl", UOPT_BBL },
+       { "no-bbl", UOPT_NO_BBL },
+       { "force-no-bbl", UOPT_FORCE_NO_BBL },
+       { "metadata", UOPT_METADATA },
+       { "revert-reshape", UOPT_REVERT_RESHAPE },
+       { "layout-original", UOPT_LAYOUT_ORIGINAL },
+       { "layout-alternate", UOPT_LAYOUT_ALTERNATE },
+       { "layout-unspecified", UOPT_LAYOUT_UNSPECIFIED },
+       { "byteorder", UOPT_BYTEORDER },
+       { "help", UOPT_HELP },
+       { "?", UOPT_HELP },
+       { NULL, UOPT_UNDEFINED}
+};
 
+/**
+ * map_num_s() - Safer alternative of map_num() function.
+ * @map: map to search.
+ * @num: key to match.
+ *
+ * Shall be used only if key existence is guaranteed.
+ *
+ * Return: Pointer to name of the element.
+ */
+char *map_num_s(mapping_t *map, int num)
+{
+       char *ret = map_num(map, num);
+
+       assert(ret);
+       return ret;
+}
+
+/**
+ * map_num() - get element name by key.
+ * @map: map to search.
+ * @num: key to match.
+ *
+ * Return: Pointer to name of the element or NULL.
+ */
 char *map_num(mapping_t *map, int num)
 {
        while (map->name) {
diff --git a/md.4 b/md.4
index 0712af255dd5a962789ec0955bc9fe8d33832d7b..7a0bc7e636979b6eadfaf2372de0cf94332f85ed 100644 (file)
--- a/md.4
+++ b/md.4
@@ -215,6 +215,19 @@ option or the
 .B "--update=layout-alternate"
 option.
 
+Once you have updated the layout you will not be able to mount the array
+on an older kernel.  If you need to revert to an older kernel, the
+layout information can be erased with the
+.B "--update=layout-unspecified"
+option.  If you use this option to
+.B --assemble
+while running a newer kernel, the array will NOT assemble, but the
+metadata will be updated so that it can be assembled on an older kernel.
+
+Note that setting the layout to "unspecified" removes protections against
+this bug, and you must be sure that the kernel you use matches the
+layout of the array.
+
 .SS RAID1
 
 A RAID1 array is also known as a mirrored set (though mirrors tend to
@@ -896,26 +909,40 @@ The list is particularly useful when recovering to a spare.  If a few blocks
 cannot be read from the other devices, the bulk of the recovery can
 complete and those few bad blocks will be recorded in the bad block list.
 
-.SS RAID456 WRITE JOURNAL
+.SS RAID WRITE HOLE
 
-Due to non-atomicity nature of RAID write operations, interruption of
-write operations (system crash, etc.) to RAID456 array can lead to
-inconsistent parity and data loss (so called RAID-5 write hole).
+Due to the non-atomic nature of RAID write operations,
+interruption of write operations (system crash, etc.) to RAID456
+array can lead to inconsistent parity and data loss (so called
+RAID-5 write hole).
+To plug the write hole md supports two mechanisms described below.
 
-To plug the write hole, from Linux 4.4 (to be confirmed),
-.I md
-supports write ahead journal for RAID456. When the array is created,
-an additional journal device can be added to the array through
-.IR write-journal
-option. The RAID write journal works similar to file system journals.
-Before writing to the data disks, md persists data AND parity of the
-stripe to the journal device. After crashes, md searches the journal
-device for incomplete write operations, and replay them to the data
-disks.
+.TP
+DIRTY STRIPE JOURNAL
+From Linux 4.4, md supports write ahead journal for RAID456.
+When the array is created, an additional journal device can be added to
+the array through write-journal option. The RAID write journal works
+similar to file system journals. Before writing to the data
+disks, md persists data AND parity of the stripe to the journal
+device. After crashes, md searches the journal device for
+incomplete write operations, and replays them to the data disks.
 
 When the journal device fails, the RAID array is forced to run in
 read-only mode.
 
+.TP
+PARTIAL PARITY LOG
+From Linux 4.12 md supports Partial Parity Log (PPL) for RAID5 arrays only.
+Partial parity for a write operation is the XOR of stripe data chunks not
+modified by the write. PPL is stored in the metadata region of RAID member drives,
+no additional journal drive is needed.
+After crashes, if one of the not modified data disks of
+the stripe is missing, this updated parity can be used to recover its
+data.
+
+This mechanism is documented more fully in the file
+Documentation/md/raid5-ppl.rst
+
 .SS WRITE-BEHIND
 
 From Linux 2.6.14,
@@ -1061,7 +1088,7 @@ which contains various files for providing access to information about
 the array.
 
 This interface is documented more fully in the file
-.B Documentation/md.txt
+.B Documentation/admin-guide/md.rst
 which is distributed with the kernel sources.  That file should be
 consulted for full documentation.  The following are just a selection
 of attribute files that are available.
index a3494a1ceabaefce46925c209e12b9bdba446f01..9ba66825a8120a6ea126c4f6c8c34f0ca948d41e 100644 (file)
@@ -5,7 +5,7 @@
 .\"   the Free Software Foundation; either version 2 of the License, or
 .\"   (at your option) any later version.
 .\" See file COPYING in distribution for details.
-.TH MDADM 8 "" v4.1-rc2
+.TH MDADM 8 "" v4.3
 .SH NAME
 mdadm \- manage MD devices
 .I aka
@@ -158,7 +158,7 @@ adding new spares and removing faulty devices.
 .B Misc
 This is an 'everything else' mode that supports operations on active
 arrays, operations on component devices such as erasing old superblocks, and
-information gathering operations.
+information-gathering operations.
 .\"This mode allows operations on independent devices such as examine MD
 .\"superblocks, erasing old superblocks and stopping active arrays.
 
@@ -231,12 +231,12 @@ mode to be assumed.
 
 .TP
 .BR \-h ", " \-\-help
-Display general help message or, after one of the above options, a
+Display general help message or, after one of the above options, a
 mode-specific help message.
 
 .TP
 .B \-\-help\-options
-Display more detailed help about command line parsing and some commonly
+Display more detailed help about command-line parsing and some commonly
 used options.
 
 .TP
@@ -266,14 +266,11 @@ the exact meaning of this option in different contexts.
 
 .TP
 .BR \-c ", " \-\-config=
-Specify the config file or directory.  Default is to use
-.B /etc/mdadm.conf
-and
-.BR /etc/mdadm.conf.d ,
-or if those are missing then
-.B /etc/mdadm/mdadm.conf
-and
-.BR /etc/mdadm/mdadm.conf.d .
+Specify the config file or directory.  If not specified, the default config file
+and default conf.d directory will be used.  See
+.BR mdadm.conf (5)
+for more details.
+
 If the config file given is
 .B "partitions"
 then nothing will be read, but
@@ -367,7 +364,7 @@ Use the Intel(R) Matrix Storage Manager metadata format.  This creates a
 which is managed in a similar manner to DDF, and is supported by an
 option-rom on some platforms:
 .IP
-.B http://www.intel.com/design/chipsets/matrixstorage_sb.htm
+.B https://www.intel.com/content/www/us/en/support/products/122484
 .PP
 .RE
 
@@ -382,7 +379,7 @@ When creating an array, the
 .B homehost
 will be recorded in the metadata.  For version-1 superblocks, it will
 be prefixed to the array name.  For version-0.90 superblocks, part of
-the SHA1 hash of the hostname will be stored in the later half of the
+the SHA1 hash of the hostname will be stored in the latter half of the
 UUID.
 
 When reporting information about an array, any array which is tagged
@@ -391,7 +388,7 @@ for the given homehost will be reported as such.
 When using Auto-Assemble, only arrays tagged for the given homehost
 will be allowed to use 'local' names (i.e. not ending in '_' followed
 by a digit string).  See below under
-.BR "Auto Assembly" .
+.BR "Auto-Assembly" .
 
 The special name "\fBany\fP" can be used as a wild card.  If an array
 is created with
@@ -406,7 +403,7 @@ When
 .I mdadm
 needs to print the name for a device it normally finds the name in
 .B /dev
-which refers to the device and is shortest.  When a path component is
+which refers to the device and is the shortest.  When a path component is
 given with
 .B \-\-prefer
 .I mdadm
@@ -459,7 +456,8 @@ number of spare devices.
 
 .TP
 .BR \-z ", " \-\-size=
-Amount (in Kilobytes) of space to use from each drive in RAID levels 1/4/5/6.
+Amount (in Kilobytes) of space to use from each drive in RAID levels 1/4/5/6/10
+and for RAID 0 on external metadata.
 This must be a multiple of the chunk size, and must leave about 128Kb
 of space at the end of the drive for the RAID superblock.
 If this is not specified
@@ -478,10 +476,19 @@ To guard against this it can be useful to set the initial size
 slightly smaller than the smaller device with the aim that it will
 still be larger than any replacement.
 
+This option can be used with
+.B \-\-create
+for determining the initial size of an array. For external metadata,
+it can be used on a volume, but not on a container itself.
+Setting the initial size of
+.B RAID 0
+array is only valid for external metadata.
+
 This value can be set with
 .B \-\-grow
-for RAID level 1/4/5/6 though
+for RAID level 1/4/5/6/10 though
 DDF arrays may not be able to support this.
+RAID 0 array size cannot be changed.
 If the array was created with a size smaller than the currently
 active drives, the extra space can be accessed using
 .BR \-\-grow .
@@ -501,11 +508,6 @@ problems the array can be made bigger again with no loss with another
 .B "\-\-grow \-\-size="
 command.
 
-This value cannot be used when creating a
-.B CONTAINER
-such as with DDF and IMSM metadata, though it perfectly valid when
-creating an array inside a container.
-
 .TP
 .BR \-Z ", " \-\-array\-size=
 This is only meaningful with
@@ -543,20 +545,20 @@ Clustered arrays do not support this parameter yet.
 
 .TP
 .BR \-c ", " \-\-chunk=
-Specify chunk size of kilobytes.  The default when creating an
+Specify chunk size in kilobytes.  The default when creating an
 array is 512KB.  To ensure compatibility with earlier versions, the
 default when building an array with no persistent metadata is 64KB.
 This is only meaningful for RAID0, RAID4, RAID5, RAID6, and RAID10.
 
 RAID4, RAID5, RAID6, and RAID10 require the chunk size to be a power
-of 2.  In any case it must be a multiple of 4KB.
+of 2, with minimal chunk size being 4KB.
 
 A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
 Megabytes, Gigabytes or Terabytes respectively.
 
 .TP
 .BR \-\-rounding=
-Specify rounding factor for a Linear array.  The size of each
+Specify the rounding factor for a Linear array.  The size of each
 component will be rounded down to a multiple of this size.
 This is a synonym for
 .B \-\-chunk
@@ -653,7 +655,8 @@ option to set subsequent failure modes.
 and "flush" will clear any persistent faults.
 
 The layout options for RAID10 are one of 'n', 'o' or 'f' followed
-by a small number.  The default is 'n2'.  The supported options are:
+by a small number signifying the number of copies of each datablock.
+The default is 'n2'.  The supported options are:
 
 .I 'n'
 signals 'near' copies.  Multiple copies of one data block are at
@@ -671,7 +674,7 @@ signals 'far' copies
 (multiple copies have very different offsets).
 See md(4) for more detail about 'near', 'offset', and 'far'.
 
-The number is the number of copies of each datablock.  2 is normal, 3
+As for the number of copies of each data block, 2 is normal, 3
 can be useful.  This number can be at most equal to the number of
 devices in the array.  It does not need to divide evenly into that
 number (e.g. it is perfectly legal to have an 'n2' layout for an array
@@ -682,7 +685,7 @@ A bug introduced in Linux 3.14 means that RAID0 arrays
 started using a different layout.  This could lead to
 data corruption.  Since Linux 5.4 (and various stable releases that received
 backports), the kernel will not accept such an array unless
-a layout is explictly set.  It can be set to
+a layout is explicitly set.  It can be set to
 .RB ' original '
 or
 .RB ' alternate '.
@@ -758,13 +761,13 @@ or by selecting a different consistency policy with
 
 .TP
 .BR \-\-bitmap\-chunk=
-Set the chunksize of the bitmap.  Each bit corresponds to that many
+Set the chunk size of the bitmap.  Each bit corresponds to that many
 Kilobytes of storage.
-When using a file based bitmap, the default is to use the smallest
-size that is at-least 4 and requires no more than 2^21 chunks.
+When using a file-based bitmap, the default is to use the smallest
+size that is at least 4 and requires no more than 2^21 chunks.
 When using an
 .B internal
-bitmap, the chunksize defaults to 64Meg, or larger if necessary to
+bitmap, the chunk size defaults to 64Meg, or larger if necessary to
 fit the bitmap into the available space.
 
 A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
@@ -834,11 +837,27 @@ array is resynced at creation.  From Linux version 3.0,
 .B \-\-assume\-clean
 can be used with that command to avoid the automatic resync.
 
+.TP
+.BR \-\-write-zeroes
+When creating an array, send write zeroes requests to all the block
+devices.  This should zero the data area on all disks such that the
+initial sync is not necessary and, if successful, will behave
+as if
+.B \-\-assume\-clean
+was specified.
+.IP
+This is intended for use with devices that have hardware offload for
+zeroing, but despite this zeroing can still take several minutes for
+large disks.  Thus a message is printed before and after zeroing and
+each disk is zeroed in parallel with the others.
+.IP
+This is only meaningful with --create.
+
 .TP
 .BR \-\-backup\-file=
 This is needed when
 .B \-\-grow
-is used to increase the number of raid-devices in a RAID5 or RAID6 if
+is used to increase the number of raid devices in a RAID5 or RAID6 if
 there are no spare devices available, or to shrink, change RAID level
 or layout.  See the GROW MODE section below on RAID\-DEVICES CHANGES.
 The file must be stored on a separate device, not on the RAID array
@@ -877,7 +896,7 @@ When creating an array,
 .B \-\-data\-offset
 can be specified as
 .BR variable .
-In the case each member device is expected to have a offset appended
+In the case each member device is expected to have an offset appended
 to the name, separated by a colon.  This makes it possible to recreate
 exactly an array which has varying data offsets (as can happen when
 different versions of
@@ -913,17 +932,14 @@ option will be ignored.
 .BR \-N ", " \-\-name=
 Set a
 .B name
-for the array.  This is currently only effective when creating an
-array with a version-1 superblock, or an array in a DDF container.
-The name is a simple textual string that can be used to identify array
-components when assembling.  If name is needed but not specified, it
-is taken from the basename of the device that is being created.
-e.g. when creating
-.I /dev/md/home
-the
-.B name
-will default to
-.IR home .
+for the array. It must be
+.BR "POSIX PORTABLE NAME"
+compatible and cannot be longer than 32 chars. This is effective when creating an array
+with v1 metadata, or an external array.
+
+If name is needed but not specified, it is taken from the basename of the device
+that is being created. See
+.BR "DEVICE NAMES"
 
 .TP
 .BR \-R ", " \-\-run
@@ -941,7 +957,7 @@ Insist that
 .I mdadm
 accept the geometry and layout specified without question.  Normally
 .I mdadm
-will not allow creation of an array with only one device, and will try
+will not allow the creation of an array with only one device, and will try
 to create a RAID5 array with one missing drive (as this makes the
 initial resync work faster).  With
 .BR \-\-force ,
@@ -1002,7 +1018,7 @@ number added, e.g.
 If the md device name is in a 'standard' format as described in DEVICE
 NAMES, then it will be created, if necessary, with the appropriate
 device number based on that name.  If the device name is not in one of these
-formats, then a unused device number will be allocated.  The device
+formats, then an unused device number will be allocated.  The device
 number will be considered unused if there is no active array for that
 number, and there is no entry in /dev for that number and with a
 non-standard name.  Names that are not in 'standard' format are only
@@ -1030,30 +1046,25 @@ then
 .B \-\-add
 can be used to add some extra devices to be included in the array.
 In most cases this is not needed as the extra devices can be added as
-spares first, and then the number of raid-disks can be changed.
-However for RAID0, it is not possible to add spares.  So to increase
+spares first, and then the number of raid disks can be changed.
+However, for RAID0 it is not possible to add spares.  So to increase
 the number of devices in a RAID0, it is necessary to set the new
 number of devices, and to add the new devices, in the same command.
 
 .TP
 .BR \-\-nodes
-Only works when the array is for clustered environment. It specifies
+Only works when the array is created for a clustered environment. It specifies
 the maximum number of nodes in the cluster that will use this device
 simultaneously. If not specified, this defaults to 4.
 
 .TP
 .BR \-\-write-journal
 Specify journal device for the RAID-4/5/6 array. The journal device
-should be a SSD with reasonable lifetime.
-
-.TP
-.BR \-\-symlinks
-Auto creation of symlinks in /dev to /dev/md, option --symlinks must
-be 'no' or 'yes' and work with --create and --build.
+should be an SSD with a reasonable lifetime.
 
 .TP
 .BR \-k ", " \-\-consistency\-policy=
-Specify how the array maintains consistency in case of unexpected shutdown.
+Specify how the array maintains consistency in the case of an unexpected shutdown.
 Only relevant for RAID levels with redundancy.
 Currently supported options are:
 .RS
@@ -1061,7 +1072,7 @@ Currently supported options are:
 .TP
 .B resync
 Full resync is performed and all redundancy is regenerated when the array is
-started after unclean shutdown.
+started after an unclean shutdown.
 
 .TP
 .B bitmap
@@ -1070,8 +1081,8 @@ Resync assisted by a write-intent bitmap. Implicitly selected when using
 
 .TP
 .B journal
-For RAID levels 4/5/6, journal device is used to log transactions and replay
-after unclean shutdown. Implicitly selected when using
+For RAID levels 4/5/6, the journal device is used to log transactions and replay
+after an unclean shutdown. Implicitly selected when using
 .BR \-\-write\-journal .
 
 .TP
@@ -1118,8 +1129,10 @@ is much safer.
 
 .TP
 .BR \-N ", " \-\-name=
-Specify the name of the array to assemble.  This must be the name
-that was specified when creating the array.  It must either match
+Specify the name of the array to assemble. It must be
+.BR "POSIX PORTABLE NAME"
+compatible and cannot be longer than 32 chars. This must be the name
+that was specified when creating the array. It must either match
 the name stored in the superblock exactly, or it must match
 with the current
 .I homehost
@@ -1132,7 +1145,8 @@ out-of-date.  If
 .I mdadm
 cannot find enough working devices to start the array, but can find
 some devices that are recorded as having failed, then it will mark
-those devices as working so that the array can be started.
+those devices as working so that the array can be started. This works only for
+native metadata. For external metadata it allows starting dirty degraded RAID 4, 5, 6.
 An array which requires
 .B \-\-force
 to be started may contain data corruption.  Use it carefully.
@@ -1213,6 +1227,7 @@ argument given to this flag can be one of
 .BR no\-ppl ,
 .BR layout\-original ,
 .BR layout\-alternate ,
+.BR layout\-unspecified ,
 .BR metadata ,
 or
 .BR super\-minor .
@@ -1238,7 +1253,7 @@ This can be useful if
 reports a different "Preferred Minor" to
 .BR \-\-detail .
 In some cases this update will be performed automatically
-by the kernel driver.  In particular the update happens automatically
+by the kernel driver.  In particular, the update happens automatically
 at the first write to an array with redundancy (RAID level 1 or
 greater) on a 2.6 (or later) kernel.
 
@@ -1278,7 +1293,7 @@ For version-1 superblocks, this involves updating the name.
 The
 .B home\-cluster
 option will change the cluster name as recorded in the superblock and
-bitmap. This option only works for clustered environment.
+bitmap. This option only works for a clustered environment.
 
 The
 .B resync
@@ -1368,8 +1383,9 @@ The
 .B layout\-original
 and
 .B layout\-alternate
-options are for RAID0 arrays in use before Linux 5.4.  If the array was being
-used with Linux 3.13 or earlier, then to assemble the array on a new kernel,
+options are for RAID0 arrays with non-uniform device sizes that were in
+use before Linux 5.4.  If the array was being used with Linux 3.13 or
+earlier, then to assemble the array on a new kernel,
 .B \-\-update=layout\-original
 must be given.  If the array was created and used with a kernel from Linux 3.14 to
 Linux 5.3, then
@@ -1379,22 +1395,27 @@ will happen normally.
 For more information, see
 .IR md (4).
 
+The
+.B layout\-unspecified
+option reverts the effect of
+.B layout\-original
+or
+.B layout\-alternate
+and allows the array to be again used on a kernel prior to Linux 5.3.
+This option should be used with great caution.
+
 .TP
 .BR \-\-freeze\-reshape
-Option is intended to be used in start-up scripts during initrd boot phase.
-When array under reshape is assembled during initrd phase, this option
-stops reshape after reshape critical section is being restored. This happens
-before file system pivot operation and avoids loss of file system context.
+This option is intended to be used in start-up scripts during the initrd boot phase.
+When the array under reshape is assembled during the initrd phase, this option
+stops the reshape after the reshape-critical section has been restored. This happens
+before the file system pivot operation and avoids loss of filesystem context.
 Losing file system context would cause reshape to be broken.
 
 Reshape can be continued later using the
 .B \-\-continue
 option for the grow command.
 
-.TP
-.BR \-\-symlinks
-See this option under Create and Build options.
-
 .SH For Manage mode:
 
 .TP
@@ -1432,9 +1453,9 @@ re\-add a device that was previously removed from an array.
 If the metadata on the device reports that it is a member of the
 array, and the slot that it used is still vacant, then the device will
 be added back to the array in the same position.  This will normally
-cause the data for that device to be recovered.  However based on the
+cause the data for that device to be recovered.  However, based on the
 event count on the device, the recovery may only require sections that
-are flagged a write-intent bitmap to be recovered or may not require
+are flagged by a write-intent bitmap to be recovered or may not require
 any recovery at all.
 
 When used on an array that has no metadata (i.e. it was built with
@@ -1442,13 +1463,12 @@ When used on an array that has no metadata (i.e. it was built with
 it will be assumed that bitmap-based recovery is enough to make the
 device fully consistent with the array.
 
-When used with v1.x metadata,
 .B \-\-re\-add
-can be accompanied by
+can also be accompanied by
 .BR \-\-update=devicesize ,
 .BR \-\-update=bbl ", or"
 .BR \-\-update=no\-bbl .
-See the description of these option when used in Assemble mode for an
+See descriptions of these options when used in Assemble mode for an
 explanation of their use.
 
 If the device name given is
@@ -1475,7 +1495,7 @@ Add a device as a spare.  This is similar to
 except that it does not attempt
 .B \-\-re\-add
 first.  The device will be added as a spare even if it looks like it
-could be an recent member of the array.
+could be a recent member of the array.
 
 .TP
 .BR \-r ", " \-\-remove
@@ -1492,12 +1512,12 @@ and names like
 .B set-A
 can be given to
 .BR \-\-remove .
-The first causes all failed device to be removed.  The second causes
+The first causes all failed devices to be removed.  The second causes
 any device which is no longer connected to the system (i.e an 'open'
 returns
 .BR ENXIO )
 to be removed.
-The third will remove a set as describe below under
+The third will remove a set as described below under
 .BR \-\-fail .
 
 .TP
@@ -1514,7 +1534,7 @@ For RAID10 arrays where the number of copies evenly divides the number
 of devices, the devices can be conceptually divided into sets where
 each set contains a single complete copy of the data on the array.
 Sometimes a RAID10 array will be configured so that these sets are on
-separate controllers.  In this case all the devices in one set can be
+separate controllers.  In this case, all the devices in one set can be
 failed by giving a name like
 .B set\-A
 or
@@ -1544,9 +1564,9 @@ This can follow a list of
 .B \-\-replace
 devices.  The devices listed after
 .B \-\-with
-will be preferentially used to replace the devices listed after
+will preferentially be used to replace the devices listed after
 .BR \-\-replace .
-These device must already be spare devices in the array.
+These devices must already be spare devices in the array.
 
 .TP
 .BR \-\-write\-mostly
@@ -1569,8 +1589,8 @@ the device is found or <slot>:missing in case the device is not found.
 
 .TP
 .BR \-\-add-journal
-Add journal to an existing array, or recreate journal for RAID-4/5/6 array
-that lost a journal device. To avoid interrupting on-going write opertions,
+Add a journal to an existing array, or recreate journal for a RAID-4/5/6 array
+that lost a journal device. To avoid interrupting ongoing write operations,
 .B \-\-add-journal
 only works for array in Read-Only state.
 
@@ -1626,9 +1646,9 @@ Print details of one or more md devices.
 .TP
 .BR \-\-detail\-platform
 Print details of the platform's RAID capabilities (firmware / hardware
-topology) for a given metadata format. If used without argument, mdadm
+topology) for a given metadata format. If used without an argument, mdadm
 will scan all controllers looking for their capabilities. Otherwise, mdadm
-will only look at the controller specified by the argument in form of an
+will only look at the controller specified by the argument in the form of an
 absolute filepath or a link, e.g.
 .IR /sys/devices/pci0000:00/0000:00:1f.2 .
 
@@ -1695,9 +1715,11 @@ does not report the bitmap for that array.
 .TP
 .B \-\-examine\-badblocks
 List the bad-blocks recorded for the device, if a bad-blocks list has
-been configured.  Currently only
+been configured. Currently only
 .B 1.x
-metadata supports bad-blocks lists.
+and
+.B IMSM
+metadata support bad-blocks lists.
 
 .TP
 .BI \-\-dump= directory
@@ -1735,8 +1757,8 @@ the block where the superblock would be is overwritten even if it
 doesn't appear to be valid.
 
 .B Note:
-Be careful to call \-\-zero\-superblock with clustered raid, make sure
-array isn't used or assembled in other cluster node before execute it.
+Be careful when calling \-\-zero\-superblock with clustered raid. Make sure
+the array isn't used or assembled in another cluster node before executing it.
 
 .TP
 .B \-\-kill\-subarray=
@@ -1783,7 +1805,7 @@ For each md device given, or each device in /proc/mdstat if
 is given, arrange for the array to be marked clean as soon as possible.
 .I mdadm
 will return with success if the array uses external metadata and we
-successfully waited.  For native arrays this returns immediately as the
+successfully waited.  For native arrays, this returns immediately as the
 kernel handles dirty-clean transitions at shutdown.  No action is taken
 if safe-mode handling is disabled.
 
@@ -1823,7 +1845,7 @@ uses to help track which arrays are currently being assembled.
 
 .TP
 .BR \-\-run ", " \-R
-Run any array assembled as soon as a minimal number of devices are
+Run any array assembled as soon as a minimal number of devices is
 available, rather than waiting until all expected devices are present.
 
 .TP
@@ -1853,7 +1875,7 @@ Only used with \-\-fail.  The 'path' given will be recorded so that if
 a new device appears at the same location it can be automatically
 added to the same array.  This allows the failed device to be
 automatically replaced by a new device without metadata if it appears
-at specified path.   This option is normally only set by a
+at specified path.   This option is normally only set by an
 .I udev
 script.
 
@@ -1954,7 +1976,7 @@ Usage:
 .PP
 This usage assembles one or more RAID arrays from pre-existing components.
 For each array, mdadm needs to know the md device, the identity of the
-array, and a number of component-devices.  These can be found in a number of ways.
+array, and the number of component devices.  These can be found in a number of ways.
 
 In the first usage example (without the
 .BR \-\-scan )
@@ -1994,11 +2016,9 @@ The config file is only used if explicitly named with
 .B \-\-config
 or requested with (a possibly implicit)
 .BR \-\-scan .
-In the later case,
-.B /etc/mdadm.conf
-or
-.B /etc/mdadm/mdadm.conf
-is used.
+In the latter case, the default config file is used.  See
+.BR mdadm.conf (5)
+for more details.
 
 If
 .B \-\-scan
@@ -2034,14 +2054,14 @@ detects that udev is not configured, it will create the devices in
 .B /dev
 itself.
 
-In Linux kernels prior to version 2.6.28 there were two distinctly
-different types of md devices that could be created: one that could be
+In Linux kernels prior to version 2.6.28 there were two distinct
+types of md devices that could be created: one that could be
 partitioned using standard partitioning tools and one that could not.
-Since 2.6.28 that distinction is no longer relevant as both type of
+Since 2.6.28 that distinction is no longer relevant as both types of
 devices can be partitioned.
 .I mdadm
 will normally create the type that originally could not be partitioned
-as it has a well defined major number (9).
+as it has a well-defined major number (9).
 
 Prior to 2.6.28, it is important that mdadm chooses the correct type
 of array device to use.  This can be controlled with the
@@ -2061,7 +2081,7 @@ can also be given in the configuration file as a word starting
 .B auto=
 on the ARRAY line for the relevant array.
 
-.SS Auto Assembly
+.SS Auto-Assembly
 When
 .B \-\-assemble
 is used with
@@ -2117,11 +2137,11 @@ See
 .IR mdadm.conf (5)
 for further details.
 
-Note: Auto assembly cannot be used for assembling and activating some
+Note: Auto-assembly cannot be used for assembling and activating some
 arrays which are undergoing reshape.  In particular as the
 .B backup\-file
-cannot be given, any reshape which requires a backup-file to continue
-cannot be started by auto assembly.  An array which is growing to more
+cannot be given, any reshape which requires a backup file to continue
+cannot be started by auto-assembly.  An array which is growing to more
 devices and has passed the critical section can be assembled using
 auto-assembly.
 
@@ -2158,20 +2178,28 @@ Usage:
 .I md-device
 .BI \-\-chunk= X
 .BI \-\-level= Y
-.br
 .BI \-\-raid\-devices= Z
 .I devices
 
 .PP
-This usage will initialise a new md array, associate some devices with
+This usage will initialize a new md array, associate some devices with
 it, and activate the array.
 
+.I md-device
+is a new device. This could be a standard name or a chosen name. For details see:
+.BR "DEVICE NAMES"
+
 The named device will normally not exist when
 .I "mdadm \-\-create"
 is run, but will be created by
 .I udev
 once the array becomes active.
 
+The maximum length of the md-device name is 32 characters.
+Some metadata types have stricter limits
+(like IMSM where only 16 characters are allowed).
+For that reason, a long name could be truncated or rejected, depending on the metadata policy.
+
 As devices are added, they are checked to see if they contain RAID
 superblocks or filesystems.  They are also checked to see if the variance in
 device size exceeds 1%.
@@ -2201,29 +2229,11 @@ array.  This feature can be overridden with the
 .B \-\-force
 option.
 
-When creating an array with version-1 metadata a name for the array is
-required.
-If this is not given with the
-.B \-\-name
-option,
-.I mdadm
-will choose a name based on the last component of the name of the
-device being created.  So if
-.B /dev/md3
-is being created, then the name
-.B 3
-will be chosen.
-If
-.B /dev/md/home
-is being created, then the name
-.B home
-will be used.
-
 When creating a partition based array, using
 .I mdadm
 with version-1.x metadata, the partition type should be set to
 .B 0xDA
-(non fs-data).  This type selection allows for greater precision since
+(non fs-data).  This type of selection allows for greater precision since
 using any other [RAID auto-detect (0xFD) or a GNU/Linux partition (0x83)],
 might create problems in the event of array recovery through a live cdrom.
 
@@ -2239,7 +2249,7 @@ when creating a v0.90 array will silently override any
 setting.
 .\"If the
 .\".B \-\-size
-.\"option is given, it is not necessary to list any component-devices in this command.
+.\"option is given, it is not necessary to list any component devices in this command.
 .\"They can be added later, before a
 .\".B \-\-run.
 .\"If no
@@ -2253,7 +2263,7 @@ requested with the
 .B \-\-bitmap
 option or a different consistency policy is selected with the
 .B \-\-consistency\-policy
-option. In any case space for a bitmap will be reserved so that one
+option. In any case, space for a bitmap will be reserved so that one
 can be added later with
 .BR "\-\-grow \-\-bitmap=internal" .
 
@@ -2303,7 +2313,7 @@ will firstly mark
 as faulty in
 .B /dev/md0
 and will then remove it from the array and finally add it back
-in as a spare.  However only one md array can be affected by a single
+in as a spare.  However, only one md array can be affected by a single
 command.
 
 When a device is added to an active array, mdadm checks to see if it
@@ -2395,18 +2405,18 @@ or
 .B \-\-update=
 option. The supported options are
 .BR name ,
-.B ppl
+.BR ppl ,
+.BR no\-ppl ,
+.BR bitmap
 and
-.BR no\-ppl .
+.BR no\-bitmap .
 
 The
 .B name
-option updates the subarray name in the metadata, it may not affect the
-device node name or the device node symlink until the subarray is
-re\-assembled.  If updating
-.B name
-would change the UUID of an active subarray this operation is blocked,
-and the command will end in an error.
+option updates the subarray name in the metadata. It must be
+.BR "POSIX PORTABLE NAME"
+compatible and cannot be longer than 32 chars. If successful, the new value will be respected after
+the next assembly.
 
 The
 .B ppl
@@ -2415,6 +2425,13 @@ and
 options enable and disable PPL in the metadata. Currently supported only for
 IMSM subarrays.
 
+The
+.B bitmap
+and
+.B no\-bitmap
+options enable and disable write-intent bitmap in the metadata. Currently supported only for
+IMSM subarrays.
+
 .TP
 .B \-\-examine
 The device should be a component of an md array.
@@ -2439,14 +2456,14 @@ config file to be examined.
 If the device contains RAID metadata, a file will be created in the
 .I directory
 and the metadata will be written to it.  The file will be the same
-size as the device and have the metadata written in the file at the
-same locate that it exists in the device.  However the file will be "sparse" so
+size as the device and will have the metadata written at the
+same location as it exists in the device.  However, the file will be "sparse" so
 that only those blocks containing metadata will be allocated. The
 total space used will be small.
 
-The file name used in the
+The filename used in the
 .I directory
-will be the base name of the device.   Further if any links appear in
+will be the base name of the device.   Further, if any links appear in
 .I /dev/disk/by-id
 which point to the device, then hard links to the file will be created
 in
@@ -2529,13 +2546,33 @@ Usage:
 .I options... devices...
 
 .PP
-This usage causes
+Monitor option can work in two modes:
+.IP \(bu 4
+system wide mode, follow all md devices based on
+.B /proc/mdstat,
+.IP \(bu 4
+follow only specified MD devices in command line.
+.PP
+
+.B \-\-scan -
+indicates system wide mode. Option causes the
+.I monitor
+to track all md devices that appear in
+.B /proc/mdstat.
+If it is not set, then at least one
+.B device
+must be specified.
+
+Monitor usage causes
 .I mdadm
 to periodically poll a number of md arrays and to report on any events
 noticed.
-.I mdadm
-will never exit once it decides that there are arrays to be checked,
-so it should normally be run in the background.
+
+In both modes,
+.I monitor
+will work as long as there is an active array with redundancy and it is defined to follow (for
+.B \-\-scan
+every array is followed).
 
 As well as reporting events,
 .I mdadm
@@ -2546,15 +2583,6 @@ or
 .B domain
 and if the destination array has a failed drive but no spares.
 
-If any devices are listed on the command line,
-.I mdadm
-will only monitor those devices.  Otherwise all arrays listed in the
-configuration file will be monitored.  Further, if
-.B \-\-scan
-is given, then any other md devices that appear in
-.B /proc/mdstat
-will also be monitored.
-
 The result of monitoring the arrays is the generation of events.
 These events are passed to a separate program (if specified) and may
 be mailed to a given E-mail address.
@@ -2567,16 +2595,34 @@ device if relevant (such as a component device that has failed).
 
 If
 .B \-\-scan
-is given, then a program or an E-mail address must be specified on the
-command line or in the config file.  If neither are available, then
+is given, then a
+.B program
+or an
+.B e-mail
+address must be specified on the
+command line or in the config file. If neither are available, then
 .I mdadm
 will not monitor anything.
-Without
-.B \-\-scan,
-.I mdadm
-will continue monitoring as long as something was found to monitor.  If
-no program or email is given, then each event is reported to
-.BR stdout .
+For devices given directly in command line, without
+.B program
+or
+.B email
+specified, each event is reported to
+.BR stdout .
+
+Note: For systems where
+.I mdadm monitor
+is configured via systemd,
+.B mdmonitor(mdmonitor.service)
+should be configured. The service is designed to be the primary solution for array monitoring;
+it is configured to work in system wide mode.
+It is automatically started and stopped according to current state and types of MD arrays in system.
+The service may require additional configuration, like
+.B e-mail
+or
+.B delay.
+That should be done in
+.B mdadm.conf.
 
 The different events are:
 
@@ -2605,10 +2651,10 @@ check, repair). (syslog priority: Warning)
 .BI Rebuild NN
 Where
 .I NN
-is a two-digit number (ie. 05, 48). This indicates that rebuild
-has passed that many percent of the total. The events are generated
-with fixed increment since 0. Increment size may be specified with
-a commandline option (default is 20). (syslog priority: Warning)
+is a two-digit number (e.g. 05, 48). This indicates that the rebuild
+has reached that percentage of the total. The events are generated
+at a fixed increment from 0. The increment size may be specified with
+a command-line option (the default is 20). (syslog priority: Warning)
 
 .TP
 .B RebuildFinished
@@ -2716,8 +2762,8 @@ When
 detects that an array in a spare group has fewer active
 devices than necessary for the complete array, and has no spare
 devices, it will look for another array in the same spare group that
-has a full complement of working drive and a spare.  It will then
-attempt to remove the spare from the second drive and add it to the
+has a full complement of working drives and a spare.  It will then
+attempt to remove the spare from the second array and add it to the
 first.
 If the removal succeeds but the adding fails, then it is added back to
 the original array.
@@ -2731,10 +2777,8 @@ and then follow similar steps as above if a matching spare is found.
 .SH GROW MODE
 The GROW mode is used for changing the size or shape of an active
 array.
-For this to work, the kernel must support the necessary change.
-Various types of growth are being added during 2.6 development.
 
-Currently the supported changes include
+During the kernel 2.6 era the following changes were added:
 .IP \(bu 4
 change the "size" attribute for RAID1, RAID4, RAID5 and RAID6.
 .IP \(bu 4
@@ -2777,8 +2821,8 @@ use more than half of a spare device for backup space.
 
 .SS SIZE CHANGES
 Normally when an array is built the "size" is taken from the smallest
-of the drives.  If all the small drives in an arrays are, one at a
-time, removed and replaced with larger drives, then you could have an
+of the drives.  If all the small drives in an array are, over time,
+removed and replaced with larger drives, then you could have an
 array of large drives with only a small amount used.  In this
 situation, changing the "size" with "GROW" mode will allow the extra
 space to start being used.  If the size is increased in this way, a
@@ -2793,7 +2837,7 @@ after growing, or to reduce its size
 .B prior
 to shrinking the array.
 
-Also the size of an array cannot be changed while it has an active
+Also, the size of an array cannot be changed while it has an active
 bitmap.  If an array has a bitmap, it must be removed before the size
 can be changed. Once the change is complete a new bitmap can be created.
 
@@ -2873,12 +2917,12 @@ long time.  A
 is required.  If the array is not simultaneously being grown or
 shrunk, so that the array size will remain the same - for example,
 reshaping a 3-drive RAID5 into a 4-drive RAID6 - the backup file will
-be used not just for a "cricital section" but throughout the reshape
+be used not just for a "critical section" but throughout the reshape
 operation, as described below under LAYOUT CHANGES.
 
 .SS CHUNK-SIZE AND LAYOUT CHANGES
 
-Changing the chunk-size of layout without also changing the number of
+Changing the chunk-size or layout without also changing the number of
 devices at the same time will involve re-writing all blocks in-place.
 To ensure against data loss in the case of a crash, a
 .B --backup-file
@@ -2891,7 +2935,7 @@ slowly.
 If the reshape is interrupted for any reason, this backup file must be
 made available to
 .B "mdadm --assemble"
-so the array can be reassembled.  Consequently the file cannot be
+so the array can be reassembled.  Consequently, the file cannot be
 stored on the device being reshaped.
 
 
@@ -3135,6 +3179,11 @@ environment.  This can be useful for testing or for disaster
 recovery.  You should be aware that interoperability may be
 compromised by setting this value.
 
+These changes can also be suppressed by adding
+.B mdadm.imsm.test=1
+to the kernel command line. This makes it easy to test IMSM
+code in a virtual machine that doesn't have IMSM virtual hardware.
+
 .TP
 .B MDADM_GROW_ALLOW_OLD
 If an array is stopped while it is performing a reshape and that
@@ -3311,24 +3360,27 @@ uses this to find arrays when
 is given in Misc mode, and to monitor array reconstruction
 on Monitor mode.
 
-.SS /etc/mdadm.conf
+.SS {CONFFILE} (or {CONFFILE2})
 
-The config file lists which devices may be scanned to see if
-they contain MD super block, and gives identifying information
-(e.g. UUID) about known MD arrays.  See
+Default config file.  See
 .BR mdadm.conf (5)
 for more details.
 
-.SS /etc/mdadm.conf.d
+.SS {CONFFILE}.d (or {CONFFILE2}.d)
 
-A directory containing configuration files which are read in lexical
-order.
+Default directory containing configuration files.  See
+.BR mdadm.conf (5)
+for more details.
 
 .SS {MAP_PATH}
 When
 .B \-\-incremental
 mode is used, this file gets a list of arrays currently being created.
 
+.SH POSIX PORTABLE NAME
+A valid name can only consist of characters "A-Za-z0-9.-_".
+The name cannot start with a leading "-" and cannot exceed 255 chars.
+
 .SH DEVICE NAMES
 
 .I mdadm
@@ -3350,6 +3402,10 @@ can be given, or just the suffix of the second sort of name, such as
 .I home
 can be given.
 
+In every style, raw name must be compatible with
+.BR "POSIX PORTABLE NAME"
+and has to be no longer than 32 chars.
+
 When
 .I mdadm
 chooses device names during auto-assembly or incremental assembly, it
@@ -3398,6 +3454,25 @@ is any string.  These names are supported by
 since version 3.3 provided they are enabled in
 .IR mdadm.conf .
 
+.SH UNDERSTANDING OUTPUT
+
+.TP
+EXAMINE
+
+.TP
+.B checkpoint
+The checkpoint value is reported when the array is performing some action, including
+resync, recovery or reshape. Checkpoints allow resuming an action from a certain
+point if it was interrupted.
+
+The checkpoint is reported as a combination of two values: the current migration unit
+and the number of blocks per unit. By multiplying those values and dividing by
+the array size, the checkpoint progress percentage can be obtained in relation to
+the current progress reported in /proc/mdstat. The checkpoint is also related to (and
+sometimes based on) the sysfs entry sync_completed, but depending on the action, units
+may differ. Even if the units are the same, it should not be expected that
+checkpoint and sync_completed will match exactly or be updated simultaneously.
+
 .SH NOTE
 .I mdadm
 was previously known as
@@ -3407,7 +3482,7 @@ was previously known as
 For further information on mdadm usage, MD and the various levels of
 RAID, see:
 .IP
-.B http://raid.wiki.kernel.org/
+.B https://raid.wiki.kernel.org/
 .PP
 (based upon Jakob \(/Ostergaard's Software\-RAID.HOWTO)
 .PP
@@ -3415,7 +3490,7 @@ The latest version of
 .I mdadm
 should always be available from
 .IP
-.B http://www.kernel.org/pub/linux/utils/raid/mdadm/
+.B https://www.kernel.org/pub/linux/utils/raid/mdadm/
 .PP
 Related man pages:
 .PP
diff --git a/mdadm.c b/mdadm.c
index 13dc24e4426923c1ba450fc68e47cfdf33498c5c..d18619db86bf162ee02b2db29beef0da17781321 100644 (file)
--- a/mdadm.c
+++ b/mdadm.c
@@ -49,7 +49,6 @@ int main(int argc, char *argv[])
        int i;
 
        unsigned long long array_size = 0;
-       unsigned long long data_offset = INVALID_SECTORS;
        struct mddev_ident ident;
        char *configfile = NULL;
        int devmode = 0;
@@ -59,7 +58,6 @@ int main(int argc, char *argv[])
        struct mddev_dev *dv;
        mdu_array_info_t array;
        int devs_found = 0;
-       char *symlinks = NULL;
        int grow_continue = 0;
        /* autof indicates whether and how to create device node.
         * bottom 3 bits are style.  Rest (when shifted) are number of parts
@@ -80,6 +78,7 @@ int main(int argc, char *argv[])
                .layout         = UnSet,
                .bitmap_chunk   = UnSet,
                .consistency_policy     = CONSISTENCY_POLICY_UNKNOWN,
+               .data_offset = INVALID_SECTORS,
        };
 
        char sys_hostname[256];
@@ -101,32 +100,20 @@ int main(int argc, char *argv[])
        char *dump_directory = NULL;
 
        int print_help = 0;
-       FILE *outf;
+       FILE *outf = NULL;
 
        int mdfd = -1;
        int locked = 0;
 
        srandom(time(0) ^ getpid());
 
-       ident.uuid_set = 0;
-       ident.level = UnSet;
-       ident.raid_disks = UnSet;
-       ident.super_minor = UnSet;
-       ident.devices = 0;
-       ident.spare_group = NULL;
-       ident.autof = 0;
-       ident.st = NULL;
-       ident.bitmap_fd = -1;
-       ident.bitmap_file = NULL;
-       ident.name[0] = 0;
-       ident.container = NULL;
-       ident.member = NULL;
-
-       if (get_linux_version() < 2006015) {
-               pr_err("This version of mdadm does not support kernels older than 2.6.15\n");
+       if (get_linux_version() < 2006032) {
+               pr_err("This version of mdadm does not support kernels older than 2.6.32\n");
                exit(1);
        }
 
+       ident_init(&ident);
+
        while ((option_index = -1),
               (opt = getopt_long(argc, argv, shortopt, long_options,
                                  &option_index)) != -1) {
@@ -167,7 +154,7 @@ int main(int argc, char *argv[])
                        continue;
 
                case HomeHost:
-                       if (strcasecmp(optarg, "<ignore>") == 0)
+                       if (is_devname_ignore(optarg) == true)
                                c.require_homehost = 0;
                        else
                                c.homehost = optarg;
@@ -227,6 +214,7 @@ int main(int argc, char *argv[])
                        shortopt = short_bitmap_auto_options;
                        break;
                case 'F': newmode = MONITOR;
+                       shortopt = short_monitor_options;
                        break;
                case 'G': newmode = GROW;
                        shortopt = short_bitmap_options;
@@ -280,8 +268,8 @@ int main(int argc, char *argv[])
                        else
                                fprintf(stderr, "-%c", opt);
                        fprintf(stderr, " would set mdadm mode to \"%s\", but it is already set to \"%s\".\n",
-                               map_num(modes, newmode),
-                               map_num(modes, mode));
+                               map_num_s(modes, newmode),
+                               map_num_s(modes, mode));
                        exit(2);
                } else if (!mode && newmode) {
                        mode = newmode;
@@ -479,15 +467,15 @@ int main(int argc, char *argv[])
 
                case O(CREATE,DataOffset):
                case O(GROW,DataOffset):
-                       if (data_offset != INVALID_SECTORS) {
+                       if (s.data_offset != INVALID_SECTORS) {
                                pr_err("data-offset may only be specified one. Second value is %s.\n", optarg);
                                exit(2);
                        }
                        if (mode == CREATE && strcmp(optarg, "variable") == 0)
-                               data_offset = VARIABLE_OFFSET;
+                               s.data_offset = VARIABLE_OFFSET;
                        else
-                               data_offset = parse_size(optarg);
-                       if (data_offset == INVALID_SECTORS) {
+                               s.data_offset = parse_size(optarg);
+                       if (s.data_offset == INVALID_SECTORS) {
                                pr_err("invalid data-offset: %s\n",
                                        optarg);
                                exit(2);
@@ -544,7 +532,7 @@ int main(int argc, char *argv[])
                        switch(s.level) {
                        default:
                                pr_err("layout not meaningful for %s arrays.\n",
-                                       map_num(pers, s.level));
+                                       map_num_s(pers, s.level));
                                exit(2);
                        case UnSet:
                                pr_err("raid level must be given before layout.\n");
@@ -602,6 +590,10 @@ int main(int argc, char *argv[])
                        s.assume_clean = 1;
                        continue;
 
+               case O(CREATE, WriteZeroes):
+                       s.write_zeroes = 1;
+                       continue;
+
                case O(GROW,'n'):
                case O(CREATE,'n'):
                case O(BUILD,'n'): /* number of raid disks */
@@ -610,8 +602,7 @@ int main(int argc, char *argv[])
                                        s.raiddisks, optarg);
                                exit(2);
                        }
-                       s.raiddisks = parse_num(optarg);
-                       if (s.raiddisks <= 0) {
+                       if (parse_num(&s.raiddisks, optarg) != 0 || s.raiddisks <= 0) {
                                pr_err("invalid number of raid devices: %s\n",
                                        optarg);
                                exit(2);
@@ -621,8 +612,7 @@ int main(int argc, char *argv[])
                case O(ASSEMBLE, Nodes):
                case O(GROW, Nodes):
                case O(CREATE, Nodes):
-                       c.nodes = parse_num(optarg);
-                       if (c.nodes < 2) {
+                       if (parse_num(&c.nodes, optarg) != 0 || c.nodes < 2) {
                                pr_err("clustered array needs two nodes at least: %s\n",
                                        optarg);
                                exit(2);
@@ -647,8 +637,7 @@ int main(int argc, char *argv[])
                                        s.level);
                                exit(2);
                        }
-                       s.sparedisks = parse_num(optarg);
-                       if (s.sparedisks < 0) {
+                       if (parse_num(&s.sparedisks, optarg) != 0 || s.sparedisks < 0) {
                                pr_err("invalid number of spare-devices: %s\n",
                                        optarg);
                                exit(2);
@@ -665,13 +654,6 @@ int main(int argc, char *argv[])
                case O(ASSEMBLE,Auto): /* auto-creation of device node */
                        c.autof = parse_auto(optarg, "--auto flag", 0);
                        continue;
-
-               case O(CREATE,Symlinks):
-               case O(BUILD,Symlinks):
-               case O(ASSEMBLE,Symlinks): /* auto creation of symlinks in /dev to /dev/md */
-                       symlinks = optarg;
-                       continue;
-
                case O(BUILD,'f'): /* force honouring '-n 1' */
                case O(BUILD,Force): /* force honouring '-n 1' */
                case O(GROW,'f'): /* ditto */
@@ -708,20 +690,14 @@ int main(int argc, char *argv[])
                case O(CREATE,'N'):
                case O(ASSEMBLE,'N'):
                case O(MISC,'N'):
-                       if (ident.name[0]) {
-                               pr_err("name cannot be set twice.   Second value %s.\n", optarg);
-                               exit(2);
-                       }
                        if (mode == MISC && !c.subarray) {
                                pr_err("-N/--name only valid with --update-subarray in misc mode\n");
                                exit(2);
                        }
-                       if (strlen(optarg) > 32) {
-                               pr_err("name '%s' is too long, 32 chars max.\n",
-                                       optarg);
+
+                       if (ident_set_name(&ident, optarg) != MDADM_STATUS_SUCCESS)
                                exit(2);
-                       }
-                       strcpy(ident.name, optarg);
+
                        continue;
 
                case O(ASSEMBLE,'m'): /* super-minor for array */
@@ -732,12 +708,9 @@ int main(int argc, char *argv[])
                        }
                        if (strcmp(optarg, "dev") == 0)
                                ident.super_minor = -2;
-                       else {
-                               ident.super_minor = parse_num(optarg);
-                               if (ident.super_minor < 0) {
-                                       pr_err("Bad super-minor number: %s.\n", optarg);
-                                       exit(2);
-                               }
+                       else if (parse_num(&ident.super_minor, optarg) != 0 || ident.super_minor < 0) {
+                               pr_err("Bad super-minor number: %s.\n", optarg);
+                               exit(2);
                        }
                        continue;
 
@@ -748,90 +721,50 @@ int main(int argc, char *argv[])
                        continue;
 
                case O(ASSEMBLE,'U'): /* update the superblock */
-               case O(MISC,'U'):
+               case O(MISC,'U'): {
+                       enum update_opt print_mode = UOPT_HELP;
+                       const char *error_addon = "update option";
+
                        if (c.update) {
                                pr_err("Can only update one aspect of superblock, both %s and %s given.\n",
-                                       c.update, optarg);
+                                       map_num(update_options, c.update), optarg);
                                exit(2);
                        }
                        if (mode == MISC && !c.subarray) {
                                pr_err("Only subarrays can be updated in misc mode\n");
                                exit(2);
                        }
-                       c.update = optarg;
-                       if (strcmp(c.update, "sparc2.2") == 0)
-                               continue;
-                       if (strcmp(c.update, "super-minor") == 0)
-                               continue;
-                       if (strcmp(c.update, "summaries") == 0)
-                               continue;
-                       if (strcmp(c.update, "resync") == 0)
-                               continue;
-                       if (strcmp(c.update, "uuid") == 0)
-                               continue;
-                       if (strcmp(c.update, "name") == 0)
-                               continue;
-                       if (strcmp(c.update, "homehost") == 0)
-                               continue;
-                       if (strcmp(c.update, "home-cluster") == 0)
-                               continue;
-                       if (strcmp(c.update, "nodes") == 0)
-                               continue;
-                       if (strcmp(c.update, "devicesize") == 0)
-                               continue;
-                       if (strcmp(c.update, "no-bitmap") == 0)
-                               continue;
-                       if (strcmp(c.update, "bbl") == 0)
-                               continue;
-                       if (strcmp(c.update, "no-bbl") == 0)
-                               continue;
-                       if (strcmp(c.update, "force-no-bbl") == 0)
-                               continue;
-                       if (strcmp(c.update, "ppl") == 0)
-                               continue;
-                       if (strcmp(c.update, "no-ppl") == 0)
-                               continue;
-                       if (strcmp(c.update, "metadata") == 0)
-                               continue;
-                       if (strcmp(c.update, "revert-reshape") == 0)
-                               continue;
-                       if (strcmp(c.update, "layout-original") == 0 ||
-                           strcmp(c.update, "layout-alternate") == 0)
-                               continue;
-                       if (strcmp(c.update, "byteorder") == 0) {
+
+                       c.update = map_name(update_options, optarg);
+
+                       if (devmode == UpdateSubarray) {
+                               print_mode = UOPT_SUBARRAY_ONLY;
+                               error_addon = "update-subarray option";
+
+                               if (c.update > UOPT_SUBARRAY_ONLY && c.update < UOPT_HELP)
+                                       c.update = UOPT_UNDEFINED;
+                       }
+
+                       switch (c.update) {
+                       case UOPT_UNDEFINED:
+                               pr_err("'--update=%s' is invalid %s. ",
+                                       optarg, error_addon);
+                               outf = stderr;
+                       case UOPT_HELP:
+                               if (!outf)
+                                       outf = stdout;
+                               fprint_update_options(outf, print_mode);
+                               exit(outf == stdout ? 0 : 2);
+                       case UOPT_BYTEORDER:
                                if (ss) {
                                        pr_err("must not set metadata type with --update=byteorder.\n");
                                        exit(2);
                                }
-                               for(i = 0; !ss && superlist[i]; i++)
-                                       ss = superlist[i]->match_metadata_desc(
-                                               "0.swap");
-                               if (!ss) {
-                                       pr_err("INTERNAL ERROR cannot find 0.swap\n");
-                                       exit(2);
-                               }
-
-                               continue;
+                       default:
+                               break;
                        }
-                       if (strcmp(c.update,"?") == 0 ||
-                           strcmp(c.update, "help") == 0) {
-                               outf = stdout;
-                               fprintf(outf, "%s: ", Name);
-                       } else {
-                               outf = stderr;
-                               fprintf(outf,
-                                       "%s: '--update=%s' is invalid.  ",
-                                       Name, c.update);
-                       }
-                       fprintf(outf, "Valid --update options are:\n"
-               "     'sparc2.2', 'super-minor', 'uuid', 'name', 'nodes', 'resync',\n"
-               "     'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n"
-               "     'no-bitmap', 'metadata', 'revert-reshape'\n"
-               "     'bbl', 'no-bbl', 'force-no-bbl', 'ppl', 'no-ppl'\n"
-               "     'layout-original', 'layout-alternate'\n"
-                               );
-                       exit(outf == stdout ? 0 : 2);
-
+                       continue;
+               }
                case O(MANAGE,'U'):
                        /* update=devicesize is allowed with --re-add */
                        if (devmode != 'A') {
@@ -840,14 +773,14 @@ int main(int argc, char *argv[])
                        }
                        if (c.update) {
                                pr_err("Can only update one aspect of superblock, both %s and %s given.\n",
-                                       c.update, optarg);
+                                       map_num(update_options, c.update), optarg);
                                exit(2);
                        }
-                       c.update = optarg;
-                       if (strcmp(c.update, "devicesize") != 0 &&
-                           strcmp(c.update, "bbl") != 0 &&
-                           strcmp(c.update, "force-no-bbl") != 0 &&
-                           strcmp(c.update, "no-bbl") != 0) {
+                       c.update = map_name(update_options, optarg);
+                       if (c.update != UOPT_DEVICESIZE &&
+                           c.update != UOPT_BBL &&
+                           c.update != UOPT_NO_BBL &&
+                           c.update != UOPT_FORCE_NO_BBL) {
                                pr_err("only 'devicesize', 'bbl', 'no-bbl', and 'force-no-bbl' can be updated with --re-add\n");
                                exit(2);
                        }
@@ -904,8 +837,8 @@ int main(int argc, char *argv[])
 
                case O(MONITOR,'r'): /* rebuild increments */
                case O(MONITOR,Increment):
-                       increments = atoi(optarg);
-                       if (increments > 99 || increments < 1) {
+                       if (parse_num(&increments, optarg) != 0
+                               || increments > 99 || increments < 1) {
                                pr_err("please specify positive integer between 1 and 99 as rebuild increments.\n");
                                exit(2);
                        }
@@ -916,15 +849,10 @@ int main(int argc, char *argv[])
                case O(BUILD,'d'): /* delay for bitmap updates */
                case O(CREATE,'d'):
                        if (c.delay)
-                               pr_err("only specify delay once. %s ignored.\n",
-                                       optarg);
-                       else {
-                               c.delay = parse_num(optarg);
-                               if (c.delay < 1) {
-                                       pr_err("invalid delay: %s\n",
-                                               optarg);
-                                       exit(2);
-                               }
+                               pr_err("only specify delay once. %s ignored.\n", optarg);
+                       else if (parse_num(&c.delay, optarg) != 0 || c.delay < 1) {
+                               pr_err("invalid delay: %s\n", optarg);
+                               exit(2);
                        }
                        continue;
                case O(MONITOR,'f'): /* daemonise */
@@ -1171,7 +1099,7 @@ int main(int argc, char *argv[])
                                exit(2);
                        }
                        if (strcmp(optarg, "internal") == 0 ||
-                           strcmp(optarg, "none") == 0 ||
+                           strcmp(optarg, STR_COMMON_NONE) == 0 ||
                            strchr(optarg, '/') != NULL) {
                                s.bitmap_file = optarg;
                                continue;
@@ -1206,18 +1134,16 @@ int main(int argc, char *argv[])
 
                case O(GROW, WriteBehind):
                case O(BUILD, WriteBehind):
-               case O(CREATE, WriteBehind): /* write-behind mode */
+               case O(CREATE, WriteBehind):
                        s.write_behind = DEFAULT_MAX_WRITE_BEHIND;
-                       if (optarg) {
-                               s.write_behind = parse_num(optarg);
-                               if (s.write_behind < 0 ||
-                                   s.write_behind > 16383) {
-                                       pr_err("Invalid value for maximum outstanding write-behind writes: %s.\n\tMust be between 0 and 16383.\n", optarg);
-                                       exit(2);
-                               }
+                       if (optarg &&
+                           (parse_num(&s.write_behind, optarg) != 0 ||
+                            s.write_behind < 0 || s.write_behind > 16383)) {
+                               pr_err("Invalid value for maximum outstanding write-behind writes: %s.\n\tMust be between 0 and 16383.\n",
+                                               optarg);
+                               exit(2);
                        }
                        continue;
-
                case O(INCREMENTAL, 'r'):
                case O(INCREMENTAL, RebuildMapOpt):
                        rebuild_map = 1;
@@ -1259,10 +1185,10 @@ int main(int argc, char *argv[])
                if (option_index > 0)
                        pr_err(":option --%s not valid in %s mode\n",
                                long_options[option_index].name,
-                               map_num(modes, mode));
+                               map_num_s(modes, mode));
                else
                        pr_err("option -%c not valid in %s mode\n",
-                               opt, map_num(modes, mode));
+                               opt, map_num_s(modes, mode));
                exit(2);
 
        }
@@ -1287,7 +1213,7 @@ int main(int argc, char *argv[])
                if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN &&
                    s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
                        pr_err("--write-journal is not supported with consistency policy: %s\n",
-                              map_num(consistency_policies, s.consistency_policy));
+                              map_num_s(consistency_policies, s.consistency_policy));
                        exit(2);
                }
        }
@@ -1296,33 +1222,37 @@ int main(int argc, char *argv[])
            s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) {
                if (s.level <= 0) {
                        pr_err("--consistency-policy not meaningful with level %s.\n",
-                              map_num(pers, s.level));
+                              map_num_s(pers, s.level));
                        exit(2);
                } else if (s.consistency_policy == CONSISTENCY_POLICY_JOURNAL &&
                           !s.journaldisks) {
                        pr_err("--write-journal is required for consistency policy: %s\n",
-                              map_num(consistency_policies, s.consistency_policy));
+                              map_num_s(consistency_policies, s.consistency_policy));
                        exit(2);
                } else if (s.consistency_policy == CONSISTENCY_POLICY_PPL &&
                           s.level != 5) {
                        pr_err("PPL consistency policy is only supported for RAID level 5.\n");
                        exit(2);
                } else if (s.consistency_policy == CONSISTENCY_POLICY_BITMAP &&
-                          (!s.bitmap_file ||
-                           strcmp(s.bitmap_file, "none") == 0)) {
+                         (!s.bitmap_file || str_is_none(s.bitmap_file) == true)) {
                        pr_err("--bitmap is required for consistency policy: %s\n",
-                              map_num(consistency_policies, s.consistency_policy));
+                              map_num_s(consistency_policies, s.consistency_policy));
                        exit(2);
                } else if (s.bitmap_file &&
-                          strcmp(s.bitmap_file, "none") != 0 &&
+                          str_is_none(s.bitmap_file) == false &&
                           s.consistency_policy != CONSISTENCY_POLICY_BITMAP &&
                           s.consistency_policy != CONSISTENCY_POLICY_JOURNAL) {
                        pr_err("--bitmap is not compatible with consistency policy: %s\n",
-                              map_num(consistency_policies, s.consistency_policy));
+                              map_num_s(consistency_policies, s.consistency_policy));
                        exit(2);
                }
        }
 
+       if (s.write_zeroes && !s.assume_clean) {
+               pr_info("Disk zeroing requested, setting --assume-clean to skip resync\n");
+               s.assume_clean = 1;
+       }
+
        if (!mode && devs_found) {
                mode = MISC;
                devmode = 'Q';
@@ -1334,18 +1264,6 @@ int main(int argc, char *argv[])
                exit(2);
        }
 
-       if (symlinks) {
-               struct createinfo *ci = conf_get_create_info();
-
-               if (strcasecmp(symlinks, "yes") == 0)
-                       ci->symlinks = 1;
-               else if (strcasecmp(symlinks, "no") == 0)
-                       ci->symlinks = 0;
-               else {
-                       pr_err("option --symlinks must be 'no' or 'yes'\n");
-                       exit(2);
-               }
-       }
        /* Ok, got the option parsing out of the way
         * hopefully it's mostly right but there might be some stuff
         * missing
@@ -1358,42 +1276,45 @@ int main(int argc, char *argv[])
 
        if (mode == MANAGE || mode == BUILD || mode == CREATE ||
            mode == GROW || (mode == ASSEMBLE && ! c.scan)) {
+               struct stat stb;
+               int ret;
+
                if (devs_found < 1) {
                        pr_err("an md device must be given in this mode\n");
                        exit(2);
                }
+               if (ident_set_devname(&ident, devlist->devname) != MDADM_STATUS_SUCCESS)
+                       exit(1);
+
                if ((int)ident.super_minor == -2 && c.autof) {
                        pr_err("--super-minor=dev is incompatible with --auto\n");
                        exit(2);
                }
                if (mode == MANAGE || mode == GROW) {
-                       mdfd = open_mddev(devlist->devname, 1);
+                       mdfd = open_mddev(ident.devname, 1);
                        if (mdfd < 0)
                                exit(1);
-               } else
-                       /* non-existent device is OK */
-                       mdfd = open_mddev(devlist->devname, 0);
-               if (mdfd == -2) {
-                       pr_err("device %s exists but is not an md array.\n", devlist->devname);
-                       exit(1);
-               }
-               if ((int)ident.super_minor == -2) {
-                       struct stat stb;
-                       if (mdfd < 0) {
+
+                       ret = fstat(mdfd, &stb);
+                       if (ret) {
+                               pr_err("fstat failed on %s.\n", ident.devname);
+                               exit(1);
+                       }
+               } else {
+                       ret = stat(ident.devname, &stb);
+                       if (ident.super_minor == -2 && ret != 0) {
                                pr_err("--super-minor=dev given, and listed device %s doesn't exist.\n",
-                                       devlist->devname);
+                                      ident.devname);
+                               exit(1);
+                       }
+
+                       if (!ret && !stat_is_md_dev(&stb)) {
+                               pr_err("device %s exists but is not an md array.\n", ident.devname);
                                exit(1);
                        }
-                       fstat(mdfd, &stb);
-                       ident.super_minor = minor(stb.st_rdev);
-               }
-               if (mdfd >= 0 && mode != MANAGE && mode != GROW) {
-                       /* We don't really want this open yet, we just might
-                        * have wanted to check some things
-                        */
-                       close(mdfd);
-                       mdfd = -1;
                }
+               if (ident.super_minor == -2)
+                       ident.super_minor = minor(stb.st_rdev);
        }
 
        if (s.raiddisks) {
@@ -1408,8 +1329,7 @@ int main(int argc, char *argv[])
        if (c.homehost == NULL && c.require_homehost)
                c.homehost = conf_get_homehost(&c.require_homehost);
        if (c.homehost == NULL || strcasecmp(c.homehost, "<system>") == 0) {
-               if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
-                       sys_hostname[sizeof(sys_hostname)-1] = 0;
+               if (s_gethostname(sys_hostname, sizeof(sys_hostname)) == 0) {
                        c.homehost = sys_hostname;
                }
        }
@@ -1433,7 +1353,12 @@ int main(int argc, char *argv[])
                }
        }
 
-       if (c.backup_file && data_offset != INVALID_SECTORS) {
+       if (c.update && c.update == UOPT_NODES && c.nodes == 0) {
+               pr_err("Please specify nodes number with --nodes\n");
+               exit(1);
+       }
+
+       if (c.backup_file && s.data_offset != INVALID_SECTORS) {
                pr_err("--backup-file and --data-offset are incompatible\n");
                exit(2);
        }
@@ -1472,17 +1397,17 @@ int main(int argc, char *argv[])
        case MANAGE:
                /* readonly, add/remove, readwrite, runstop */
                if (c.readonly > 0)
-                       rv = Manage_ro(devlist->devname, mdfd, c.readonly);
-               if (!rv && devs_found>1)
-                       rv = Manage_subdevs(devlist->devname, mdfd,
-                                           devlist->next, c.verbose, c.test,
-                                           c.update, c.force);
+                       rv = Manage_ro(ident.devname, mdfd, c.readonly);
+               if (!rv && devs_found > 1)
+                       rv = Manage_subdevs(ident.devname, mdfd,
+                                           devlist->next, c.verbose,
+                                           c.test, c.update, c.force);
                if (!rv && c.readonly < 0)
-                       rv = Manage_ro(devlist->devname, mdfd, c.readonly);
+                       rv = Manage_ro(ident.devname, mdfd, c.readonly);
                if (!rv && c.runstop > 0)
-                       rv = Manage_run(devlist->devname, mdfd, &c);
+                       rv = Manage_run(ident.devname, mdfd, &c);
                if (!rv && c.runstop < 0)
-                       rv = Manage_stop(devlist->devname, mdfd, c.verbose, 0);
+                       rv = Manage_stop(ident.devname, mdfd, c.verbose, 0);
                break;
        case ASSEMBLE:
                if (!c.scan && c.runstop == -1) {
@@ -1492,22 +1417,19 @@ int main(int argc, char *argv[])
                           ident.super_minor == UnSet && ident.name[0] == 0 &&
                           !c.scan) {
                        /* Only a device has been given, so get details from config file */
-                       struct mddev_ident *array_ident = conf_get_ident(devlist->devname);
+                       struct mddev_ident *array_ident = conf_get_ident(ident.devname);
                        if (array_ident == NULL) {
-                               pr_err("%s not identified in config file.\n",
-                                       devlist->devname);
+                               pr_err("%s not identified in config file.\n", ident.devname);
                                rv |= 1;
                                if (mdfd >= 0)
                                        close(mdfd);
                        } else {
                                if (array_ident->autof == 0)
                                        array_ident->autof = c.autof;
-                               rv |= Assemble(ss, devlist->devname, array_ident,
-                                              NULL, &c);
+                               rv |= Assemble(ss, ident.devname, array_ident, NULL, &c);
                        }
                } else if (!c.scan)
-                       rv = Assemble(ss, devlist->devname, &ident,
-                                     devlist->next, &c);
+                       rv = Assemble(ss, ident.devname, &ident, devlist->next, &c);
                else if (devs_found > 0) {
                        if (c.update && devs_found > 1) {
                                pr_err("can only update a single array at a time\n");
@@ -1565,7 +1487,7 @@ int main(int argc, char *argv[])
                                break;
                        }
                }
-               rv = Build(devlist->devname, devlist->next, &s, &c);
+               rv = Build(&ident, devlist->next, &s, &c);
                break;
        case CREATE:
                if (c.delay == 0)
@@ -1602,10 +1524,7 @@ int main(int argc, char *argv[])
                        break;
                }
 
-               rv = Create(ss, devlist->devname,
-                           ident.name, ident.uuid_set ? ident.uuid : NULL,
-                           devs_found-1, devlist->next,
-                           &s, &c, data_offset);
+               rv = Create(ss, &ident, devs_found - 1, devlist->next, &s, &c);
                break;
        case MISC:
                if (devmode == 'E') {
@@ -1651,10 +1570,8 @@ int main(int argc, char *argv[])
                        break;
                }
                if (c.delay == 0) {
-                       if (get_linux_version() > 2006016)
-                               /* mdstat responds to poll */
-                               c.delay = 1000;
-                       else
+                       c.delay = conf_get_monitor_delay();
+                       if (!c.delay)
                                c.delay = 60;
                }
                rv = Monitor(devlist, mailaddr, program,
@@ -1704,8 +1621,7 @@ int main(int argc, char *argv[])
                                break;
                        }
                        for (dv = devlist->next; dv; dv = dv->next) {
-                               rv = Grow_Add_device(devlist->devname, mdfd,
-                                                    dv->devname);
+                               rv = Grow_Add_device(ident.devname, mdfd, dv->devname);
                                if (rv)
                                        break;
                        }
@@ -1718,19 +1634,15 @@ int main(int argc, char *argv[])
                        }
                        if (c.delay == 0)
                                c.delay = DEFAULT_BITMAP_DELAY;
-                       rv = Grow_addbitmap(devlist->devname, mdfd, &c, &s);
+                       rv = Grow_addbitmap(ident.devname, mdfd, &c, &s);
                } else if (grow_continue)
-                       rv = Grow_continue_command(devlist->devname,
-                                                  mdfd, c.backup_file,
-                                                  c.verbose);
+                       rv = Grow_continue_command(ident.devname, mdfd, &c);
                else if (s.size > 0 || s.raiddisks || s.layout_str ||
                         s.chunk != 0 || s.level != UnSet ||
-                        data_offset != INVALID_SECTORS) {
-                       rv = Grow_reshape(devlist->devname, mdfd,
-                                         devlist->next,
-                                         data_offset, &c, &s);
+                        s.data_offset != INVALID_SECTORS) {
+                       rv = Grow_reshape(ident.devname, mdfd, devlist->next, &c, &s);
                } else if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) {
-                       rv = Grow_consistency_policy(devlist->devname, mdfd, &c, &s);
+                       rv = Grow_consistency_policy(ident.devname, mdfd, &c, &s);
                } else if (array_size == 0)
                        pr_err("no changes to --grow\n");
                break;
@@ -1776,10 +1688,13 @@ int main(int argc, char *argv[])
                autodetect();
                break;
        }
+       if (ss) {
+               ss->ss->free_super(ss);
+               free(ss);
+       }
        if (locked)
                cluster_release_dlmlock();
-       if (mdfd > 0)
-               close(mdfd);
+       close_fd(&mdfd);
        exit(rv);
 }
 
@@ -1817,8 +1732,7 @@ static int scan_assemble(struct supertype *ss,
                        int r;
                        if (a->assembled)
                                continue;
-                       if (a->devname &&
-                           strcasecmp(a->devname, "<ignore>") == 0)
+                       if (a->devname && is_devname_ignore(a->devname) == true)
                                continue;
 
                        r = Assemble(ss, a->devname,
@@ -2001,7 +1915,7 @@ static int misc_list(struct mddev_dev *devlist,
                        rv |= Kill_subarray(dv->devname, c->subarray, c->verbose);
                        continue;
                case UpdateSubarray:
-                       if (c->update == NULL) {
+                       if (!c->update) {
                                pr_err("-U/--update must be specified with --update-subarray\n");
                                rv |= 1;
                                continue;
@@ -2033,6 +1947,11 @@ static int misc_list(struct mddev_dev *devlist,
                                rv |= Manage_run(dv->devname, mdfd, c);
                                break;
                        case 'S':
+                               if (c->scan) {
+                                       pr_err("--stop not meaningful with both a --scan assembly and a device name.\n");
+                                       rv |= 1;
+                                       break;
+                               }
                                rv |= Manage_stop(dv->devname, mdfd, c->verbose, 0);
                                break;
                        case 'o':
similarity index 86%
rename from mdadm.conf.5
rename to mdadm.conf.5.in
index 27dbab18455d145790e502e101b1a02947ef7d13..14302a91d34e2d4bde07237a8c54870b48b2d561 100644 (file)
@@ -8,7 +8,7 @@
 .SH NAME
 mdadm.conf \- configuration for management of Software RAID with mdadm
 .SH SYNOPSIS
-/etc/mdadm.conf
+{CONFFILE}
 .SH DESCRIPTION
 .PP
 .I mdadm
@@ -88,7 +88,8 @@ but only the major and minor device numbers.  It scans
 .I /dev
 to find the name that matches the numbers.
 
-If no DEVICE line is present, then "DEVICE partitions containers" is assumed.
+If no DEVICE line is present in any config file,
+then "DEVICE partitions containers" is assumed.
 
 For example:
 .IP
@@ -132,13 +133,6 @@ The value should be a 128 bit uuid in hexadecimal, with punctuation
 interspersed if desired.  This must match the uuid stored in the
 superblock.
 .TP
-.B name=
-The value should be a simple textual name as was given to
-.I mdadm
-when the array was created.  This must match the name stored in the
-superblock on a device for that device to be included in the array.
-Not all superblock formats support names.
-.TP
 .B super\-minor=
 The value is an integer which indicates the minor number that was
 stored in the superblock when the array was created. When an array is
@@ -272,6 +266,10 @@ catenated with spaces to form the address.
 Note that this value cannot be set via the
 .I mdadm
 commandline.  It is only settable via the config file.
+There should only be one
+.B MAILADDR
+line and it should have only one address.  Any subsequent addresses
+are silently ignored.
 
 .TP
 .B PROGRAM
@@ -286,7 +284,8 @@ device.
 
 There should only be one
 .B program
-line and it should be give only one program.
+line and it should be given only one program.  Any subsequent programs
+are silently ignored.
 
 
 .TP
@@ -295,7 +294,14 @@ The
 .B create
 line gives default values to be used when creating arrays, new members
 of arrays, and device entries for arrays.
-These include:
+
+There should only be one
+.B create
+line.  Any subsequent lines will override the previous settings.
+
+Keywords used in the
+.I CREATE
+line and supported values are:
 
 .RS 4
 .TP
@@ -325,21 +331,6 @@ missing device entries should be created.
 The name of the metadata format to use if none is explicitly given.
 This can be useful to impose a system-wide default of version-1 superblocks.
 
-.TP
-.B symlinks=no
-Normally when creating devices in
-.B /dev/md/
-.I mdadm
-will create a matching symlink from
-.B /dev/
-with a name starting
-.B md
-or
-.BR md_ .
-Give
-.B symlinks=no
-to suppress this symlink creation.
-
 .TP
 .B names=yes
 Since Linux 2.6.29 it has been possible to create
@@ -426,6 +417,23 @@ from any possible local name. e.g.
 .B /dev/md/1_1
 or
 .BR /dev/md/home_0 .
+
+.TP
+.B HOMECLUSTER
+The
+.B homecluster
+line gives a default value for the
+.B \-\-homecluster=
+option to mdadm.  It specifies  the  cluster name for the md device.
+The md device can be assembled only on the cluster which matches
+the name specified. If
+.B homecluster
+is not provided, mdadm tries to detect the cluster name automatically.
+
+There should only be one
+.B homecluster
+line.  Any subsequent lines will be silently ignored.
+
 .TP
 .B AUTO
 A list of names of metadata format can be given, each preceded by a
@@ -475,8 +483,8 @@ The known metadata types are
 
 .B AUTO
 should be given at most once.  Subsequent lines are silently ignored.
-Thus an earlier config file in a config directory will over-ride
-the setting in a later config file.
+Thus a later config file in a config directory will not overwrite
+the setting in an earlier config file.
 
 .TP
 .B POLICY
@@ -505,7 +513,7 @@ Keywords used in the
 .I POLICY
 line and supported values are:
 
-.RS 7
+.RS 4
 .TP
 .B domain=
 any arbitrary string
@@ -589,9 +597,12 @@ found.
 
 .TP
 .B SYSFS
-The SYSFS line lists custom values of MD device's sysfs attributes which will be
+The
+.B SYSFS
+line lists custom values of MD device's sysfs attributes which will be
 stored in sysfs after the array is assembled. Multiple lines are allowed and each
 line has to contain the uuid or the name of the device to which it relates.
+Lines are applied in reverse order.
 .RS 4
 .TP
 .B uuid=
@@ -604,8 +615,75 @@ name of the MD device as was given to
 when the array was created. It will be ignored if
 .B uuid
 is not empty.
+.RE
+
 .TP
-.RS 7
+.B MONITORDELAY
+The
+.B monitordelay
+line gives a delay in seconds
+.I mdadm
+shall wait before polling md arrays
+when
+.I mdadm
+is running in
+.B \-\-monitor
+mode.
+.B \-d/\-\-delay
+command line argument takes precedence over the config file.
+
+If multiple
+.B MONITORDELAY
+lines are provided, only the first non-zero value is considered.
+
+.TP
+.B ENCRYPTION_NO_VERIFY
+The
+.B ENCRYPTION_NO_VERIFY
+line disables encryption verification for devices with particular encryption support detected.
+Currently, only verification of SATA OPAL encryption can be disabled.
+It does not disable ATA security encryption verification.
+Currently effective only for
+.I IMSM
+metadata.
+Available parameter
+.I "sata_opal".
+
+
+.SH FILES
+
+.SS {CONFFILE}
+
+The default config file location, used when
+.I mdadm
+is running without --config option.
+
+.SS {CONFFILE}.d
+
+The default directory with config files. Used when
+.I mdadm
+is running without --config option, after successful reading of the
+.B {CONFFILE}
+default config file. Files in that directory
+are read in lexical order.
+
+
+.SS {CONFFILE2}
+
+Alternative config file that is read, when
+.I mdadm
+is running without --config option and the
+.B {CONFFILE}
+default config file was not opened successfully.
+
+.SS {CONFFILE2}.d
+
+The alternative directory with config files. Used when
+.I mdadm
+is running without --config option, after reading the
+.B {CONFFILE2}
+alternative config file whether it was successful or not. Files in
+that directory are read in lexical order.
 
 .SH EXAMPLE
 DEVICE /dev/sd[bcdjkl]1
@@ -646,10 +724,6 @@ ARRAY /dev/md/home UUID=9187a482:5dde19d9:eea3cc4a:d646ab8b
 .br
            auto=part
 .br
-# The name of this array contains a space.
-.br
-ARRAY /dev/md9 name='Data Storage'
-.sp
 POLICY domain=domain1 metadata=imsm path=pci-0000:00:1f.2-scsi-*
 .br
            action=spare
@@ -682,6 +756,10 @@ SYSFS name=/dev/md/raid5 group_thread_cnt=4 sync_speed_max=1000000
 .br
 SYSFS uuid=bead5eb6:31c17a27:da120ba2:7dfda40d group_thread_cnt=4
 sync_speed_max=1000000
+.br
+MONITORDELAY 60
+.br
+ENCRYPTION_NO_VERIFY sata_opal
 
 .SH SEE ALSO
 .BR mdadm (8),
diff --git a/mdadm.h b/mdadm.h
index d94569f903b0aebf90f5a3dc44868104de820e0c..2ff3e46383cde40e15542799a615956e62fe83ed 100644 (file)
--- a/mdadm.h
+++ b/mdadm.h
@@ -33,8 +33,10 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
 # endif
 #endif
 
+#include       <assert.h>
 #include       <sys/types.h>
 #include       <sys/stat.h>
+#include       <stdarg.h>
 #include       <stdint.h>
 #include       <stdlib.h>
 #include       <time.h>
@@ -45,6 +47,8 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
 #include       <errno.h>
 #include       <string.h>
 #include       <syslog.h>
+#include       <stdbool.h>
+#include       <signal.h>
 /* Newer glibc requires sys/sysmacros.h directly for makedev() */
 #include       <sys/sysmacros.h>
 #ifdef __dietlibc__
@@ -96,6 +100,22 @@ struct dlm_lksb {
 #define DEFAULT_BITMAP_DELAY 5
 #define DEFAULT_MAX_WRITE_BEHIND 256
 
+/* DEV_NUM_PREF is a subpath to numbered MD devices, e.g. /dev/md1 or directory name.
+ * DEV_NUM_PREF_LEN is a length with Null byte excluded.
+ */
+#ifndef DEV_NUM_PREF
+#define DEV_NUM_PREF "/dev/md"
+#define DEV_NUM_PREF_LEN (sizeof(DEV_NUM_PREF) - 1)
+#endif /* DEV_NUM_PREF */
+
+/* DEV_MD_DIR points to named MD devices directory.
+ * DEV_MD_DIR_LEN is a length with Null byte excluded.
+ */
+#ifndef DEV_MD_DIR
+#define DEV_MD_DIR "/dev/md/"
+#define DEV_MD_DIR_LEN (sizeof(DEV_MD_DIR) - 1)
+#endif /* DEV_MD_DIR */
+
 /* MAP_DIR should be somewhere that persists across the pivotroot
  * from early boot to late boot.
  * /run  seems to have emerged as the best standard.
@@ -129,6 +149,14 @@ struct dlm_lksb {
 #define FAILED_SLOTS_DIR "/run/mdadm/failed-slots"
 #endif /* FAILED_SLOTS */
 
+#ifndef MDMON_SERVICE
+#define MDMON_SERVICE "mdmon"
+#endif /* MDMON_SERVICE */
+
+#ifndef GROW_SERVICE
+#define GROW_SERVICE "mdadm-grow-continue"
+#endif /* GROW_SERVICE */
+
 #include       "md_u.h"
 #include       "md_p.h"
 #include       "bitmap.h"
@@ -263,6 +291,14 @@ static inline void __put_unaligned32(__u32 val, void *p)
 
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
+#define KIB_TO_BYTES(x)        ((x) << 10)
+#define SEC_TO_BYTES(x)        ((x) << 9)
+
+/**
+ * This is true for native and DDF, IMSM allows 16.
+ */
+#define MD_NAME_MAX 32
+
 extern const char Name[];
 
 struct md_bb_entry {
@@ -341,9 +377,13 @@ struct mdinfo {
        int container_member; /* for assembling external-metatdata arrays
                               * This is to be used internally by metadata
                               * handler only */
-       int container_enough; /* flag external handlers can set to
-                              * indicate that subarrays have not enough (-1),
-                              * enough to start (0), or all expected disks (1) */
+       /**
+        * flag external handlers can set to indicate that subarrays have:
+        * - not enough disks to start (-1),
+        * - enough disks to start (0),
+        * - all expected disks (1).
+        */
+       int container_enough;
        char            sys_name[32];
        struct mdinfo *devs;
        struct mdinfo *next;
@@ -384,17 +424,24 @@ struct createinfo {
        int     gid;
        int     autof;
        int     mode;
-       int     symlinks;
        int     names;
        int     bblist;
        struct supertype *supertype;
 };
 
 struct spare_criteria {
+       bool criteria_set;
        unsigned long long min_size;
        unsigned int sector_size;
+       struct dev_policy *pols;
 };
 
+typedef enum mdadm_status {
+       MDADM_STATUS_SUCCESS = 0,
+       MDADM_STATUS_ERROR,
+       MDADM_STATUS_UNDEF,
+} mdadm_status_t;
+
 enum mode {
        ASSEMBLE=1,
        BUILD,
@@ -409,6 +456,7 @@ enum mode {
 };
 
 extern char short_options[];
+extern char short_monitor_options[];
 extern char short_bitmap_options[];
 extern char short_bitmap_auto_options[];
 extern struct option long_options[];
@@ -423,6 +471,7 @@ extern char Version[], Usage[], Help[], OptionHelp[],
  */
 enum special_options {
        AssumeClean = 300,
+       WriteZeroes,
        BitmapChunk,
        WriteBehind,
        ReAdd,
@@ -431,7 +480,6 @@ enum special_options {
        BackupFile,
        HomeHost,
        AutoHomeHost,
-       Symlinks,
        AutoDetect,
        Waitclean,
        DetailPlatform,
@@ -486,6 +534,51 @@ enum special_options {
        ConsistencyPolicy,
 };
 
+enum update_opt {
+       UOPT_NAME = 1,
+       UOPT_PPL,
+       UOPT_NO_PPL,
+       UOPT_BITMAP,
+       UOPT_NO_BITMAP,
+       UOPT_SUBARRAY_ONLY,
+       UOPT_SPARC22,
+       UOPT_SUPER_MINOR,
+       UOPT_SUMMARIES,
+       UOPT_RESYNC,
+       UOPT_UUID,
+       UOPT_HOMEHOST,
+       UOPT_HOME_CLUSTER,
+       UOPT_NODES,
+       UOPT_DEVICESIZE,
+       UOPT_BBL,
+       UOPT_NO_BBL,
+       UOPT_FORCE_NO_BBL,
+       UOPT_METADATA,
+       UOPT_REVERT_RESHAPE,
+       UOPT_LAYOUT_ORIGINAL,
+       UOPT_LAYOUT_ALTERNATE,
+       UOPT_LAYOUT_UNSPECIFIED,
+       UOPT_BYTEORDER,
+       UOPT_HELP,
+       UOPT_USER_ONLY,
+       /*
+        * Code specific options, cannot be set by the user
+        */
+       UOPT_SPEC_FORCE_ONE,
+       UOPT_SPEC_FORCE_ARRAY,
+       UOPT_SPEC_ASSEMBLE,
+       UOPT_SPEC_LINEAR_GROW_NEW,
+       UOPT_SPEC_LINEAR_GROW_UPDATE,
+       UOPT_SPEC__RESHAPE_PROGRESS,
+       UOPT_SPEC_WRITEMOSTLY,
+       UOPT_SPEC_READWRITE,
+       UOPT_SPEC_FAILFAST,
+       UOPT_SPEC_NOFAILFAST,
+       UOPT_SPEC_REVERT_RESHAPE_NOBACKUP,
+       UOPT_UNDEFINED
+};
+extern void fprint_update_options(FILE *outf, enum update_opt update_mode);
+
 enum prefix_standard {
        JEDEC,
        IEC
@@ -501,6 +594,11 @@ enum flag_mode {
        FlagDefault, FlagSet, FlagClear,
 };
 
+typedef enum {
+       ROLLBACK_METADATA_CHANGES,
+       APPLY_METADATA_CHANGES
+} change_dir_t;
+
 /* structures read from config file */
 /* List of mddevice names and identifiers
  * Identifiers can be:
@@ -517,7 +615,7 @@ struct mddev_ident {
 
        int     uuid_set;
        int     uuid[4];
-       char    name[33];
+       char    name[MD_NAME_MAX + 1];
 
        int super_minor;
 
@@ -560,7 +658,7 @@ struct context {
        int     export;
        int     test;
        char    *subarray;
-       char    *update;
+       enum    update_opt update;
        int     scan;
        int     SparcAdjust;
        int     autof;
@@ -574,7 +672,9 @@ struct context {
 };
 
 struct shape {
+       char    *dev;
        int     raiddisks;
+       int     delta_disks;
        int     sparedisks;
        int     journaldisks;
        int     level;
@@ -584,9 +684,12 @@ struct shape {
        int     bitmap_chunk;
        char    *bitmap_file;
        int     assume_clean;
+       bool    write_zeroes;
        int     write_behind;
        unsigned long long size;
+       unsigned long long data_offset;
        int     consistency_policy;
+       change_dir_t direction;
 };
 
 /* List of device names - wildcards expanded */
@@ -628,7 +731,7 @@ struct mdstat_ent {
 extern struct mdstat_ent *mdstat_read(int hold, int start);
 extern void mdstat_close(void);
 extern void free_mdstat(struct mdstat_ent *ms);
-extern void mdstat_wait(int seconds);
+extern int mdstat_wait(int seconds);
 extern void mdstat_wait_fd(int fd, const sigset_t *sigmask);
 extern int mddev_busy(char *devnm);
 extern struct mdstat_ent *mdstat_by_component(char *name);
@@ -681,6 +784,10 @@ enum sysfs_read_flags {
        GET_DEVS_ALL    = (1 << 27),
 };
 
+#define SYSFS_MAX_BUF_SIZE 64
+
+extern void sysfs_get_container_devnm(struct mdinfo *mdi, char *buf);
+
 /* If fd >= 0, get the array it is open on,
  * else use devnm.
  */
@@ -712,7 +819,7 @@ extern int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev,
 extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
                         char *name, char *val, int size);
 extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
-extern int sysfs_set_array(struct mdinfo *info, int vers);
+extern int sysfs_set_array(struct mdinfo *info);
 extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume);
 extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
 extern int sysfs_unique_holder(char *devnm, long rdev);
@@ -754,18 +861,19 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
                           int source, unsigned long long read_offset,
                           unsigned long long start, unsigned long long length,
                           char *src_buf);
+extern bool sysfs_is_libata_allow_tpm_enabled(const int verbose);
 
 #ifndef Sendmail
 #define Sendmail "/usr/lib/sendmail -t"
 #endif
 
 #define SYSLOG_FACILITY LOG_DAEMON
-
+extern char *map_num_s(mapping_t *map, int num);
 extern char *map_num(mapping_t *map, int num);
 extern int map_name(mapping_t *map, char *name);
 extern mapping_t r0layout[], r5layout[], r6layout[],
        pers[], modes[], faultylayout[];
-extern mapping_t consistency_policies[], sysfs_array_states[];
+extern mapping_t consistency_policies[], sysfs_array_states[], update_options[];
 
 extern char *map_dev_preferred(int major, int minor, int create,
                               char *prefer);
@@ -774,6 +882,42 @@ static inline char *map_dev(int major, int minor, int create)
        return map_dev_preferred(major, minor, create, NULL);
 }
 
+/**
+ * is_fd_valid() - check file descriptor.
+ * @fd: file descriptor.
+ *
+ * The function checks if @fd is nonnegative integer and shall be used only
+ * to verify open() result.
+ */
+static inline int is_fd_valid(int fd)
+{
+       return (fd > -1);
+}
+
+/**
+ * is_level456() - check whether given level is between 4 and 6, inclusive.
+ * @level: level to check.
+ *
+ * Return: true if condition is met, false otherwise
+ */
+static inline bool is_level456(int level)
+{
+       return (level >= 4 && level <= 6);
+}
+
+/**
+ * close_fd() - verify, close and unset file descriptor.
+ * @fd: pointer to file descriptor.
+ *
+ * The function closes and invalidates file descriptor if appropriate. It
+ * ignores incorrect file descriptor quietly to simplify error handling.
+ */
+static inline void close_fd(int *fd)
+{
+       if (is_fd_valid(*fd) && close(*fd) == 0)
+               *fd = -1;
+}
+
 struct active_array;
 struct metadata_update;
 
@@ -806,6 +950,23 @@ struct reshape {
        unsigned long long new_size; /* New size of array in sectors */
 };
 
+/**
+ * struct dev_policy - Data structure for policy management.
+ * @next: pointer to next dev_policy.
+ * @name: policy name, category.
+ * @metadata: the metadata type it affects.
+ * @value: value of the policy.
+ *
+ * The functions to manipulate dev_policy lists do not free elements, so they must be statically
+ * allocated. @name and @metadata can be compared by address.
+ */
+typedef struct dev_policy {
+       struct dev_policy *next;
+       char *name;
+       const char *metadata;
+       const char *value;
+} dev_policy_t;
+
 /* A superswitch provides entry point to a metadata handler.
  *
  * The superswitch primarily operates on some "metadata" that
@@ -918,7 +1079,7 @@ extern struct superswitch {
         *                    it will resume going in the opposite direction.
         */
        int (*update_super)(struct supertype *st, struct mdinfo *info,
-                           char *update,
+                           enum update_opt update,
                            char *devname, int verbose,
                            int uuid_set, char *homehost);
 
@@ -958,7 +1119,8 @@ extern struct superswitch {
         * moved in, otherwise the superblock in 'st' is compared with
         * 'tst'.
         */
-       int (*compare_super)(struct supertype *st, struct supertype *tst);
+       int (*compare_super)(struct supertype *st, struct supertype *tst,
+                            int verbose);
        /* Load metadata from a single device.  If 'devname' is not NULL
         * print error messages as appropriate */
        int (*load_super)(struct supertype *st, int fd, char *devname);
@@ -983,10 +1145,9 @@ extern struct superswitch {
         * Return spare criteria for array:
         * - minimum disk size can be used in array;
         * - sector size can be used in array.
-        * Return values: 0 - for success and -EINVAL on error.
         */
-       int (*get_spare_criteria)(struct supertype *st,
-                                 struct spare_criteria *sc);
+       mdadm_status_t (*get_spare_criteria)(struct supertype *st, char *mddev_path,
+                                            struct spare_criteria *sc);
        /* Find somewhere to put a bitmap - possibly auto-size it - and
         * update the metadata to record this.  The array may be newly
         * created, in which case data_size may be updated, or it might
@@ -998,6 +1159,9 @@ extern struct superswitch {
        int (*add_internal_bitmap)(struct supertype *st, int *chunkp,
                                   int delay, int write_behind,
                                   unsigned long long size, int may_change, int major);
+       /* Perform additional setup required to activate a bitmap.
+        */
+       int (*set_bitmap)(struct supertype *st, struct mdinfo *info);
        /* Seek 'fd' to start of write-intent-bitmap.  Must be an
         * md-native format bitmap
         */
@@ -1031,6 +1195,25 @@ extern struct superswitch {
                                 char *subdev, unsigned long long *freesize,
                                 int consistency_policy, int verbose);
 
+       /**
+        * test_and_add_drive_policies() - test new and add custom policies from metadata handler.
+        * @pols: list of currently recorded policies.
+        * @disk_fd: file descriptor of the device to check.
+        * @verbose: verbose flag.
+        *
+        * Used by IMSM to verify all drives in container/array, against requirements not recorded
+        * in superblock, like controller type for IMSM. It should check all drives even if
+        * they are not actually used, because mdmon or kernel are free to use any drive assigned to
+        * container automatically.
+        *
+        * Generating and comparison methods belong to metadata handler. It is not mandatory to be
+        * implemented.
+        *
+        * Return: MDADM_STATUS_SUCCESS is expected on success.
+        */
+       mdadm_status_t (*test_and_add_drive_policies)(dev_policy_t **pols, int disk_fd,
+                                                     const int verbose);
+
        /* Return a linked list of 'mdinfo' structures for all arrays
         * in the container.  For non-containers, it is like
         * getinfo_super with an allocated mdinfo.*/
@@ -1040,23 +1223,22 @@ extern struct superswitch {
        /* Permit subarray's to be deleted from inactive containers */
        int (*kill_subarray)(struct supertype *st,
                             char *subarray_id); /* optional */
-       /* Permit subarray's to be modified */
+       /**
+        * update_subarray() - Permit subarray to be modified.
+        * @st: Supertype.
+        * @subarray: Subarray name.
+        * @update: Update option.
+        * @ident: Optional identifiers.
+        */
        int (*update_subarray)(struct supertype *st, char *subarray,
-                              char *update, struct mddev_ident *ident); /* optional */
+                              enum update_opt update, struct mddev_ident *ident);
        /* Check if reshape is supported for this external format.
         * st is obtained from super_by_fd() where st->subarray[0] is
         * initialized to indicate if reshape is being performed at the
         * container or subarray level
         */
-#define APPLY_METADATA_CHANGES         1
-#define ROLLBACK_METADATA_CHANGES      0
-
-       int (*reshape_super)(struct supertype *st,
-                            unsigned long long size, int level,
-                            int layout, int chunksize, int raid_disks,
-                            int delta_disks, char *backup, char *dev,
-                            int direction,
-                            int verbose); /* optional */
+
+       int (*reshape_super)(struct supertype *st, struct shape *shape, struct context *c);
        int (*manage_reshape)( /* optional */
                int afd, struct mdinfo *sra, struct reshape *reshape,
                struct supertype *st, unsigned long blocks,
@@ -1065,7 +1247,7 @@ extern struct superswitch {
 
 /* for mdmon */
        int (*open_new)(struct supertype *c, struct active_array *a,
-                       char *inst);
+                       int inst);
 
        /* Tell the metadata handler the current state of the array.
         * This covers whether it is known to be consistent (no pending writes)
@@ -1106,21 +1288,6 @@ extern struct superswitch {
         */
        struct mdinfo *(*activate_spare)(struct active_array *a,
                                         struct metadata_update **updates);
-       /*
-        * Return statically allocated string that represents metadata specific
-        * controller domain of the disk. The domain is used in disk domain
-        * matching functions. Disks belong to the same domain if the they have
-        * the same domain from mdadm.conf and belong the same metadata domain.
-        * Returning NULL or not providing this handler means that metadata
-        * does not distinguish the differences between disks that belong to
-        * different controllers. They are in the domain specified by
-        * configuration file (mdadm.conf).
-        * In case when the metadata has the notion of domains based on disk
-        * it shall return NULL for disks that do not belong to the controller
-        * the supported domains. Such disks will form another domain and won't
-        * be mixed with supported ones.
-        */
-       const char *(*get_disk_controller_domain)(const char *path);
 
        /* for external backup area */
        int (*recover_backup)(struct supertype *st, struct mdinfo *info);
@@ -1155,6 +1322,8 @@ extern struct superswitch super0, super1;
 extern struct superswitch super_imsm, super_ddf;
 extern struct superswitch mbr, gpt;
 
+void imsm_set_no_platform(int v);
+
 struct metadata_update {
        int     len;
        char    *buf;
@@ -1225,27 +1394,8 @@ extern struct supertype *dup_super(struct supertype *st);
 extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
 extern int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep);
 extern int must_be_container(int fd);
-extern int dev_size_from_id(dev_t id, unsigned long long *size);
-extern int dev_sector_size_from_id(dev_t id, unsigned int *size);
 void wait_for(char *dev, int fd);
 
-/*
- * Data structures for policy management.
- * Each device can have a policy structure that lists
- * various name/value pairs each possibly with a metadata associated.
- * The policy list is sorted by name/value/metadata
- */
-struct dev_policy {
-       struct dev_policy *next;
-       char *name;     /* None of these strings are allocated.  They are
-                        * all just references to strings which are known
-                        * to exist elsewhere.
-                        * name and metadata can be compared by address equality.
-                        */
-       const char *metadata;
-       const char *value;
-};
-
 extern char pol_act[], pol_domain[], pol_metadata[], pol_auto[];
 
 /* iterate over the sublist starting at list, having the same
@@ -1287,10 +1437,16 @@ extern struct dev_policy *disk_policy(struct mdinfo *disk);
 extern struct dev_policy *devid_policy(int devid);
 extern void dev_policy_free(struct dev_policy *p);
 
-//extern void pol_new(struct dev_policy **pol, char *name, char *val, char *metadata);
 extern void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata);
 extern struct dev_policy *pol_find(struct dev_policy *pol, char *name);
 
+extern mdadm_status_t drive_test_and_add_policies(struct supertype *st, dev_policy_t **pols,
+                                                 int fd, const int verbose);
+extern mdadm_status_t sysfs_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols,
+                                                       struct mdinfo *mdi, const int verbose);
+extern mdadm_status_t mddev_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols,
+                                                       int array_fd, const int verbose);
+
 enum policy_action {
        act_default,
        act_include,
@@ -1375,20 +1531,18 @@ extern int Manage_stop(char *devname, int fd, int quiet,
                       int will_retry);
 extern int Manage_subdevs(char *devname, int fd,
                          struct mddev_dev *devlist, int verbose, int test,
-                         char *update, int force);
+                         enum update_opt update, int force);
 extern int autodetect(void);
 extern int Grow_Add_device(char *devname, int fd, char *newdev);
 extern int Grow_addbitmap(char *devname, int fd,
                          struct context *c, struct shape *s);
 extern int Grow_reshape(char *devname, int fd,
                        struct mddev_dev *devlist,
-                       unsigned long long data_offset,
                        struct context *c, struct shape *s);
 extern int Grow_restart(struct supertype *st, struct mdinfo *info,
                        int *fdlist, int cnt, char *backup_file, int verbose);
 extern int Grow_continue(int mdfd, struct supertype *st,
-                        struct mdinfo *info, char *backup_file,
-                        int forked, int freeze_reshape);
+                        struct mdinfo *info, int forked, struct context *c);
 extern int Grow_consistency_policy(char *devname, int fd,
                                   struct context *c, struct shape *s);
 
@@ -1398,23 +1552,18 @@ extern int restore_backup(struct supertype *st,
                          int spares,
                          char **backup_filep,
                          int verbose);
-extern int Grow_continue_command(char *devname, int fd,
-                                char *backup_file, int verbose);
+extern int Grow_continue_command(char *devname, int fd, struct context *c);
 
 extern int Assemble(struct supertype *st, char *mddev,
                    struct mddev_ident *ident,
                    struct mddev_dev *devlist,
                    struct context *c);
 
-extern int Build(char *mddev, struct mddev_dev *devlist,
-                struct shape *s, struct context *c);
+extern int Build(struct mddev_ident *ident, struct mddev_dev *devlist, struct shape *s,
+                struct context *c);
 
-extern int Create(struct supertype *st, char *mddev,
-                 char *name, int *uuid,
-                 int subdevs, struct mddev_dev *devlist,
-                 struct shape *s,
-                 struct context *c,
-                 unsigned long long data_offset);
+extern int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
+                 struct mddev_dev *devlist, struct shape *s, struct context *c);
 
 extern int Detail(char *dev, struct context *c);
 extern int Detail_Platform(struct superswitch *ss, int scan, int verbose, int export, char *controller_path);
@@ -1431,7 +1580,7 @@ extern int Monitor(struct mddev_dev *devlist,
 
 extern int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl);
 extern int Kill_subarray(char *dev, char *subarray, int verbose);
-extern int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet);
+extern int Update_subarray(char *dev, char *subarray, enum update_opt update, struct mddev_ident *ident, int quiet);
 extern int Wait(char *dev);
 extern int WaitClean(char *dev, int verbose);
 extern int SetAction(char *dev, char *action);
@@ -1447,6 +1596,7 @@ extern int CreateBitmap(char *filename, int force, char uuid[16],
                        unsigned long long array_size,
                        int major);
 extern int ExamineBitmap(char *filename, int brief, struct supertype *st);
+extern int IsBitmapDirty(char *filename);
 extern int Write_rules(char *rule_name);
 extern int bitmap_update_uuid(int fd, int *uuid, int swap);
 
@@ -1472,10 +1622,11 @@ extern int get_linux_version(void);
 extern int mdadm_version(char *version);
 extern unsigned long long parse_size(char *size);
 extern int parse_uuid(char *str, int uuid[4]);
+int default_layout(struct supertype *st, int level, int verbose);
 extern int is_near_layout_10(int layout);
 extern int parse_layout_10(char *layout);
 extern int parse_layout_faulty(char *layout);
-extern long parse_num(char *num);
+extern int parse_num(int *dest, const char *num);
 extern int parse_cluster_confirm_arg(char *inp, char **devname, int *slot);
 extern int check_ext2(int fd, char *name);
 extern int check_reiser(int fd, char *name);
@@ -1483,11 +1634,15 @@ extern int check_raid(int fd, char *name);
 extern int check_partitions(int fd, char *dname,
                            unsigned long long freesize,
                            unsigned long long size);
+extern bool is_name_posix_compatible(const char *path);
 extern int fstat_is_blkdev(int fd, char *devname, dev_t *rdev);
 extern int stat_is_blkdev(char *devname, dev_t *rdev);
 
+extern bool is_string_lq(const char * const str, size_t max_len);
+extern bool is_dev_alive(char *path);
 extern int get_mdp_major(void);
 extern int get_maj_min(char *dev, int *major, int *minor);
+extern bool is_bit_set(int *val, unsigned char index);
 extern int dev_open(char *dev, int flags);
 extern int open_dev(char *devnm);
 extern void reopen_mddev(int mdfd);
@@ -1497,6 +1652,12 @@ extern int is_standard(char *dev, int *nump);
 extern int same_dev(char *one, char *two);
 extern int compare_paths (char* path1,char* path2);
 extern void enable_fds(int devices);
+extern void manage_fork_fds(int close_all);
+extern int continue_via_systemd(char *devnm, char *service_name, char *prefix);
+
+extern void ident_init(struct mddev_ident *ident);
+extern mdadm_status_t ident_set_devname(struct mddev_ident *ident, const char *devname);
+extern mdadm_status_t ident_set_name(struct mddev_ident *ident, const char *name);
 
 extern int parse_auto(char *str, char *msg, int config);
 extern struct mddev_ident *conf_get_ident(char *dev);
@@ -1510,13 +1671,18 @@ extern char *conf_get_mailfrom(void);
 extern char *conf_get_program(void);
 extern char *conf_get_homehost(int *require_homehostp);
 extern char *conf_get_homecluster(void);
+extern int conf_get_monitor_delay(void);
+extern bool conf_get_sata_opal_encryption_no_verify(void);
 extern char *conf_line(FILE *file);
 extern char *conf_word(FILE *file, int allow_key);
 extern void print_quoted(char *str);
-extern void print_escape(char *str);
 extern int use_udev(void);
+extern void print_escape(char *str);
 extern unsigned long GCD(unsigned long a, unsigned long b);
 extern int conf_name_is_free(char *name);
+extern bool is_devname_ignore(const char *devname);
+extern bool is_devname_md_numbered(const char *devname);
+extern bool is_devname_md_d_numbered(const char *devname);
 extern int conf_verify_devnames(struct mddev_ident *array_list);
 extern int devname_matches(char *name, char *match);
 extern struct mddev_ident *conf_match(struct supertype *st,
@@ -1531,8 +1697,7 @@ extern const int uuid_zero[4];
 extern int same_uuid(int a[4], int b[4], int swapuuid);
 extern void copy_uuid(void *a, int b[4], int swapuuid);
 extern char *__fname_from_uuid(int id[4], int swap, char *buf, char sep);
-extern char *fname_from_uuid(struct supertype *st,
-                            struct mdinfo *info, char *buf, char sep);
+extern char *fname_from_uuid(struct mdinfo *info, char *buf);
 extern unsigned long calc_csum(void *super, int bytes);
 extern int enough(int level, int raid_disks, int layout, int clean,
                   char *avail);
@@ -1554,6 +1719,9 @@ extern int assemble_container_content(struct supertype *st, int mdfd,
 #define        INCR_UNSAFE     2
 #define        INCR_ALREADY    4
 #define        INCR_YES        8
+
+extern bool devid_matches_criteria(struct supertype *st, dev_t devid, struct spare_criteria *sc);
+extern bool disk_fd_matches_criteria(struct supertype *st, int disk_fd, struct spare_criteria *sc);
 extern struct mdinfo *container_choose_spares(struct supertype *st,
                                              struct spare_criteria *criteria,
                                              struct domainlist *domlist,
@@ -1591,6 +1759,7 @@ extern int create_mddev(char *dev, char *name, int autof, int trustworthy,
 #define        FOREIGN 2
 #define        METADATA 3
 extern int open_mddev(char *dev, int report_errors);
+extern int is_mddev(char *dev);
 extern int open_container(int fd);
 extern int metadata_container_matches(char *metadata, char *devnm);
 extern int metadata_subdev_matches(char *metadata, char *devnm);
@@ -1617,9 +1786,8 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0
 extern char *stat2kname(struct stat *st);
 extern char *fd2kname(int fd);
 extern char *stat2devnm(struct stat *st);
+bool stat_is_md_dev(struct stat *st);
 extern char *fd2devnm(int fd);
-extern void udev_block(char *devnm);
-extern void udev_unblock(void);
 
 extern int in_initrd(void);
 
@@ -1662,6 +1830,13 @@ extern int cluster_get_dlmlock(void);
 extern int cluster_release_dlmlock(void);
 extern void set_dlm_hooks(void);
 
+#define MSEC_TO_NSEC(msec) ((msec) * 1000000)
+#define USEC_TO_NSEC(usec) ((usec) * 1000)
+extern void sleep_for(unsigned int sec, long nsec, bool wake_after_interrupt);
+extern bool is_directory(const char *path);
+extern bool is_file(const char *path);
+extern int s_gethostname(char *buf, int buf_len);
+
 #define _ROUND_UP(val, base)   (((val) + (base) - 1) & ~(base - 1))
 #define ROUND_UP(val, base)    _ROUND_UP(val, (typeof(val))(base))
 #define ROUND_UP_PTR(ptr, base)        ((typeof(ptr)) \
@@ -1686,6 +1861,26 @@ static inline char *to_subarray(struct mdstat_ent *ent, char *container)
        return &ent->metadata_version[10+strlen(container)+1];
 }
 
+/**
+ * signal_s() - Wrapper for sigaction() with signal()-like interface.
+ * @sig: The signal to set the signal handler to.
+ * @handler: The signal handler.
+ *
+ * Return: previous handler or SIG_ERR on failure.
+ */
+static inline sighandler_t signal_s(int sig, sighandler_t handler)
+{
+       struct sigaction new_act = {0};
+       struct sigaction old_act = {0};
+
+       new_act.sa_handler = handler;
+
+       if (sigaction(sig, &new_act, &old_act) == 0)
+               return old_act.sa_handler;
+
+       return SIG_ERR;
+}
+
 #ifdef DEBUG
 #define dprintf(fmt, arg...) \
        fprintf(stderr, "%s: %s: "fmt, Name, __func__, ##arg)
@@ -1697,8 +1892,7 @@ static inline char *to_subarray(struct mdstat_ent *ent, char *container)
 #define dprintf_cont(fmt, arg...) \
         ({ if (0) fprintf(stderr, fmt, ##arg); 0; })
 #endif
-#include <assert.h>
-#include <stdarg.h>
+
 static inline int xasprintf(char **strp, const char *fmt, ...) {
        va_list ap;
        int ret;
@@ -1716,6 +1910,10 @@ static inline int xasprintf(char **strp, const char *fmt, ...) {
 #endif
 #define cont_err(fmt ...) fprintf(stderr, "       " fmt)
 
+#define pr_info(fmt, args...) printf("%s: "fmt, Name, ##args)
+
+#define pr_vrb(fmt, arg...) ((void)(verbose && pr_err(fmt, ##arg)))
+
 void *xmalloc(size_t len);
 void *xrealloc(void *ptr, size_t len);
 void *xcalloc(size_t num, size_t size);
@@ -1815,7 +2013,8 @@ enum r0layout {
 #define RESYNC_NONE -1
 #define RESYNC_DELAYED -2
 #define RESYNC_PENDING -3
-#define RESYNC_UNKNOWN -4
+#define RESYNC_REMOTE  -4
+#define RESYNC_UNKNOWN -5
 
 /* When using "GET_DISK_INFO" it isn't certain how high
  * we need to check.  So we impose an absolute limit of
@@ -1836,3 +2035,33 @@ enum r0layout {
 #define INVALID_SECTORS 1
 /* And another special number needed for --data_offset=variable */
 #define VARIABLE_OFFSET 3
+
+/**
+ * is_container() - check if @level is &LEVEL_CONTAINER
+ * @level: level value
+ *
+ * return:
+ * 1 if level is equal to &LEVEL_CONTAINER, 0 otherwise.
+ */
+static inline int is_container(const int level)
+{
+       if (level == LEVEL_CONTAINER)
+               return 1;
+       return 0;
+}
+
+#define STR_COMMON_NONE "none"
+
+/**
+ * str_is_none() - check if @str starts with "none".
+ * @str: string
+ *
+ * return:
+ * true if string starts with "none", false otherwise.
+ */
+static inline bool str_is_none(char *str)
+{
+       if (strncmp(str, STR_COMMON_NONE, sizeof(STR_COMMON_NONE) - 1) == 0)
+               return true;
+       return false;
+}
diff --git a/mdadm.spec b/mdadm.spec
deleted file mode 100644 (file)
index 1c66894..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-Summary:     mdadm is used for controlling Linux md devices (aka RAID arrays)
-Name:        mdadm
-Version:     4.1
-Release:     1
-Source:      http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz
-URL:         http://neil.brown.name/blog/mdadm
-License:     GPL
-Group:       Utilities/System
-BuildRoot:   %{_tmppath}/%{name}-root
-Obsoletes:   mdctl
-
-%description
-mdadm is a program that can be used to create, manage, and monitor
-Linux MD (Software RAID) devices.
-
-%prep
-%setup -q
-# we want to install in /sbin, not /usr/sbin...
-%define _exec_prefix %{nil}
-
-%build
-# This is a debatable issue. The author of this RPM spec file feels that
-# people who install RPMs (especially given that the default RPM options
-# will strip the binary) are not going to be running gdb against the
-# program.
-make CXFLAGS="$RPM_OPT_FLAGS" SYSCONFDIR="%{_sysconfdir}"
-
-%install
-make DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} BINDIR=%{_sbindir} install
-install -D -m644 mdadm.conf-example $RPM_BUILD_ROOT/%{_sysconfdir}/mdadm.conf
-
-%clean
-rm -rf $RPM_BUILD_ROOT
-
-%files
-%defattr(-,root,root)
-%doc TODO ChangeLog mdadm.conf-example COPYING
-%{_sbindir}/mdadm
-%{_sbindir}/mdmon
-/usr/lib/udev/rules.d/01-md-raid-creating.rules
-/usr/lib/udev/rules.d/63-md-raid-arrays.rules
-/usr/lib/udev/rules.d/64-md-raid-assembly.rules
-/usr/lib/udev/rules.d/69-md-clustered-confirm-device.rules
-%config(noreplace,missingok)/%{_sysconfdir}/mdadm.conf
-%{_mandir}/man*/md*
-
-%changelog
diff --git a/mdmon.8 b/mdmon.8
index 4cbc2ba7ef5f5b40faef77eeb0051d883ef61c4c..907c4a0bf07f87f89a69d94aa59a9aabd73ad81a 100644 (file)
--- a/mdmon.8
+++ b/mdmon.8
@@ -1,5 +1,5 @@
 .\" See file COPYING in distribution for details.
-.TH MDMON 8 "" v4.1-rc2
+.TH MDMON 8 "" v4.3
 .SH NAME
 mdmon \- monitor MD external metadata arrays
 
diff --git a/mdmon.c b/mdmon.c
index ff985d291ee9045ede3daf68c1184af3ee0384eb..5fdb5cdb5a495eb5fa59329394857e703f34601c 100644 (file)
--- a/mdmon.c
+++ b/mdmon.c
@@ -56,7 +56,6 @@
 #include       <errno.h>
 #include       <string.h>
 #include       <fcntl.h>
-#include       <signal.h>
 #include       <dirent.h>
 #ifdef USE_PTHREADS
 #include       <pthread.h>
@@ -100,7 +99,7 @@ static int clone_monitor(struct supertype *container)
        if (rc)
                return rc;
        while (mon_tid == -1)
-               usleep(10);
+               sleep_for(0, USEC_TO_NSEC(10), true);
        pthread_attr_destroy(&attr);
 
        mgr_tid = syscall(SYS_gettid);
@@ -210,7 +209,7 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
                rv = kill(pid, SIGUSR1);
                if (rv < 0)
                        break;
-               usleep(200000);
+               sleep_for(0, MSEC_TO_NSEC(200), true);
        }
 }
 
@@ -241,7 +240,7 @@ static int make_control_sock(char *devname)
                return -1;
 
        addr.sun_family = PF_LOCAL;
-       strcpy(addr.sun_path, path);
+       snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path);
        umask(077); /* ensure no world write access */
        if (bind(sfd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
                close(sfd);
@@ -289,17 +288,27 @@ void usage(void)
        exit(2);
 }
 
+static bool is_duplicate_opt(const int opt, const int set_val, const char *long_name)
+{
+       if (opt == set_val) {
+               pr_err("--%s option duplicated!\n", long_name);
+               return true;
+       }
+       return false;
+}
+
 static int mdmon(char *devnm, int must_fork, int takeover);
 
 int main(int argc, char *argv[])
 {
        char *container_name = NULL;
-       char *devnm = NULL;
        int status = 0;
        int opt;
        int all = 0;
        int takeover = 0;
        int dofork = 1;
+       int mdfd = -1;
+       bool help = false;
        static struct option options[] = {
                {"all", 0, NULL, 'a'},
                {"takeover", 0, NULL, 't'},
@@ -309,54 +318,78 @@ int main(int argc, char *argv[])
                {NULL, 0, NULL, 0}
        };
 
-       if (in_initrd()) {
-               /*
-                * set first char of argv[0] to @. This is used by
-                * systemd to signal that the task was launched from
-                * initrd/initramfs and should be preserved during shutdown
-                */
-               argv[0][0] = '@';
-       }
+       /*
+        * mdmon should never complain due to lack of a platform,
+        * that is mdadm's job if at all.
+        */
+       imsm_set_no_platform(1);
 
        while ((opt = getopt_long(argc, argv, "thaF", options, NULL)) != -1) {
                switch (opt) {
                case 'a':
+                       if (is_duplicate_opt(all, 1, "all"))
+                               exit(1);
                        container_name = argv[optind-1];
                        all = 1;
                        break;
                case 't':
+                       if (is_duplicate_opt(takeover, 1, "takeover"))
+                               exit(1);
                        takeover = 1;
                        break;
                case 'F':
+                       if (is_duplicate_opt(dofork, 0, "foreground"))
+                               exit(1);
                        dofork = 0;
                        break;
                case OffRootOpt:
+                       if (is_duplicate_opt(argv[0][0], '@', "offroot"))
+                               exit(1);
                        argv[0][0] = '@';
                        break;
                case 'h':
+                       if (is_duplicate_opt(help, true, "help"))
+                               exit(1);
+                       help = true;
+                       break;
                default:
                        usage();
                        break;
                }
        }
 
-       if (all == 0 && container_name == NULL) {
-               if (argv[optind])
-                       container_name = argv[optind];
+       if (in_initrd()) {
+               /*
+                * set first char of argv[0] to @. This is used by
+                * systemd to signal that the task was launched from
+                * initrd/initramfs and should be preserved during shutdown
+                */
+               argv[0][0] = '@';
        }
 
-       if (container_name == NULL)
-               usage();
+       if (!all && argv[optind]) {
+               static const char prefix[] = "initrd/";
+               container_name = argv[optind];
+               if (strncmp(container_name, prefix,
+                           sizeof(prefix) - 1) == 0)
+                       container_name += sizeof(prefix)-1;
+               container_name = get_md_name(container_name);
+               if (!container_name)
+                       return 1;
+       }
 
-       if (argc - optind > 1)
+       if (container_name == NULL || argc - optind > 1)
                usage();
 
        if (strcmp(container_name, "/proc/mdstat") == 0)
                all = 1;
 
+       if (help)
+               usage();
+
        if (all) {
                struct mdstat_ent *mdstat, *e;
-               int container_len = strlen(container_name);
+               int container_len = strnlen(container_name, MD_NAME_MAX);
 
                /* launch an mdmon instance for each container found */
                mdstat = mdstat_read(0, 0);
@@ -377,23 +410,20 @@ int main(int argc, char *argv[])
                free_mdstat(mdstat);
 
                return status;
-       } else if (strncmp(container_name, "md", 2) == 0) {
-               int id = devnm2devid(container_name);
-               if (id)
-                       devnm = container_name;
-       } else {
-               struct stat st;
-
-               if (stat(container_name, &st) == 0)
-                       devnm = xstrdup(stat2devnm(&st));
        }
 
-       if (!devnm) {
-               pr_err("%s is not a valid md device name\n",
-                       container_name);
-               exit(1);
+       mdfd = open_mddev(container_name, 0);
+       if (is_fd_valid(mdfd)) {
+               char *devnm = fd2devnm(mdfd);
+
+               close(mdfd);
+
+               if (devnm)
+                       return mdmon(devnm, dofork && do_fork(), takeover);
        }
-       return mdmon(devnm, dofork && do_fork(), takeover);
+
+       pr_err("%s is not a valid md device name\n", container_name);
+       return 1;
 }
 
 static int mdmon(char *devnm, int must_fork, int takeover)
@@ -443,7 +473,7 @@ static int mdmon(char *devnm, int must_fork, int takeover)
                pfd[0] = pfd[1] = -1;
 
        container = xcalloc(1, sizeof(*container));
-       strcpy(container->devnm, devnm);
+       snprintf(container->devnm, MD_NAME_MAX, "%s", devnm);
        container->arrays = NULL;
        container->sock = -1;
 
@@ -546,14 +576,7 @@ static int mdmon(char *devnm, int must_fork, int takeover)
        }
 
        setsid();
-       close(0);
-       open("/dev/null", O_RDWR);
-       close(1);
-       ignore = dup(0);
-#ifndef DEBUG
-       close(2);
-       ignore = dup(0);
-#endif
+       manage_fork_fds(0);
 
        /* This silliness is to stop the compiler complaining
         * that we ignore 'ignore'
index 245be537823016c33325599303d108845cb09aaa..eaa59b5925af40f0d4b73c02bc9380eee5473785 100644 (file)
--- a/mdopen.c
+++ b/mdopen.c
@@ -23,6 +23,7 @@
  */
 
 #include "mdadm.h"
+#include "udev.h"
 #include "md_p.h"
 #include <ctype.h>
 
@@ -176,7 +177,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
        char devnm[32];
        char cbuf[400];
 
-       if (!use_udev())
+       if (!udev_is_available())
                block_udev = 0;
 
        if (chosen == NULL)
@@ -188,19 +189,19 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
        parts = autof >> 3;
        autof &= 7;
 
-       strcpy(chosen, "/dev/md/");
+       strcpy(chosen, DEV_MD_DIR);
        cname = chosen + strlen(chosen);
 
        if (dev) {
-               if (strncmp(dev, "/dev/md/", 8) == 0) {
-                       strcpy(cname, dev+8);
+               if (strncmp(dev, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) {
+                       snprintf(cname, MD_NAME_MAX, "%s", dev + DEV_MD_DIR_LEN);
                } else if (strncmp(dev, "/dev/", 5) == 0) {
                        char *e = dev + strlen(dev);
                        while (e > dev && isdigit(e[-1]))
                                e--;
                        if (e[0])
                                num = strtoul(e, NULL, 10);
-                       strcpy(cname, dev+5);
+                       snprintf(cname, MD_NAME_MAX, "%s", dev + 5);
                        cname[e-(dev+5)] = 0;
                        /* name *must* be mdXX or md_dXX in this context */
                        if (num < 0 ||
@@ -335,8 +336,8 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
        devnm[0] = 0;
        if (num < 0 && cname && ci->names) {
                sprintf(devnm, "md_%s", cname);
-               if (block_udev)
-                       udev_block(devnm);
+               if (block_udev && udev_block(devnm) != UDEV_STATUS_SUCCESS)
+                       return -1;
                if (!create_named_array(devnm)) {
                        devnm[0] = 0;
                        udev_unblock();
@@ -344,8 +345,8 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
        }
        if (num >= 0) {
                sprintf(devnm, "md%d", num);
-               if (block_udev)
-                       udev_block(devnm);
+               if (block_udev && udev_block(devnm) != UDEV_STATUS_SUCCESS)
+                       return -1;
                if (!create_named_array(devnm)) {
                        devnm[0] = 0;
                        udev_unblock();
@@ -368,8 +369,9 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
                                return -1;
                        }
                }
-               if (block_udev)
-                       udev_block(devnm);
+               if (block_udev && udev_block(devnm) != UDEV_STATUS_SUCCESS)
+                       return -1;
+               create_named_array(devnm);
        }
 
        sprintf(devname, "/dev/%s", devnm);
@@ -383,7 +385,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
         * If we cannot detect udev, we need to make
         * devices and links ourselves.
         */
-       if (!use_udev()) {
+       if (!udev_is_available()) {
                /* Make sure 'devname' exists and 'chosen' is a symlink to it */
                if (lstat(devname, &stb) == 0) {
                        /* Must be the correct device, else error */
@@ -411,11 +413,11 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
                        make_parts(devname, parts);
 
                if (strcmp(chosen, devname) != 0) {
-                       if (mkdir("/dev/md",0700) == 0) {
-                               if (chown("/dev/md", ci->uid, ci->gid))
-                                       perror("chown /dev/md");
-                               if (chmod("/dev/md", ci->mode| ((ci->mode>>2) & 0111)))
-                                       perror("chmod /dev/md");
+                       if (mkdir(DEV_NUM_PREF, 0700) == 0) {
+                               if (chown(DEV_NUM_PREF, ci->uid, ci->gid))
+                                       perror("chown " DEV_NUM_PREF);
+                               if (chmod(DEV_NUM_PREF, ci->mode | ((ci->mode >> 2) & 0111)))
+                                       perror("chmod " DEV_NUM_PREF);
                        }
 
                        if (dev && strcmp(chosen, dev) == 0)
@@ -475,6 +477,23 @@ int open_mddev(char *dev, int report_errors)
        return mdfd;
 }
 
+/**
+ * is_mddev() - check that file name passed is an md device.
+ * @dev: file name that has to be checked.
+ * Return: 1 if file passed is an md device, 0 if not.
+ */
+int is_mddev(char *dev)
+{
+       int fd = open_mddev(dev, 1);
+
+       if (fd >= 0) {
+               close(fd);
+               return 1;
+       }
+
+       return 0;
+}
+
 char *find_free_devnm(int use_partitions)
 {
        static char devnm[32];
@@ -490,7 +509,7 @@ char *find_free_devnm(int use_partitions)
                        continue;
                if (!conf_name_is_free(devnm))
                        continue;
-               if (!use_udev()) {
+               if (!udev_is_available()) {
                        /* make sure it is new to /dev too, at least as a
                         * non-standard */
                        dev_t devid = devnm2devid(devnm);
index 7e600d0cb03a7d072978226f5cd5cdab3a51f992..2fd792c5d56de1ce4547efd3a3c2fcd71ab96a87 100644 (file)
--- a/mdstat.c
+++ b/mdstat.c
@@ -135,7 +135,6 @@ struct mdstat_ent *mdstat_read(int hold, int start)
        if (hold && mdstat_fd != -1) {
                off_t offset = lseek(mdstat_fd, 0L, 0);
                if (offset == (off_t)-1) {
-                       mdstat_close();
                        return NULL;
                }
                fd = dup(mdstat_fd);
@@ -192,6 +191,12 @@ struct mdstat_ent *mdstat_read(int hold, int start)
                        else if (strcmp(w, "inactive") == 0) {
                                ent->active = 0;
                                in_devs = 1;
+                       } else if (strcmp(w, "bitmap:") == 0) {
+                               /* We need to stop parsing here;
+                                * otherwise, ent->raid_disks will be
+                                * overwritten by the wrong value.
+                                */
+                               break;
                        } else if (ent->active > 0 &&
                                 ent->level == NULL &&
                                 w[0] != '(' /*readonly*/) {
@@ -257,6 +262,8 @@ struct mdstat_ent *mdstat_read(int hold, int start)
                                        ent->percent = RESYNC_DELAYED;
                                if (l > 8 && strcmp(w+l-8, "=PENDING") == 0)
                                        ent->percent = RESYNC_PENDING;
+                               if (l > 7 && strcmp(w+l-7, "=REMOTE") == 0)
+                                       ent->percent = RESYNC_REMOTE;
                        } else if (ent->percent == RESYNC_NONE &&
                                   w[0] >= '0' &&
                                   w[0] <= '9' &&
@@ -301,7 +308,17 @@ void mdstat_close(void)
        mdstat_fd = -1;
 }
 
-void mdstat_wait(int seconds)
+/*
+ * function: mdstat_wait
+ * Description: Function waits for event on mdstat.
+ * Parameters:
+ *             seconds - timeout for waiting
+ * Returns:
+ *             > 0 - detected event
+ *             0 - timeout
+ *             < 0 - detected error
+ */
+int mdstat_wait(int seconds)
 {
        fd_set fds;
        struct timeval tm;
@@ -310,10 +327,13 @@ void mdstat_wait(int seconds)
        if (mdstat_fd >= 0) {
                FD_SET(mdstat_fd, &fds);
                maxfd = mdstat_fd;
-       }
+       } else
+               return -1;
+
        tm.tv_sec = seconds;
        tm.tv_usec = 0;
-       select(maxfd + 1, NULL, NULL, &fds, &tm);
+
+       return select(maxfd + 1, NULL, NULL, &fds, &tm);
 }
 
 void mdstat_wait_fd(int fd, const sigset_t *sigmask)
index 42d4094a27543830658b49b5536bfe9e5f114d3b..f87999d3e797bb000ef99ad530baf246e88aec7b 100644 (file)
@@ -125,11 +125,13 @@ do
        do
                eval fl=\$MD_${i}_fl
                eval sys=\$MD_${i}_sys
+               eval dev=\$MD_${i}_dev
 
                if [ -z "$fl" ]; then continue; fi
 
                if [ "`cat $sys/md/sync_action`" != 'check' ]
                then
+                       logger -p daemon.info mdcheck finished checking $dev
                        eval MD_${i}_fl=
                        rm -f $fl
                        continue;
@@ -138,7 +140,13 @@ do
                echo $a > $fl
                any=yes
        done
-       if [ -z "$any" ]; then exit 0; fi
+       # mdcheck_continue.timer is started by mdcheck_start.timer.
+       # When the check action can be finished in mdcheck_start.service,
+       # it doesn't need mdcheck_continue anymore.
+       if [ -z "$any" ]; then
+               systemctl stop mdcheck_continue.timer
+               exit 0;
+       fi
        sleep 120
 done
 
diff --git a/mkinitramfs b/mkinitramfs
deleted file mode 100644 (file)
index c6275dd..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/sh
-
-# make sure we are being run in the right directory...
-if [ -f mkinitramfs ]
-then :
-else
-  echo >&2 mkinitramfs must be run from the mdadm source directory.
-  exit 1
-fi
-if [ -f /bin/busybox ]
-then : good, it exists
-  case `file /bin/busybox` in
-   *statically* ) : good ;;
-   * ) echo >&2 mkinitramfs: /bin/busybox is not statically linked: cannot proceed.
-       exit 1
-  esac
-else
-  echo >&2 "mkinitramfs: /bin/busybox doesn't exist - please install it statically linked."
-    exit 1
-fi
-
-rm -rf initramfs
-mkdir initramfs
-mkdir initramfs/bin
-make mdadm.static
-cp mdadm.static initramfs/bin/mdadm
-cp /bin/busybox initramfs/bin/busybox
-ln initramfs/bin/busybox initramfs/bin/sh
-cat <<- END > initramfs/init
-       #!/bin/sh
-
-       echo 'Auto-assembling boot md array'
-       mkdir /proc
-       mount -t proc proc /proc
-       if [ -n "$rootuuid" ]
-       then arg=--uuid=$rootuuid
-       elif [ -n "$mdminor" ]
-       then arg=--super-minor=$mdminor
-       else arg=--super-minor=0
-       fi
-       echo "Using $arg"
-       mdadm -Acpartitions $arg --auto=part /dev/mda
-       cd /
-       mount /dev/mda1 /root ||  mount /dev/mda /root
-       umount /proc
-       cd /root
-       exec chroot . /sbin/init < /dev/console > /dev/console 2>&1
-END
-chmod +x initramfs/init
-
-(cd initramfs
- find init bin | cpio -o -H newc | gzip --best
-) > init.cpio.gz
-rm -rf initramfs
-ls -l init.cpio.gz
index e0d3be679daf25324db8e0672da16802c9c46b43..be0bec785080351412f4a1fcd07e031e1d356b68 100644 (file)
--- a/monitor.c
+++ b/monitor.c
@@ -22,7 +22,6 @@
 #include "mdmon.h"
 #include <sys/syscall.h>
 #include <sys/select.h>
-#include <signal.h>
 
 static char *array_states[] = {
        "clear", "inactive", "suspended", "readonly", "read-auto",
@@ -82,15 +81,15 @@ static int read_attr(char *buf, int len, int fd)
 
 static void read_resync_start(int fd, unsigned long long *v)
 {
-       char buf[30];
+       char buf[SYSFS_MAX_BUF_SIZE];
        int n;
 
-       n = read_attr(buf, 30, fd);
+       n = read_attr(buf, sizeof(buf), fd);
        if (n <= 0) {
                dprintf("Failed to read resync_start (%d)\n", fd);
                return;
        }
-       if (strncmp(buf, "none", 4) == 0)
+       if (str_is_none(buf) == true)
                *v = MaxSector;
        else
                *v = strtoull(buf, NULL, 10);
@@ -99,11 +98,11 @@ static void read_resync_start(int fd, unsigned long long *v)
 static unsigned long long read_sync_completed(int fd)
 {
        unsigned long long val;
-       char buf[50];
+       char buf[SYSFS_MAX_BUF_SIZE];
        int n;
        char *ep;
 
-       n = read_attr(buf, 50, fd);
+       n = read_attr(buf, sizeof(buf), fd);
 
        if (n <= 0)
                return 0;
@@ -116,8 +115,8 @@ static unsigned long long read_sync_completed(int fd)
 
 static enum array_state read_state(int fd)
 {
-       char buf[20];
-       int n = read_attr(buf, 20, fd);
+       char buf[SYSFS_MAX_BUF_SIZE];
+       int n = read_attr(buf, sizeof(buf), fd);
 
        if (n <= 0)
                return bad_word;
@@ -126,8 +125,8 @@ static enum array_state read_state(int fd)
 
 static enum sync_action read_action( int fd)
 {
-       char buf[20];
-       int n = read_attr(buf, 20, fd);
+       char buf[SYSFS_MAX_BUF_SIZE];
+       int n = read_attr(buf, sizeof(buf), fd);
 
        if (n <= 0)
                return bad_action;
@@ -136,7 +135,7 @@ static enum sync_action read_action( int fd)
 
 int read_dev_state(int fd)
 {
-       char buf[100];
+       char buf[SYSFS_MAX_BUF_SIZE];
        int n = read_attr(buf, sizeof(buf), fd);
        char *cp;
        int rv = 0;
@@ -312,6 +311,9 @@ static int check_for_cleared_bb(struct active_array *a, struct mdinfo *mdi)
        struct md_bb *bb;
        int i;
 
+       if (!ss->get_bad_blocks)
+               return -1;
+
        /*
         * Get a list of bad blocks for an array, then read list of
         * acknowledged bad blocks from kernel and compare it against metadata
@@ -410,6 +412,7 @@ static int read_and_act(struct active_array *a, fd_set *fds)
        int ret = 0;
        int count = 0;
        struct timeval tv;
+       bool write_checkpoint = false;
 
        a->next_state = bad_word;
        a->next_action = bad_action;
@@ -562,52 +565,40 @@ static int read_and_act(struct active_array *a, fd_set *fds)
                }
        }
 
-       /* Check for recovery checkpoint notifications.  We need to be a
-        * minimum distance away from the last checkpoint to prevent
-        * over checkpointing.  Note reshape checkpointing is handled
-        * in the second branch.
-        */
-       if (sync_completed > a->last_checkpoint &&
-           sync_completed - a->last_checkpoint > a->info.component_size >> 4 &&
-           a->curr_action > reshape) {
-               /* A (non-reshape) sync_action has reached a checkpoint.
-                * Record the updated position in the metadata
-                */
-               a->last_checkpoint = sync_completed;
-               a->container->ss->set_array_state(a, a->curr_state <= clean);
-       } else if ((a->curr_action == idle && a->prev_action == reshape) ||
-                  (a->curr_action == reshape &&
-                   sync_completed > a->last_checkpoint)) {
-               /* Reshape has progressed or completed so we need to
-                * update the array state - and possibly the array size
-                */
+       /* Update reshape checkpoint, depending if it finished or progressed */
+       if (a->curr_action == idle && a->prev_action == reshape) {
+               char buf[SYSFS_MAX_BUF_SIZE];
+
                if (sync_completed != 0)
                        a->last_checkpoint = sync_completed;
-               /* We might need to update last_checkpoint depending on
-                * the reason that reshape finished.
-                * if array reshape is really finished:
-                *        set check point to the end, this allows
-                *        set_array_state() to finalize reshape in metadata
-                * if reshape if broken: do not set checkpoint to the end
-                *        this allows for reshape restart from checkpoint
+
+               /*
+                * If reshape really finished, set checkpoint to the end to finalize it.
+                * Do not set checkpoint if reshape is broken.
+                * Reshape will restart from last checkpoint.
                 */
-               if ((a->curr_action != reshape) &&
-                   (a->prev_action == reshape)) {
-                       char buf[40];
-                       if ((sysfs_get_str(&a->info, NULL,
-                                         "reshape_position",
-                                         buf,
-                                         sizeof(buf)) >= 0) &&
-                            strncmp(buf, "none", 4) == 0)
+               if (sysfs_get_str(&a->info, NULL, "reshape_position", buf, sizeof(buf)) >= 0)
+                       if (str_is_none(buf) == true)
                                a->last_checkpoint = a->info.component_size;
-               }
-               a->container->ss->set_array_state(a, a->curr_state <= clean);
-               a->last_checkpoint = sync_completed;
+
+               write_checkpoint = true;
        }
 
-       if (sync_completed > a->last_checkpoint)
+       if (a->curr_action >= reshape && sync_completed > a->last_checkpoint) {
+               /* Sync action (reshape included) progressed past last checkpoint */
                a->last_checkpoint = sync_completed;
 
+               write_checkpoint = true;
+       }
+
+       /* Save checkpoint */
+       if (write_checkpoint) {
+               a->container->ss->set_array_state(a, a->curr_state <= clean);
+
+               if (a->curr_action <= reshape)
+                       a->last_checkpoint = sync_completed;
+       }
+
        if (sync_completed >= a->info.component_size)
                a->last_checkpoint = 0;
 
diff --git a/msg.c b/msg.c
index 45cd45040a6126c22eb2bb2bfdf193cc7357790d..ba0e25be906d5b80de7a9f55ffd504e23accc876 100644 (file)
--- a/msg.c
+++ b/msg.c
@@ -324,7 +324,7 @@ int block_monitor(char *container, const int freeze)
 {
        struct mdstat_ent *ent, *e, *e2;
        struct mdinfo *sra = NULL;
-       char buf[64];
+       char buf[SYSFS_MAX_BUF_SIZE];
        int rv = 0;
 
        if (check_mdmon_version(container))
@@ -366,7 +366,7 @@ int block_monitor(char *container, const int freeze)
                     !sysfs_attribute_available(sra, NULL, "sync_action")) ||
                    (freeze &&
                     sysfs_attribute_available(sra, NULL, "sync_action") &&
-                    sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 &&
+                    sysfs_get_str(sra, NULL, "sync_action", buf, sizeof(buf)) > 0 &&
                     strcmp(buf, "frozen\n") == 0))
                        /* pass */;
                else {
index 04bffc57accf483a64e570db705b3865f8acfd93..15a9fa5ac160f9b5573a91edfce7878690c5ed1f 100644 (file)
 #include <sys/stat.h>
 #include <limits.h>
 
+#define NVME_SUBSYS_PATH "/sys/devices/virtual/nvme-subsystem/"
+
+static bool imsm_orom_has_raid0(const struct imsm_orom *orom)
+{
+       return imsm_rlc_has_bit(orom, IMSM_OROM_RLC_RAID0);
+}
+
+static bool imsm_orom_has_raid1(const struct imsm_orom *orom)
+{
+       return imsm_rlc_has_bit(orom, IMSM_OROM_RLC_RAID1);
+}
+
+static bool imsm_orom_has_raid10(const struct imsm_orom *orom)
+{
+       return imsm_rlc_has_bit(orom, IMSM_OROM_RLC_RAID10);
+}
+
+static bool imsm_orom_has_raid5(const struct imsm_orom *orom)
+{
+       return imsm_rlc_has_bit(orom, IMSM_OROM_RLC_RAID5);
+}
+
+/* IMSM platforms do not define how many disks are allowed for each level,
+ * but there are some global limitations we need to follow.
+ */
+static bool imsm_orom_support_raid_disks_count_raid0(const int raid_disks)
+{
+       return true;
+}
+
+static bool imsm_orom_support_raid_disks_count_raid1(const int raid_disks)
+{
+       if (raid_disks == 2)
+               return true;
+       return false;
+}
+
+static bool imsm_orom_support_raid_disks_count_raid5(const int raid_disks)
+{
+       if (raid_disks > 2)
+               return true;
+       return false;
+}
+
+static bool imsm_orom_support_raid_disks_count_raid10(const int raid_disks)
+{
+       /* raid_disks count must be at least 4 and even */
+       if (raid_disks >= 4 && (raid_disks & 1) == 0)
+               return true;
+       return false;
+}
+
+struct imsm_level_ops imsm_level_ops[] = {
+               {0, imsm_orom_has_raid0, imsm_orom_support_raid_disks_count_raid0, "raid0"},
+               {1, imsm_orom_has_raid1, imsm_orom_support_raid_disks_count_raid1, "raid1"},
+               {5, imsm_orom_has_raid5, imsm_orom_support_raid_disks_count_raid5, "raid5"},
+               {10, imsm_orom_has_raid10, imsm_orom_support_raid_disks_count_raid10, "raid10"},
+               {-1, NULL, NULL, NULL}
+};
+
 static int devpath_to_ll(const char *dev_path, const char *entry,
                         unsigned long long *val);
 
@@ -62,9 +122,10 @@ struct sys_dev *find_driver_devices(const char *bus, const char *driver)
 
        if (strcmp(driver, "isci") == 0)
                type = SYS_DEV_SAS;
-       else if (strcmp(driver, "ahci") == 0)
+       else if (strcmp(driver, "ahci") == 0) {
+               vmd = find_driver_devices("pci", "vmd");
                type = SYS_DEV_SATA;
-       else if (strcmp(driver, "nvme") == 0) {
+       } else if (strcmp(driver, "nvme") == 0) {
                /* if looking for nvme devs, first look for vmd */
                vmd = find_driver_devices("pci", "vmd");
                type = SYS_DEV_NVME;
@@ -113,6 +174,17 @@ struct sys_dev *find_driver_devices(const char *bus, const char *driver)
                        free(rp);
                }
 
+               /* change sata type if under a vmd controller */
+               if (type == SYS_DEV_SATA) {
+                       struct sys_dev *dev;
+                       char *rp = realpath(path, NULL);
+                       for (dev = vmd; dev; dev = dev->next) {
+                               if ((strncmp(dev->path, rp, strlen(dev->path)) == 0))
+                                       type = SYS_DEV_SATA_VMD;
+                       }
+                       free(rp);
+               }
+
                /* if it's not Intel device or mark as VMD connected - skip it. */
                if (devpath_to_vendor(path) != 0x8086 || skip == 1)
                        continue;
@@ -164,7 +236,8 @@ struct sys_dev *find_driver_devices(const char *bus, const char *driver)
        }
        closedir(driver_dir);
 
-       if (vmd) {
+       /* nvme vmd needs a list separate from sata vmd */
+       if (vmd && type == SYS_DEV_NVME) {
                if (list)
                        list->next = vmd;
                else
@@ -199,7 +272,7 @@ struct sys_dev *device_by_id_and_path(__u16 device_id, const char *path)
 
 static int devpath_to_ll(const char *dev_path, const char *entry, unsigned long long *val)
 {
-       char path[strlen(dev_path) + strlen(entry) + 2];
+       char path[strnlen(dev_path, PATH_MAX) + strnlen(entry, PATH_MAX) + 2];
        int fd;
        int n;
 
@@ -237,6 +310,29 @@ __u16 devpath_to_vendor(const char *dev_path)
        return id;
 }
 
+/* Description: Read text value of dev_path/entry field
+ * Parameters:
+ *     dev_path - sysfs path to the device
+ *     entry - entry to be read
+ *     buf - buffer for read value
+ *     len - size of buf
+ *     verbose - error logging level
+ */
+int devpath_to_char(const char *dev_path, const char *entry, char *buf, int len,
+                   int verbose)
+{
+       char path[PATH_MAX];
+
+       snprintf(path, sizeof(path), "%s/%s", dev_path, entry);
+       if (load_sys(path, buf, len)) {
+               if (verbose)
+                       pr_err("Cannot read %s, aborting\n", path);
+               return 1;
+       }
+
+       return 0;
+}
+
 struct sys_dev *find_intel_devices(void)
 {
        struct sys_dev *ahci, *isci, *nvme;
@@ -248,6 +344,7 @@ struct sys_dev *find_intel_devices(void)
                free_sys_dev(&intel_devices);
 
        isci = find_driver_devices("pci", "isci");
+       /* Searching for AHCI will return list of SATA and SATA VMD controllers */
        ahci = find_driver_devices("pci", "ahci");
        /* Searching for NVMe will return list of NVMe and VMD controllers */
        nvme = find_driver_devices("pci", "nvme");
@@ -471,9 +568,6 @@ static const struct imsm_orom *find_imsm_hba_orom(struct sys_dev *hba)
        return get_orom_by_device_id(hba->dev_id);
 }
 
-#define GUID_STR_MAX   37  /* according to GUID format:
-                            * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" */
-
 #define EFI_GUID(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
 ((struct efi_guid) \
 {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
@@ -486,15 +580,17 @@ static const struct imsm_orom *find_imsm_hba_orom(struct sys_dev *hba)
 #define SCU_PROP "RstScuV"
 #define AHCI_PROP "RstSataV"
 #define AHCI_SSATA_PROP "RstsSatV"
-#define AHCI_CSATA_PROP "RstCSatV"
-#define VMD_PROP "RstUefiV"
+#define AHCI_TSATA_PROP "RsttSatV"
+#define VROC_VMD_PROP "RstUefiV"
+#define RST_VMD_PROP "RstVmdV"
 
 #define VENDOR_GUID \
        EFI_GUID(0x193dfefa, 0xa445, 0x4302, 0x99, 0xd8, 0xef, 0x3a, 0xad, 0x1a, 0x04, 0xc6)
 
 #define PCI_CLASS_RAID_CNTRL 0x010400
 
-static int read_efi_var(void *buffer, ssize_t buf_size, char *variable_name, struct efi_guid guid)
+static int read_efi_var(void *buffer, ssize_t buf_size,
+                       const char *variable_name, struct efi_guid guid)
 {
        char path[PATH_MAX];
        char buf[GUID_STR_MAX];
@@ -523,7 +619,8 @@ static int read_efi_var(void *buffer, ssize_t buf_size, char *variable_name, str
        return 0;
 }
 
-static int read_efi_variable(void *buffer, ssize_t buf_size, char *variable_name, struct efi_guid guid)
+static int read_efi_variable(void *buffer, ssize_t buf_size,
+                            const char *variable_name, struct efi_guid guid)
 {
        char path[PATH_MAX];
        char buf[GUID_STR_MAX];
@@ -576,7 +673,10 @@ const struct imsm_orom *find_imsm_efi(struct sys_dev *hba)
 {
        struct imsm_orom orom;
        struct orom_entry *ret;
-       int err;
+       static const char * const sata_efivars[] = {AHCI_PROP, AHCI_SSATA_PROP,
+                                                   AHCI_TSATA_PROP};
+       static const char * const vmd_efivars[] = {VROC_VMD_PROP, RST_VMD_PROP};
+       unsigned long i;
 
        if (check_env("IMSM_TEST_AHCI_EFI") || check_env("IMSM_TEST_SCU_EFI"))
                return imsm_platform_test(hba);
@@ -585,35 +685,42 @@ const struct imsm_orom *find_imsm_efi(struct sys_dev *hba)
        if (check_env("IMSM_TEST_OROM"))
                return NULL;
 
-       if (hba->type == SYS_DEV_SATA && hba->class != PCI_CLASS_RAID_CNTRL)
-               return NULL;
-
-       err = read_efi_variable(&orom, sizeof(orom), hba->type == SYS_DEV_SAS ? SCU_PROP : AHCI_PROP, VENDOR_GUID);
+       switch (hba->type) {
+       case SYS_DEV_SAS:
+               if (!read_efi_variable(&orom, sizeof(orom), SCU_PROP,
+                                      VENDOR_GUID))
+                       break;
 
-       /* try to read variable for second AHCI controller */
-       if (err && hba->type == SYS_DEV_SATA)
-               err = read_efi_variable(&orom, sizeof(orom), AHCI_SSATA_PROP, VENDOR_GUID);
+               return NULL;
+       case SYS_DEV_SATA:
+               if (hba->class != PCI_CLASS_RAID_CNTRL)
+                       return NULL;
 
-       /* try to read variable for combined AHCI controllers */
-       if (err && hba->type == SYS_DEV_SATA) {
-               static struct orom_entry *csata;
+               for (i = 0; i < ARRAY_SIZE(sata_efivars); i++) {
+                       if (!read_efi_variable(&orom, sizeof(orom),
+                                               sata_efivars[i], VENDOR_GUID))
+                               break;
 
-               err = read_efi_variable(&orom, sizeof(orom), AHCI_CSATA_PROP, VENDOR_GUID);
-               if (!err) {
-                       if (!csata)
-                               csata = add_orom(&orom);
-                       add_orom_device_id(csata, hba->dev_id);
-                       csata->type = hba->type;
-                       return &csata->orom;
                }
-       }
+               if (i == ARRAY_SIZE(sata_efivars))
+                       return NULL;
+
+               break;
+       case SYS_DEV_VMD:
+       case SYS_DEV_SATA_VMD:
+               for (i = 0; i < ARRAY_SIZE(vmd_efivars); i++) {
+                       if (!read_efi_variable(&orom, sizeof(orom),
+                                               vmd_efivars[i], VENDOR_GUID))
+                               break;
+               }
 
-       if (hba->type == SYS_DEV_VMD) {
-               err = read_efi_variable(&orom, sizeof(orom), VMD_PROP, VENDOR_GUID);
-       }
+               if (i == ARRAY_SIZE(vmd_efivars))
+                       return NULL;
 
-       if (err)
+               break;
+       default:
                return NULL;
+       }
 
        ret = add_orom(&orom);
        add_orom_device_id(ret, hba->dev_id);
@@ -651,6 +758,106 @@ const struct imsm_orom *find_imsm_nvme(struct sys_dev *hba)
        return &nvme_orom->orom;
 }
 
+#define VMD_REGISTER_OFFSET            0x3FC
+#define VMD_REGISTER_SKU_SHIFT         1
+#define VMD_REGISTER_SKU_MASK          (0x00000007)
+#define VMD_REGISTER_SKU_PREMIUM       2
+#define MD_REGISTER_VER_MAJOR_SHIFT    4
+#define MD_REGISTER_VER_MAJOR_MASK     (0x0000000F)
+#define MD_REGISTER_VER_MINOR_SHIFT    8
+#define MD_REGISTER_VER_MINOR_MASK     (0x0000000F)
+
+/*
+ * read_vmd_register() - Reads VMD register and writes contents to buff ptr
+ * @buff: buffer for vmd register data, should be the size of uint32_t
+ * @hba: device whose controller path is resolved by vmd_domain_to_controller()
+ * Return: 0 on success, 1 on error
+ */
+int read_vmd_register(uint32_t *buff, struct sys_dev *hba)
+{
+       int fd;
+       char vmd_pci_config_path[PATH_MAX];
+
+       if (!vmd_domain_to_controller(hba, vmd_pci_config_path))
+               return 1;
+       /* strncat() appends a NUL after the copied bytes: reserve one byte */
+       strncat(vmd_pci_config_path, "/config", PATH_MAX - strnlen(vmd_pci_config_path, PATH_MAX) - 1);
+
+       fd = open(vmd_pci_config_path, O_RDONLY);
+       if (fd < 0)
+               return 1;
+
+       if (pread(fd, buff, sizeof(uint32_t), VMD_REGISTER_OFFSET) != sizeof(uint32_t)) {
+               close(fd);
+               return 1;
+       }
+       close(fd);
+       return 0;
+}
+
+/*
+ * add_vmd_orom() - Adds VMD orom cap to orom list, writes orom_entry ptr into vmd_orom
+ * @vmd_orom: pointer to orom entry pointer
+ *
+ * Return: 0 on success, 1 on error
+ */
+int add_vmd_orom(struct orom_entry **vmd_orom, struct sys_dev *hba)
+{
+       uint8_t sku;
+       uint32_t vmd_register_data;
+       struct imsm_orom vmd_orom_cap = {
+               .signature = IMSM_VMD_OROM_COMPAT_SIGNATURE,
+               .sss = IMSM_OROM_SSS_4kB | IMSM_OROM_SSS_8kB |
+                                       IMSM_OROM_SSS_16kB | IMSM_OROM_SSS_32kB |
+                                       IMSM_OROM_SSS_64kB | IMSM_OROM_SSS_128kB,
+               .dpa = IMSM_OROM_DISKS_PER_ARRAY_NVME,
+               .tds = IMSM_OROM_TOTAL_DISKS_VMD,
+               .vpa = IMSM_OROM_VOLUMES_PER_ARRAY,
+               .vphba = IMSM_OROM_VOLUMES_PER_HBA_VMD,
+               .attr = IMSM_OROM_ATTR_2TB | IMSM_OROM_ATTR_2TB_DISK,
+               .driver_features = IMSM_OROM_CAPABILITIES_EnterpriseSystem |
+                                  IMSM_OROM_CAPABILITIES_TPV
+       };
+
+       if (read_vmd_register(&vmd_register_data, hba) != 0)
+               return 1;
+
+       sku = (uint8_t)((vmd_register_data >> VMD_REGISTER_SKU_SHIFT) &
+               VMD_REGISTER_SKU_MASK);
+
+       if (sku == VMD_REGISTER_SKU_PREMIUM)
+               vmd_orom_cap.rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 |
+                                  IMSM_OROM_RLC_RAID10 | IMSM_OROM_RLC_RAID5;
+       else
+               vmd_orom_cap.rlc = IMSM_OROM_RLC_RAID_CNG;
+
+       vmd_orom_cap.major_ver = (uint8_t)
+               ((vmd_register_data >> MD_REGISTER_VER_MAJOR_SHIFT) &
+                       MD_REGISTER_VER_MAJOR_MASK);
+       vmd_orom_cap.minor_ver = (uint8_t)
+               ((vmd_register_data >> MD_REGISTER_VER_MINOR_SHIFT) &
+                       MD_REGISTER_VER_MINOR_MASK);
+
+       *vmd_orom = add_orom(&vmd_orom_cap);
+
+       return 0;
+}
+
+const struct imsm_orom *find_imsm_vmd(struct sys_dev *hba)
+{
+       static struct orom_entry *vmd_orom;
+
+       if (hba->type != SYS_DEV_VMD)
+               return NULL;
+
+       if (!vmd_orom && add_vmd_orom(&vmd_orom, hba) != 0)
+               return NULL;
+
+       add_orom_device_id(vmd_orom, hba->dev_id);
+       vmd_orom->type = SYS_DEV_VMD;
+       return &vmd_orom->orom;
+}
+
 const struct imsm_orom *find_imsm_capability(struct sys_dev *hba)
 {
        const struct imsm_orom *cap = get_orom_by_device_id(hba->dev_id);
@@ -660,23 +867,117 @@ const struct imsm_orom *find_imsm_capability(struct sys_dev *hba)
 
        if (hba->type == SYS_DEV_NVME)
                return find_imsm_nvme(hba);
-       if ((cap = find_imsm_efi(hba)) != NULL)
+
+       cap = find_imsm_efi(hba);
+       if (cap)
                return cap;
-       if ((cap = find_imsm_hba_orom(hba)) != NULL)
+
+       if (hba->type == SYS_DEV_VMD) {
+               cap = find_imsm_vmd(hba);
+               if (cap)
+                       return cap;
+       }
+
+       cap = find_imsm_hba_orom(hba);
+       if (cap)
                return cap;
 
        return NULL;
 }
 
-char *devt_to_devpath(dev_t dev)
+/* Check whether the nvme device is represented by an nvme subsystem;
+ * if so, the virtual path is replaced by the hardware device path
+ * to allow IMSM capabilities detection.
+ * Returns:
+ *     hardware path to device (allocated by realpath(), caller frees) -
+ *             if the device is represented via nvme virtual subsystem
+ *     NULL - otherwise (not an nvme subsystem path, or lookup failed)
+ */
+char *get_nvme_multipath_dev_hw_path(const char *dev_path)
 {
-       char device[46];
+       DIR *dir;
+       struct dirent *ent;
+       char *rp = NULL;
+
+       if (strncmp(dev_path, NVME_SUBSYS_PATH, strlen(NVME_SUBSYS_PATH)) != 0)
+               return NULL;
+
+       dir = opendir(dev_path);
+       if (!dir)
+               return NULL;
 
-       sprintf(device, "/sys/dev/block/%d:%d/device", major(dev), minor(dev));
-       return realpath(device, NULL);
+       for (ent = readdir(dir); ent; ent = readdir(dir)) {
+               char buf[strlen(dev_path) + strlen(ent->d_name) + 2]; /* '/' + NUL */
+
+               /* Check if dir is a controller, ignore namespaces */
+               if (!(strncmp(ent->d_name, "nvme", 4) == 0) ||
+                   (strrchr(ent->d_name, 'n') != &ent->d_name[0]))
+                       continue;
+
+               snprintf(buf, sizeof(buf), "%s/%s", dev_path, ent->d_name);
+               rp = realpath(buf, NULL);
+               break;
+       }
+
+       closedir(dir);
+       return rp;
 }
 
-char *diskfd_to_devpath(int fd)
+/* Description: Return part or whole realpath for the dev
+ * Parameters:
+ *     dev - the device to be queried
+ *     dev_level - level of "/device" entries. It allows the caller to access
+ *                 virtual or physical devices which are on the "path" to the
+ *                 queried one.
+ *     buf - optional, must be PATH_MAX size. If set, then will be used.
+ */
+char *devt_to_devpath(dev_t dev, int dev_level, char *buf)
+{
+       char device[PATH_MAX];
+       char *hw_path;
+       int i;
+       unsigned long device_free_len = sizeof(device) - 1;
+       char dev_str[] = "/device";
+       unsigned long dev_str_len = strlen(dev_str);
+
+       snprintf(device, sizeof(device), "/sys/dev/block/%d:%d", major(dev),
+                minor(dev));
+
+       /* If caller wants block device, return path to it even if it is exposed
+        * via virtual layer.
+        */
+       if (dev_level == 0)
+               return realpath(device, buf);
+
+       device_free_len -= strlen(device);
+       for (i = 0; i < dev_level; i++) {
+               if (device_free_len < dev_str_len)
+                       return NULL;
+
+               strncat(device, dev_str, device_free_len);
+
+               /* Resolve nvme-subsystem abstraction if needed
+                */
+               device_free_len -= dev_str_len;
+               if (i == 0) {
+                       char rp[PATH_MAX];
+
+                       if (!realpath(device, rp))
+                               return NULL;
+                       hw_path = get_nvme_multipath_dev_hw_path(rp);
+                       if (hw_path) {
+                               strcpy(device, hw_path);
+                               device_free_len = sizeof(device) -
+                                                 strlen(device) - 1;
+                               free(hw_path);
+                       }
+               }
+       }
+
+       return realpath(device, buf);
+}
+
+char *diskfd_to_devpath(int fd, int dev_level, char *buf)
 {
        /* return the device path for a disk, return NULL on error or fd
         * refers to a partition
@@ -688,7 +989,7 @@ char *diskfd_to_devpath(int fd)
        if (!S_ISBLK(st.st_mode))
                return NULL;
 
-       return devt_to_devpath(st.st_rdev);
+       return devt_to_devpath(st.st_rdev, dev_level, buf);
 }
 
 int path_attached_to_hba(const char *disk_path, const char *hba_path)
@@ -713,7 +1014,7 @@ int path_attached_to_hba(const char *disk_path, const char *hba_path)
 
 int devt_attached_to_hba(dev_t dev, const char *hba_path)
 {
-       char *disk_path = devt_to_devpath(dev);
+       char *disk_path = devt_to_devpath(dev, 1, NULL);
        int rc = path_attached_to_hba(disk_path, hba_path);
 
        if (disk_path)
@@ -724,7 +1025,7 @@ int devt_attached_to_hba(dev_t dev, const char *hba_path)
 
 int disk_attached_to_hba(int fd, const char *hba_path)
 {
-       char *disk_path = diskfd_to_devpath(fd);
+       char *disk_path = diskfd_to_devpath(fd, 1, NULL);
        int rc = path_attached_to_hba(disk_path, hba_path);
 
        if (disk_path)
@@ -766,3 +1067,91 @@ char *vmd_domain_to_controller(struct sys_dev *hba, char *buf)
        closedir(dir);
        return NULL;
 }
+
+/* Scan over all controller's namespaces and compare nsid value to verify if
+ * current one is supported. The routine doesn't check IMSM capabilities for
+ * namespace. Only one nvme namespace is supported by IMSM.
+ * Parameters:
+ *     fd - open descriptor to the nvme namespace
+ *     verbose - error logging level
+ * Returns:
+ *     1 - if namespace is supported
+ *     0 - otherwise
+ */
+int imsm_is_nvme_namespace_supported(int fd, int verbose)
+{
+       DIR *dir = NULL;
+       struct dirent *ent;
+       char cntrl_path[PATH_MAX];
+       char ns_path[PATH_MAX];
+       unsigned long long lowest_nsid = ULLONG_MAX;
+       unsigned long long this_nsid;
+       int rv = 0;
+
+       /* level 1 resolves one "/device" step up, level 0 the block device */
+       if (!diskfd_to_devpath(fd, 1, cntrl_path) ||
+           !diskfd_to_devpath(fd, 0, ns_path)) {
+               if (verbose)
+                       pr_err("Cannot get device paths\n");
+               goto abort;
+       }
+
+
+       if (devpath_to_ll(ns_path, "nsid", &this_nsid)) {
+               if (verbose)
+                       pr_err("Cannot read nsid value for %s\n",
+                              basename(ns_path));
+               goto abort;
+       }
+
+       dir = opendir(cntrl_path);
+       if (!dir)
+               goto abort;
+
+       /* The lowest nvme namespace is supported */
+       for (ent = readdir(dir); ent; ent = readdir(dir)) {
+               unsigned long long curr_nsid;
+               char curr_ns_path[PATH_MAX + 256];
+
+               if (!strstr(ent->d_name, "nvme"))
+                       continue;
+
+               snprintf(curr_ns_path, sizeof(curr_ns_path), "%s/%s",
+                        cntrl_path, ent->d_name);
+
+               if (devpath_to_ll(curr_ns_path, "nsid", &curr_nsid))
+                       goto abort;
+
+               if (lowest_nsid > curr_nsid)
+                       lowest_nsid = curr_nsid;
+       }
+
+       if (this_nsid == lowest_nsid)
+               rv = 1;
+       else if (verbose)
+               pr_err("IMSM is supported on the lowest NVMe namespace\n");
+
+abort:
+       if (dir)
+               closedir(dir);
+
+       return rv;
+}
+
+/* Verify if multipath is supported by NVMe controller
+ * Returns:
+ *     0 - not supported
+ *     1 - supported
+ */
+int is_multipath_nvme(int disk_fd)
+{
+       char ns_path[PATH_MAX];
+
+       if (!diskfd_to_devpath(disk_fd, 0, ns_path))
+               return 0;
+
+       if (strncmp(ns_path, NVME_SUBSYS_PATH, strlen(NVME_SUBSYS_PATH)) == 0)
+               return 1;
+
+       return 0;
+}
index 7cb370ef0adbdd81472424b9ebd0d76dc8662283..dcc5aaa74f21b8bde62683e197c7d5042331bb57 100644 (file)
 #include <asm/types.h>
 #include <strings.h>
 
+/* according to GUID format: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" */
+#define GUID_STR_MAX   37
+
 /* The IMSM Capability (IMSM AHCI and ISCU OROM/EFI variable) Version Table definition */
 struct imsm_orom {
        __u8 signature[4];
        #define IMSM_OROM_SIGNATURE "$VER"
        #define IMSM_NVME_OROM_COMPAT_SIGNATURE "$NVM"
+       #define IMSM_VMD_OROM_COMPAT_SIGNATURE "$VMD"
        __u8 table_ver_major; /* Currently 2 (can change with future revs) */
        __u8 table_ver_minor; /* Currently 2 (can change with future revs) */
        __u16 major_ver; /* Example: 8 as in 8.6.0.1020 */
@@ -65,11 +69,13 @@ struct imsm_orom {
        __u16 tds; /* Total Disks Supported */
        #define IMSM_OROM_TOTAL_DISKS 6
        #define IMSM_OROM_TOTAL_DISKS_NVME 12
+       #define IMSM_OROM_TOTAL_DISKS_VMD 48
        __u8 vpa; /* # Volumes Per Array supported */
        #define IMSM_OROM_VOLUMES_PER_ARRAY 2
        __u8 vphba; /* # Volumes Per Host Bus Adapter supported */
        #define IMSM_OROM_VOLUMES_PER_HBA 4
        #define IMSM_OROM_VOLUMES_PER_HBA_NVME 4
+       #define IMSM_OROM_VOLUMES_PER_HBA_VMD 24
        /* Attributes supported. This should map to the
         * attributes in the MPB. Also, lower 16 bits
         * should match/duplicate RLC bits above.
@@ -103,25 +109,21 @@ struct imsm_orom {
        #define IMSM_OROM_CAPABILITIES_TPV (1 << 10)
 } __attribute__((packed));
 
-static inline int imsm_orom_has_raid0(const struct imsm_orom *orom)
-{
-       return !!(orom->rlc & IMSM_OROM_RLC_RAID0);
-}
-static inline int imsm_orom_has_raid1(const struct imsm_orom *orom)
-{
-       return !!(orom->rlc & IMSM_OROM_RLC_RAID1);
-}
-static inline int imsm_orom_has_raid1e(const struct imsm_orom *orom)
-{
-       return !!(orom->rlc & IMSM_OROM_RLC_RAID1E);
-}
-static inline int imsm_orom_has_raid10(const struct imsm_orom *orom)
-{
-       return !!(orom->rlc & IMSM_OROM_RLC_RAID10);
-}
-static inline int imsm_orom_has_raid5(const struct imsm_orom *orom)
+/* IMSM metadata requirements for each level */
+struct imsm_level_ops {
+       int level;
+       bool (*is_level_supported)(const struct imsm_orom *);
+       bool (*is_raiddisks_count_supported)(const int);
+       char *name;
+};
+
+extern struct imsm_level_ops imsm_level_ops[];
+
+static inline bool imsm_rlc_has_bit(const struct imsm_orom *orom, const unsigned short bit)
 {
-       return !!(orom->rlc & IMSM_OROM_RLC_RAID5);
+       if (orom->rlc & bit)
+               return true;
+       return false;
 }
 
 /**
@@ -182,7 +184,13 @@ static inline int imsm_orom_is_enterprise(const struct imsm_orom *orom)
 static inline int imsm_orom_is_nvme(const struct imsm_orom *orom)
 {
        return memcmp(orom->signature, IMSM_NVME_OROM_COMPAT_SIGNATURE,
-                       sizeof(orom->signature)) == 0;
+                     sizeof(orom->signature)) == 0;
+}
+
+static inline int imsm_orom_is_vmd_without_efi(const struct imsm_orom *orom)
+{
+       return memcmp(orom->signature, IMSM_VMD_OROM_COMPAT_SIGNATURE,
+                     sizeof(orom->signature)) == 0;
 }
 
 static inline int imsm_orom_has_tpv_support(const struct imsm_orom *orom)
@@ -196,6 +204,7 @@ enum sys_dev_type {
        SYS_DEV_SATA,
        SYS_DEV_NVME,
        SYS_DEV_VMD,
+       SYS_DEV_SATA_VMD,
        SYS_DEV_MAX
 };
 
@@ -228,7 +237,7 @@ extern struct orom_entry *orom_entries;
 
 static inline char *guid_str(char *buf, struct efi_guid guid)
 {
-       sprintf(buf, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+       snprintf(buf, GUID_STR_MAX, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                 guid.b[3], guid.b[2], guid.b[1], guid.b[0],
                 guid.b[5], guid.b[4], guid.b[7], guid.b[6],
                 guid.b[8], guid.b[9], guid.b[10], guid.b[11],
@@ -236,7 +245,10 @@ static inline char *guid_str(char *buf, struct efi_guid guid)
        return buf;
 }
 
-char *diskfd_to_devpath(int fd);
+char *get_nvme_multipath_dev_hw_path(const char *dev_path);
+char *diskfd_to_devpath(int fd, int dev_level, char *buf);
+int devpath_to_char(const char *dev_path, const char *entry, char *buf,
+                   int len, int verbose);
 __u16 devpath_to_vendor(const char *dev_path);
 struct sys_dev *find_driver_devices(const char *bus, const char *driver);
 struct sys_dev *find_intel_devices(void);
@@ -244,11 +256,12 @@ const struct imsm_orom *find_imsm_capability(struct sys_dev *hba);
 const struct imsm_orom *find_imsm_orom(void);
 int disk_attached_to_hba(int fd, const char *hba_path);
 int devt_attached_to_hba(dev_t dev, const char *hba_path);
-char *devt_to_devpath(dev_t dev);
+char *devt_to_devpath(dev_t dev, int dev_level, char *buf);
 int path_attached_to_hba(const char *disk_path, const char *hba_path);
-const char *get_sys_dev_type(enum sys_dev_type);
 const struct orom_entry *get_orom_entry_by_device_id(__u16 dev_id);
 const struct imsm_orom *get_orom_by_device_id(__u16 device_id);
 struct sys_dev *device_by_id(__u16 device_id);
 struct sys_dev *device_by_id_and_path(__u16 device_id, const char *path);
+int is_multipath_nvme(int disk_fd);
+int imsm_is_nvme_namespace_supported(int disk_fd, int verbose);
 char *vmd_domain_to_controller(struct sys_dev *hba, char *buf);
index 3c53bd35e0b19a693ff84b81a927f27c3084048d..dfaafdc07cdcd73af6e810f09d258b42ff8909bb 100644 (file)
--- a/policy.c
+++ b/policy.c
@@ -365,7 +365,6 @@ struct dev_policy *path_policy(char **paths, char *type)
 {
        struct pol_rule *rules;
        struct dev_policy *pol = NULL;
-       int i;
 
        rules = config_rules;
 
@@ -380,23 +379,104 @@ struct dev_policy *path_policy(char **paths, char *type)
                rules = rules->next;
        }
 
-       /* Now add any metadata-specific internal knowledge
-        * about this path
-        */
-       for (i=0; paths && paths[0] && superlist[i]; i++)
-               if (superlist[i]->get_disk_controller_domain) {
-                       const char *d =
-                               superlist[i]->get_disk_controller_domain(
-                                       paths[0]);
-                       if (d)
-                               pol_new(&pol, pol_domain, d, superlist[i]->name);
-               }
-
        pol_sort(&pol);
        pol_dedup(pol);
        return pol;
 }
 
+/**
+ * drive_test_and_add_policies() - get policies for drive and add them to pols.
+ * @st: supertype.
+ * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
+ * @fd: device descriptor.
+ * @verbose: verbose flag.
+ *
+ * If supertype doesn't support this functionality return success. Use metadata handler to get
+ * policies.
+ */
+mdadm_status_t drive_test_and_add_policies(struct supertype *st, dev_policy_t **pols, int fd,
+                                          const int verbose)
+{
+       if (!st->ss->test_and_add_drive_policies)
+               return MDADM_STATUS_SUCCESS;
+
+       if (st->ss->test_and_add_drive_policies(pols, fd, verbose) == MDADM_STATUS_SUCCESS) {
+               /* After successful call list cannot be empty */
+               assert(*pols);
+               return MDADM_STATUS_SUCCESS;
+       }
+
+       return MDADM_STATUS_ERROR;
+}
+
+/**
+ * sysfs_test_and_add_drive_policies() - get policies for mddev and add them to pols.
+ * @st: supertype.
+ * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
+ * @mdi: mdinfo describing the MD array; its device list (GET_DEVS) must be populated.
+ * @verbose: verbose flag.
+ *
+ * If supertype doesn't support this functionality return success. To get policies, all disks
+ * connected to mddev are analyzed.
+ */
+mdadm_status_t sysfs_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols,
+                                                struct mdinfo *mdi, const int verbose)
+{
+       struct mdinfo *sd;
+
+       if (!st->ss->test_and_add_drive_policies)
+               return MDADM_STATUS_SUCCESS;
+
+       for (sd = mdi->devs; sd; sd = sd->next) {
+               char *devpath = map_dev(sd->disk.major, sd->disk.minor, 0);
+               int fd = dev_open(devpath, O_RDONLY);
+               int rv;
+
+               if (!is_fd_valid(fd)) {
+                       pr_err("Cannot open fd for %s\n", devpath);
+                       return MDADM_STATUS_ERROR;
+               }
+
+               rv = drive_test_and_add_policies(st, pols, fd, verbose);
+               close(fd);
+
+               if (rv)
+                       return MDADM_STATUS_ERROR;
+       }
+
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * mddev_test_and_add_drive_policies() - get policies for mddev and add them to pols.
+ * @st: supertype.
+ * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
+ * @array_fd: MD device descriptor.
+ * @verbose: verbose flag.
+ *
+ * If supertype doesn't support this functionality return success. Use fd to extract disks.
+ */
+mdadm_status_t mddev_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols,
+                                                int array_fd, const int verbose)
+{
+       struct mdinfo *sra;
+       int ret;
+
+       if (!st->ss->test_and_add_drive_policies)
+               return MDADM_STATUS_SUCCESS;
+
+       sra = sysfs_read(array_fd, NULL, GET_DEVS);
+       if (!sra) {
+               pr_err("Cannot load sysfs for %s\n", fd2devnm(array_fd));
+               return MDADM_STATUS_ERROR;
+       }
+
+       ret = sysfs_test_and_add_drive_policies(st, pols, sra, verbose);
+
+       sysfs_free(sra);
+       return ret;
+}
+
 void pol_add(struct dev_policy **pol,
                    char *name, char *val,
                    char *metadata)
@@ -679,7 +759,6 @@ int domain_test(struct domainlist *dom, struct dev_policy *pol,
         *  1:  has domains, all match
         */
        int found_any = -1;
-       int has_one_domain = 1;
        struct dev_policy *p;
 
        pol = pol_find(pol, pol_domain);
@@ -689,9 +768,6 @@ int domain_test(struct domainlist *dom, struct dev_policy *pol,
                        dom = dom->next;
                if (!dom || strcmp(dom->dom, p->value) != 0)
                        return 0;
-               if (has_one_domain && metadata && strcmp(metadata, "imsm") == 0)
-                       found_any = -1;
-               has_one_domain = 0;
        }
        return found_any;
 }
@@ -761,7 +837,7 @@ void policy_save_path(char *id_path, struct map_ent *array)
                return;
        }
 
-       if (fprintf(f, "%s %08x:%08x:%08x:%08x\n",
+       if (fprintf(f, "%20s %08x:%08x:%08x:%08x\n",
                    array->metadata,
                    array->uuid[0], array->uuid[1],
                    array->uuid[2], array->uuid[3]) <= 0)
@@ -784,7 +860,7 @@ int policy_check_path(struct mdinfo *disk, struct map_ent *array)
                if (!f)
                        continue;
 
-               rv = fscanf(f, " %s %x:%x:%x:%x\n",
+               rv = fscanf(f, " %20s %x:%x:%x:%x\n",
                            array->metadata,
                            array->uuid,
                            array->uuid+1,
index 7ea04c7ade280ba54b30769d0a86be3307a5bbcd..94c80c2cc6d0f24fa4c9a370608f2d130006636b 100644 (file)
@@ -22,7 +22,6 @@
 #include "probe_roms.h"
 #include "mdadm.h"
 #include <unistd.h>
-#include <signal.h>
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
@@ -69,7 +68,8 @@ static int probe_address16(const __u16 *ptr, __u16 *val)
 
 void probe_roms_exit(void)
 {
-       signal(SIGBUS, SIG_DFL);
+       signal_s(SIGBUS, SIG_DFL);
+
        if (rom_fd >= 0) {
                close(rom_fd);
                rom_fd = -1;
@@ -98,7 +98,7 @@ int probe_roms_init(unsigned long align)
        if (roms_init())
                return -1;
 
-       if (signal(SIGBUS, sigbus) == SIG_ERR)
+       if (signal_s(SIGBUS, sigbus) == SIG_ERR)
                rc = -1;
        if (rc == 0) {
                fd = open("/dev/mem", O_RDONLY);
index 5003343082f0226d2c283be43aa84dde9bc8dbd2..8999ca89e951d25c28af13ecc439adbda7d45886 100644 (file)
@@ -86,7 +86,7 @@ The latest version of
 .I raid6check
 should always be available from
 .IP
-.B http://www.kernel.org/pub/linux/utils/raid/mdadm/
+.B https://www.kernel.org/pub/linux/utils/raid/mdadm/
 .PP
 Related man pages:
 .PP
index a8e6005bc1be5f6936cd4242b0921156578390b3..99477761c640b696bc1a36df10747b5defe54025 100644 (file)
@@ -24,7 +24,6 @@
 
 #include "mdadm.h"
 #include <stdint.h>
-#include <signal.h>
 #include <sys/mman.h>
 
 #define CHECK_PAGE_BITS (12)
@@ -130,30 +129,36 @@ void raid6_stats(int *disk, int *results, int raid_disks, int chunk_size)
 }
 
 int lock_stripe(struct mdinfo *info, unsigned long long start,
-               int chunk_size, int data_disks, sighandler_t *sig) {
+               int chunk_size, int data_disks, sighandler_t *sig)
+{
        int rv;
+
+       sig[0] = signal_s(SIGTERM, SIG_IGN);
+       sig[1] = signal_s(SIGINT, SIG_IGN);
+       sig[2] = signal_s(SIGQUIT, SIG_IGN);
+
+       if (sig[0] == SIG_ERR || sig[1] == SIG_ERR || sig[2] == SIG_ERR)
+               return 1;
+
        if(mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
                return 2;
        }
 
-       sig[0] = signal(SIGTERM, SIG_IGN);
-       sig[1] = signal(SIGINT, SIG_IGN);
-       sig[2] = signal(SIGQUIT, SIG_IGN);
-
        rv = sysfs_set_num(info, NULL, "suspend_lo", start * chunk_size * data_disks);
        rv |= sysfs_set_num(info, NULL, "suspend_hi", (start + 1) * chunk_size * data_disks);
        return rv * 256;
 }
 
-int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig) {
+int unlock_all_stripes(struct mdinfo *info, sighandler_t *sig)
+{
        int rv;
        rv = sysfs_set_num(info, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
        rv |= sysfs_set_num(info, NULL, "suspend_hi", 0);
        rv |= sysfs_set_num(info, NULL, "suspend_lo", 0);
 
-       signal(SIGQUIT, sig[2]);
-       signal(SIGINT, sig[1]);
-       signal(SIGTERM, sig[0]);
+       signal_s(SIGQUIT, sig[2]);
+       signal_s(SIGINT, sig[1]);
+       signal_s(SIGTERM, sig[0]);
 
        if(munlockall() != 0)
                return 3;
index 31b07e89f318191ec97c39bda899dedbd7d2fbf6..a7a7229fc06ba4a7ced0ecde0f5b003aef49330a 100644 (file)
@@ -333,7 +333,7 @@ void make_tables(void)
 
        /* Compute log and inverse log */
        /* Modified code from:
-        *    http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
+        *    https://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
         */
        b = 1;
        raid6_gflog[0] = 0;
@@ -866,8 +866,16 @@ int test_stripes(int *source, unsigned long long *offsets,
                int disk;
 
                for (i = 0 ; i < raid_disks ; i++) {
-                       lseek64(source[i], offsets[i]+start, 0);
-                       read(source[i], stripes[i], chunk_size);
+                       if ((lseek64(source[i], offsets[i]+start, 0) < 0) ||
+                           (read(source[i], stripes[i], chunk_size) !=
+                            chunk_size)) {
+                               free(q);
+                               free(p);
+                               free(blocks);
+                               free(stripes);
+                               free(stripe_buf);
+                               return -1;
+                       }
                }
                for (i = 0 ; i < data_disks ; i++) {
                        int disk = geo_map(i, start/chunk_size, raid_disks,
diff --git a/sha1.c b/sha1.c
index 11be7045abfa42050f4ebc586574e6fb8b99df4b..1e4ad5d980f9e06e0d1b63cb869a3e8530db2bff 100644 (file)
--- a/sha1.c
+++ b/sha1.c
@@ -229,7 +229,17 @@ sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
   if (len >= 64)
     {
 #if !_STRING_ARCH_unaligned
-# define alignof(type) offsetof (struct { char c; type x; }, x)
+/* GCC releases before GCC 4.9 had a bug in _Alignof.  See GCC bug 52023
+   <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52023>.
+   clang versions < 8.0.0 have the same bug.  */
+# if (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112 \
+      || (defined __GNUC__ && __GNUC__ < 4 + (__GNUC_MINOR__ < 9) \
+   && !defined __clang__) \
+      || (defined __clang__ && __clang_major__ < 8))
+#  define alignof(type) offsetof (struct { char c; type x; }, x)
+# else
+#  define alignof(type) _Alignof(type)
+# endif
 # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
       if (UNALIGNED_P (buffer))
        while (len > 64)
@@ -258,7 +268,7 @@ sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
        {
          sha1_process_block (ctx->buffer, 64, ctx);
          left_over -= 64;
-         memcpy (ctx->buffer, &ctx->buffer[16], left_over);
+         memmove (ctx->buffer, &ctx->buffer[16], left_over);
        }
       ctx->buflen = left_over;
     }
index 7cd5702d81fa3547c3f7f070058ba83509fea883..21426c753c6dff4d9dfdba152bb2fb8ac503f107 100644 (file)
@@ -503,13 +503,6 @@ struct ddf_super {
 static int load_super_ddf_all(struct supertype *st, int fd,
                              void **sbp, char *devname);
 static int get_svd_state(const struct ddf_super *, const struct vcl *);
-static int
-validate_geometry_ddf_container(struct supertype *st,
-                               int level, int layout, int raiddisks,
-                               int chunk, unsigned long long size,
-                               unsigned long long data_offset,
-                               char *dev, unsigned long long *freesize,
-                               int verbose);
 
 static int validate_geometry_ddf_bvd(struct supertype *st,
                                     int level, int layout, int raiddisks,
@@ -1477,13 +1470,13 @@ static void examine_vds(struct ddf_super *sb)
                printf("\n");
                printf("         unit[%d] : %d\n", i, be16_to_cpu(ve->unit));
                printf("        state[%d] : %s, %s%s\n", i,
-                      map_num(ddf_state, ve->state & 7),
+                      map_num_s(ddf_state, ve->state & 7),
                       (ve->state & DDF_state_morphing) ? "Morphing, ": "",
                       (ve->state & DDF_state_inconsistent)? "Not Consistent" : "Consistent");
                printf("   init state[%d] : %s\n", i,
-                      map_num(ddf_init_state, ve->init_state&DDF_initstate_mask));
+                      map_num_s(ddf_init_state, ve->init_state & DDF_initstate_mask));
                printf("       access[%d] : %s\n", i,
-                      map_num(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
+                      map_num_s(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
                printf("         Name[%d] : %.16s\n", i, ve->name);
                examine_vd(i, sb, ve->guid);
        }
@@ -1599,15 +1592,20 @@ static unsigned int get_vd_num_of_subarray(struct supertype *st)
        sra = sysfs_read(-1, st->devnm, GET_VERSION);
        if (!sra || sra->array.major_version != -1 ||
            sra->array.minor_version != -2 ||
-           !is_subarray(sra->text_version))
+           !is_subarray(sra->text_version)) {
+               if (sra)
+                       sysfs_free(sra);
                return DDF_NOTFOUND;
+       }
 
        sub = strchr(sra->text_version + 1, '/');
        if (sub != NULL)
                vcnum = strtoul(sub + 1, &end, 10);
        if (sub == NULL || *sub == '\0' || *end != '\0' ||
-           vcnum >= be16_to_cpu(ddf->active->max_vd_entries))
+           vcnum >= be16_to_cpu(ddf->active->max_vd_entries)) {
+               sysfs_free(sra);
                return DDF_NOTFOUND;
+       }
 
        return vcnum;
 }
@@ -1619,7 +1617,7 @@ static void brief_examine_super_ddf(struct supertype *st, int verbose)
        struct mdinfo info;
        char nbuf[64];
        getinfo_super_ddf(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
 
        printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
 }
@@ -1634,7 +1632,7 @@ static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
        unsigned int i;
        char nbuf[64];
        getinfo_super_ddf(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
 
        for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
                struct virtual_entry *ve = &ddf->virt->entries[i];
@@ -1647,10 +1645,10 @@ static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
                ddf->currentconf =&vcl;
                vcl.vcnum = i;
                uuid_from_super_ddf(st, info.uuid);
-               fname_from_uuid(st, &info, nbuf1, ':');
+               fname_from_uuid(&info, nbuf1);
                _ddf_array_name(namebuf, ddf, i);
                printf("ARRAY%s%s container=%s member=%d UUID=%s\n",
-                      namebuf[0] == '\0' ? "" : " /dev/md/", namebuf,
+                      namebuf[0] == '\0' ? "" : " " DEV_MD_DIR, namebuf,
                       nbuf+5, i, nbuf1+5);
        }
 }
@@ -1660,7 +1658,7 @@ static void export_examine_super_ddf(struct supertype *st)
        struct mdinfo info;
        char nbuf[64];
        getinfo_super_ddf(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
        printf("MD_METADATA=ddf\n");
        printf("MD_LEVEL=container\n");
        printf("MD_UUID=%s\n", nbuf+5);
@@ -1800,7 +1798,7 @@ static void brief_detail_super_ddf(struct supertype *st, char *subarray)
                return;
        else
                uuid_of_ddf_subarray(ddf, vcnum, info.uuid);
-       fname_from_uuid(st, &info, nbuf,':');
+       fname_from_uuid(&info, nbuf);
        printf(" UUID=%s", nbuf + 5);
 }
 
@@ -1986,12 +1984,14 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *m
                info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum);
                info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
 
+               if (info->disk.raid_disk < 0)
+                       return;
+
                info->data_offset = be64_to_cpu(ddf->phys->
                                                  entries[info->disk.raid_disk].
                                                  config_size);
                info->component_size = ddf->dlist->size - info->data_offset;
-               if (info->disk.raid_disk >= 0)
-                       pde = ddf->phys->entries + info->disk.raid_disk;
+               pde = ddf->phys->entries + info->disk.raid_disk;
                if (pde &&
                    !(be16_to_cpu(pde->state) & DDF_Failed) &&
                    !(be16_to_cpu(pde->state) & DDF_Missing))
@@ -2146,75 +2146,6 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, cha
                }
 }
 
-static int update_super_ddf(struct supertype *st, struct mdinfo *info,
-                           char *update,
-                           char *devname, int verbose,
-                           int uuid_set, char *homehost)
-{
-       /* For 'assemble' and 'force' we need to return non-zero if any
-        * change was made.  For others, the return value is ignored.
-        * Update options are:
-        *  force-one : This device looks a bit old but needs to be included,
-        *        update age info appropriately.
-        *  assemble: clear any 'faulty' flag to allow this device to
-        *              be assembled.
-        *  force-array: Array is degraded but being forced, mark it clean
-        *         if that will be needed to assemble it.
-        *
-        *  newdev:  not used ????
-        *  grow:  Array has gained a new device - this is currently for
-        *              linear only
-        *  resync: mark as dirty so a resync will happen.
-        *  uuid:  Change the uuid of the array to match what is given
-        *  homehost:  update the recorded homehost
-        *  name:  update the name - preserving the homehost
-        *  _reshape_progress: record new reshape_progress position.
-        *
-        * Following are not relevant for this version:
-        *  sparc2.2 : update from old dodgey metadata
-        *  super-minor: change the preferred_minor number
-        *  summaries:  update redundant counters.
-        */
-       int rv = 0;
-//     struct ddf_super *ddf = st->sb;
-//     struct vd_config *vd = find_vdcr(ddf, info->container_member);
-//     struct virtual_entry *ve = find_ve(ddf);
-
-       /* we don't need to handle "force-*" or "assemble" as
-        * there is no need to 'trick' the kernel.  When the metadata is
-        * first updated to activate the array, all the implied modifications
-        * will just happen.
-        */
-
-       if (strcmp(update, "grow") == 0) {
-               /* FIXME */
-       } else if (strcmp(update, "resync") == 0) {
-//             info->resync_checkpoint = 0;
-       } else if (strcmp(update, "homehost") == 0) {
-               /* homehost is stored in controller->vendor_data,
-                * or it is when we are the vendor
-                */
-//             if (info->vendor_is_local)
-//                     strcpy(ddf->controller.vendor_data, homehost);
-               rv = -1;
-       } else if (strcmp(update, "name") == 0) {
-               /* name is stored in virtual_entry->name */
-//             memset(ve->name, ' ', 16);
-//             strncpy(ve->name, info->name, 16);
-               rv = -1;
-       } else if (strcmp(update, "_reshape_progress") == 0) {
-               /* We don't support reshape yet */
-       } else if (strcmp(update, "assemble") == 0 ) {
-               /* Do nothing, just succeed */
-               rv = 0;
-       } else
-               rv = -1;
-
-//     update_all_csum(ddf);
-
-       return rv;
-}
-
 static void make_header_guid(char *guid)
 {
        be32 stamp;
@@ -2435,8 +2366,7 @@ static int init_super_ddf(struct supertype *st,
         * Remaining 16 are serial number.... maybe a hostname would do?
         */
        memcpy(ddf->controller.guid, T10, sizeof(T10));
-       gethostname(hostname, sizeof(hostname));
-       hostname[sizeof(hostname) - 1] = 0;
+       s_gethostname(hostname, sizeof(hostname));
        hostlen = strlen(hostname);
        memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
        for (i = strlen(T10) ; i+hostlen < 24; i++)
@@ -2637,9 +2567,11 @@ static int init_super_ddf_bvd(struct supertype *st,
                ve->init_state = DDF_init_not;
 
        memset(ve->pad1, 0xff, 14);
-       memset(ve->name, ' ', 16);
-       if (name)
-               strncpy(ve->name, name, 16);
+       memset(ve->name, '\0', sizeof(ve->name));
+       if (name) {
+               int l = strnlen(name, sizeof(ve->name));
+               memcpy(ve->name, name, l);
+       }
        ddf->virt->populated_vdes =
                cpu_to_be16(be16_to_cpu(ddf->virt->populated_vdes)+1);
 
@@ -3320,6 +3252,42 @@ static int reserve_space(struct supertype *st, int raiddisks,
        return 1;
 }
 
+static int
+validate_geometry_ddf_container(struct supertype *st,
+                               int level, int raiddisks,
+                               unsigned long long data_offset,
+                               char *dev, unsigned long long *freesize,
+                               int verbose)
+{
+       int fd;
+       unsigned long long ldsize;
+
+       if (!is_container(level))
+               return 0;
+       if (!dev)
+               return 1;
+
+       fd = dev_open(dev, O_RDONLY|O_EXCL);
+       if (fd < 0) {
+               if (verbose)
+                       pr_err("ddf: Cannot open %s: %s\n",
+                              dev, strerror(errno));
+               return 0;
+       }
+       if (!get_dev_size(fd, dev, &ldsize)) {
+               close(fd);
+               return 0;
+       }
+       close(fd);
+       if (freesize) {
+               *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
+               if (*freesize == 0)
+                       return 0;
+       }
+
+       return 1;
+}
+
 static int validate_geometry_ddf(struct supertype *st,
                                 int level, int layout, int raiddisks,
                                 int *chunk, unsigned long long size,
@@ -3338,20 +3306,18 @@ static int validate_geometry_ddf(struct supertype *st,
         * If given BVDs, we make an SVD, changing all the GUIDs in the process.
         */
 
-       if (*chunk == UnSet)
-               *chunk = DEFAULT_CHUNK;
-
        if (level == LEVEL_NONE)
                level = LEVEL_CONTAINER;
-       if (level == LEVEL_CONTAINER) {
+       if (is_container(level)) {
                /* Must be a fresh device to add to a container */
-               return validate_geometry_ddf_container(st, level, layout,
-                                                      raiddisks, *chunk,
-                                                      size, data_offset, dev,
-                                                      freesize,
-                                                      verbose);
+               return validate_geometry_ddf_container(st, level, raiddisks,
+                                                      data_offset, dev,
+                                                      freesize, verbose);
        }
 
+       if (*chunk == UnSet)
+               *chunk = DEFAULT_CHUNK;
+
        if (!dev) {
                mdu_array_info_t array = {
                        .level = level,
@@ -3447,42 +3413,6 @@ static int validate_geometry_ddf(struct supertype *st,
        return 1;
 }
 
-static int
-validate_geometry_ddf_container(struct supertype *st,
-                               int level, int layout, int raiddisks,
-                               int chunk, unsigned long long size,
-                               unsigned long long data_offset,
-                               char *dev, unsigned long long *freesize,
-                               int verbose)
-{
-       int fd;
-       unsigned long long ldsize;
-
-       if (level != LEVEL_CONTAINER)
-               return 0;
-       if (!dev)
-               return 1;
-
-       fd = open(dev, O_RDONLY|O_EXCL, 0);
-       if (fd < 0) {
-               if (verbose)
-                       pr_err("ddf: Cannot open %s: %s\n",
-                              dev, strerror(errno));
-               return 0;
-       }
-       if (!get_dev_size(fd, dev, &ldsize)) {
-               close(fd);
-               return 0;
-       }
-       close(fd);
-
-       *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
-       if (*freesize == 0)
-               return 0;
-
-       return 1;
-}
-
 static int validate_geometry_ddf_bvd(struct supertype *st,
                                     int level, int layout, int raiddisks,
                                     int *chunk, unsigned long long size,
@@ -3495,7 +3425,7 @@ static int validate_geometry_ddf_bvd(struct supertype *st,
        struct dl *dl;
        unsigned long long maxsize;
        /* ddf/bvd supports lots of things, but not containers */
-       if (level == LEVEL_CONTAINER) {
+       if (is_container(level)) {
                if (verbose)
                        pr_err("DDF cannot create a container within an container\n");
                return 0;
@@ -3914,7 +3844,8 @@ static int store_super_ddf(struct supertype *st, int fd)
        return 0;
 }
 
-static int compare_super_ddf(struct supertype *st, struct supertype *tst)
+static int compare_super_ddf(struct supertype *st, struct supertype *tst,
+                            int verbose)
 {
        /*
         * return:
@@ -4055,20 +3986,19 @@ static int compare_super_ddf(struct supertype *st, struct supertype *tst)
  * We need to confirm that the array matches the metadata in 'c' so
  * that we don't corrupt any metadata.
  */
-static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
+static int ddf_open_new(struct supertype *c, struct active_array *a, int inst)
 {
        struct ddf_super *ddf = c->sb;
-       int n = atoi(inst);
        struct mdinfo *dev;
        struct dl *dl;
        static const char faulty[] = "faulty";
 
-       if (all_ff(ddf->virt->entries[n].guid)) {
-               pr_err("subarray %d doesn't exist\n", n);
+       if (all_ff(ddf->virt->entries[inst].guid)) {
+               pr_err("subarray %d doesn't exist\n", inst);
                return -ENODEV;
        }
-       dprintf("new subarray %d, GUID: %s\n", n,
-               guid_str(ddf->virt->entries[n].guid));
+       dprintf("new subarray %d, GUID: %s\n", inst,
+               guid_str(ddf->virt->entries[inst].guid));
        for (dev = a->info.devs; dev; dev = dev->next) {
                for (dl = ddf->dlist; dl; dl = dl->next)
                        if (dl->major == dev->disk.major &&
@@ -4076,13 +4006,13 @@ static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
                                break;
                if (!dl || dl->pdnum < 0) {
                        pr_err("device %d/%d of subarray %d not found in meta data\n",
-                               dev->disk.major, dev->disk.minor, n);
+                               dev->disk.major, dev->disk.minor, inst);
                        return -1;
                }
                if ((be16_to_cpu(ddf->phys->entries[dl->pdnum].state) &
                        (DDF_Online|DDF_Missing|DDF_Failed)) != DDF_Online) {
                        pr_err("new subarray %d contains broken device %d/%d (%02x)\n",
-                              n, dl->major, dl->minor,
+                              inst, dl->major, dl->minor,
                               be16_to_cpu(ddf->phys->entries[dl->pdnum].state));
                        if (write(dev->state_fd, faulty, sizeof(faulty)-1) !=
                            sizeof(faulty) - 1)
@@ -4090,7 +4020,7 @@ static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
                        dev->curr_state = DS_FAULTY;
                }
        }
-       a->info.container_member = n;
+       a->info.container_member = inst;
        return 0;
 }
 
@@ -4911,7 +4841,7 @@ static int raid10_degraded(struct mdinfo *info)
                        pr_err("BUG: invalid raid disk\n");
                        goto out;
                }
-               if (d->state_fd > 0)
+               if (is_fd_valid(d->state_fd))
                        found[i]++;
        }
        ret = 2;
@@ -5122,13 +5052,16 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
         */
        vc = find_vdcr(ddf, a->info.container_member, rv->disk.raid_disk,
                       &n_bvd, &vcl);
-       if (vc == NULL)
+       if (vc == NULL) {
+               free(rv);
                return NULL;
+       }
 
        mu = xmalloc(sizeof(*mu));
        if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
                free(mu);
-               mu = NULL;
+               free(rv);
+               return NULL;
        }
 
        mu->len = ddf->conf_rec_len * 512 * vcl->conf.sec_elmnt_count;
@@ -5158,6 +5091,8 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                        pr_err("BUG: can't find disk %d (%d/%d)\n",
                               di->disk.raid_disk,
                               di->disk.major, di->disk.minor);
+                       free(mu);
+                       free(rv);
                        return NULL;
                }
                vc->phys_refnum[i_prim] = ddf->phys->entries[dl->pdnum].refnum;
@@ -5213,7 +5148,6 @@ struct superswitch super_ddf = {
        .match_home     = match_home_ddf,
        .uuid_from_super= uuid_from_super_ddf,
        .getinfo_super  = getinfo_super_ddf,
-       .update_super   = update_super_ddf,
 
        .avail_size     = avail_size_ddf,
 
@@ -5228,6 +5162,7 @@ struct superswitch super_ddf = {
        .default_geometry = default_geometry_ddf,
 
        .external       = 1,
+       .swapuuid       = 0,
 
 /* for mdmon */
        .open_new       = ddf_open_new,
index e4d2122daeeef218e582cfc320b427c1fd1c7d4c..2b8b6fda976c57688a9d88caa06df8e2b0ed0e51 100644 (file)
 #define HAVE_STDINT_H 1
 #include "mdadm.h"
 #include "mdmon.h"
+#include "dlink.h"
 #include "sha1.h"
 #include "platform-intel.h"
 #include <values.h>
 #include <scsi/sg.h>
 #include <ctype.h>
 #include <dirent.h>
+#include "drive_encryption.h"
 
 /* MPB == Metadata Parameter Block */
 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
-#define MPB_VERSION_RAID0 "1.0.00"
-#define MPB_VERSION_RAID1 "1.1.00"
-#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
-#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
-#define MPB_VERSION_RAID5 "1.2.02"
-#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
-#define MPB_VERSION_CNG "1.2.06"
+
+/* Legacy IMSM versions:
+ * MPB_VERSION_RAID0 1.0.00
+ * MPB_VERSION_RAID1 1.1.00
+ * MPB_VERSION_MANY_VOLUMES_PER_ARRAY 1.2.00
+ * MPB_VERSION_3OR4_DISK_ARRAY 1.2.01
+ * MPB_VERSION_RAID5 1.2.02
+ * MPB_VERSION_5OR6_DISK_ARRAY 1.2.04
+ * MPB_VERSION_CNG 1.2.06
+ */
+
 #define MPB_VERSION_ATTRIBS "1.3.00"
+#define MPB_VERSION_ATTRIBS_JD "2.0.00"
 #define MAX_SIGNATURE_LENGTH  32
 #define MAX_RAID_SERIAL_LEN   16
 
@@ -55,6 +62,8 @@
 #define MPB_ATTRIB_RAIDCNG             __cpu_to_le32(0x00000020)
 /* supports expanded stripe sizes of  256K, 512K and 1MB */
 #define MPB_ATTRIB_EXP_STRIPE_SIZE     __cpu_to_le32(0x00000040)
+/* supports RAID10 with more than 4 drives */
+#define MPB_ATTRIB_RAID10_EXT          __cpu_to_le32(0x00000080)
 
 /* The OROM Support RST Caching of Volumes */
 #define MPB_ATTRIB_NVM                 __cpu_to_le32(0x02000000)
@@ -82,6 +91,7 @@
                                        MPB_ATTRIB_RAID10          | \
                                        MPB_ATTRIB_RAID5           | \
                                        MPB_ATTRIB_EXP_STRIPE_SIZE | \
+                                       MPB_ATTRIB_RAID10_EXT      | \
                                        MPB_ATTRIB_BBM)
 
 /* Define attributes that are unused but not harmful */
                                                   * mutliple PPL area
                                                   */
 
+/*
+ * Internal write-intent bitmap is stored in the same area as PPL.
+ * Both features are mutually exclusive, so it is not an issue.
+ * The first 8KiB of the area are reserved and shall not be used.
+ */
+#define IMSM_BITMAP_AREA_RESERVED_SIZE 8192
+
+#define IMSM_BITMAP_HEADER_OFFSET (IMSM_BITMAP_AREA_RESERVED_SIZE)
+#define IMSM_BITMAP_HEADER_SIZE MAX_SECTOR_SIZE
+
+#define IMSM_BITMAP_START_OFFSET (IMSM_BITMAP_HEADER_OFFSET + IMSM_BITMAP_HEADER_SIZE)
+#define IMSM_BITMAP_AREA_SIZE (MULTIPLE_PPL_AREA_SIZE_IMSM - IMSM_BITMAP_START_OFFSET)
+#define IMSM_BITMAP_AND_HEADER_SIZE (IMSM_BITMAP_AREA_SIZE + IMSM_BITMAP_HEADER_SIZE)
+
+#define IMSM_DEFAULT_BITMAP_CHUNKSIZE (64 * 1024 * 1024)
+#define IMSM_DEFAULT_BITMAP_DAEMON_SLEEP 5
+
 /*
  * This macro let's us ensure that no-one accidentally
  * changes the size of a struct
@@ -147,7 +174,8 @@ struct imsm_map {
        __u8  raid_level;
 #define IMSM_T_RAID0 0
 #define IMSM_T_RAID1 1
-#define IMSM_T_RAID5 5         /* since metadata version 1.2.02 ? */
+#define IMSM_T_RAID5 5
+#define IMSM_T_RAID10 10
        __u8  num_members;      /* number of member disks */
        __u8  num_domains;      /* number of parity domains */
        __u8  failed_disk_num;  /* valid only when state is degraded */
@@ -164,7 +192,7 @@ struct imsm_map {
 ASSERT_SIZE(imsm_map, 52)
 
 struct imsm_vol {
-       __u32 curr_migr_unit;
+       __u32 curr_migr_unit_lo;
        __u32 checkpoint_id;    /* id to access curr_migr_unit */
        __u8  migr_state;       /* Normal or Migrating */
 #define MIGR_INIT 0
@@ -181,7 +209,8 @@ struct imsm_vol {
        __u8  fs_state;         /* fast-sync state for CnG (0xff == disabled) */
        __u16 verify_errors;    /* number of mismatches */
        __u16 bad_blocks;       /* number of bad blocks during verify */
-       __u32 filler[4];
+       __u32 curr_migr_unit_hi;
+       __u32 filler[3];
        struct imsm_map map[1];
        /* here comes another one if migr_state */
 };
@@ -229,6 +258,7 @@ struct imsm_dev {
 #define RWH_MULTIPLE_DISTRIBUTED 3
 #define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4
 #define RWH_MULTIPLE_OFF 5
+#define RWH_BITMAP 6
        __u8  rwh_policy; /* Raid Write Hole Policy */
        __u8  jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
        __u8  filler1;
@@ -260,8 +290,9 @@ struct imsm_super {
                                         * (starts at 1)
                                         */
        __u16 filler1;                  /* 0x4E - 0x4F */
-#define IMSM_FILLERS 34
-       __u32 filler[IMSM_FILLERS];     /* 0x50 - 0xD7 RAID_MPB_FILLERS */
+       __u64 creation_time;            /* 0x50 - 0x57 Array creation time */
+#define IMSM_FILLERS 32
+       __u32 filler[IMSM_FILLERS];     /* 0x58 - 0xD7 RAID_MPB_FILLERS */
        struct imsm_disk disk[1];       /* 0xD8 diskTbl[numDisks] */
        /* here comes imsm_dev[num_raid_devs] */
        /* here comes BBM logs */
@@ -342,8 +373,21 @@ struct migr_record {
                                       * destination - high order 32 bits */
        __u32 num_migr_units_hi;      /* Total num migration units-of-op
                                       * high order 32 bits */
+       __u32 filler[16];
 };
-ASSERT_SIZE(migr_record, 64)
+ASSERT_SIZE(migr_record, 128)
+
+/**
+ * enum imsm_status - internal IMSM return values representation.
+ * @IMSM_STATUS_OK: function succeeded.
+ * @IMSM_STATUS_ERROR: General error occurred (not specified).
+ *
+ * Typedefed to imsm_status_t.
+ */
+typedef enum imsm_status {
+       IMSM_STATUS_ERROR = -1,
+       IMSM_STATUS_OK = 0,
+} imsm_status_t;
 
 struct md_list {
        /* usage marker:
@@ -359,8 +403,6 @@ struct md_list {
        struct md_list *next;
 };
 
-#define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg))
-
 static __u8 migr_type(struct imsm_dev *dev)
 {
        if (dev->vol.migr_type == MIGR_VERIFY &&
@@ -465,8 +507,15 @@ struct intel_disk {
        struct intel_disk *next;
 };
 
+/**
+ * struct extent - reserved space details.
+ * @start: start offset.
+ * @size: size of reservation, set to 0 for metadata reservation.
+ * @vol: index of the volume, meaningful if &size is set.
+ */
 struct extent {
        unsigned long long start, size;
+       int vol;
 };
 
 /* definitions of reshape process types */
@@ -474,6 +523,7 @@ enum imsm_reshape_type {
        CH_TAKEOVER,
        CH_MIGRATION,
        CH_ARRAY_SIZE,
+       CH_ABORT
 };
 
 /* definition of messages passed to imsm_process_update */
@@ -550,7 +600,7 @@ struct imsm_update_size_change {
 
 struct imsm_update_general_migration_checkpoint {
        enum imsm_update_type type;
-       __u32 curr_migr_unit;
+       __u64 curr_migr_unit;
 };
 
 struct disk_info {
@@ -593,9 +643,53 @@ static const char *_sys_dev_type[] = {
        [SYS_DEV_SAS] = "SAS",
        [SYS_DEV_SATA] = "SATA",
        [SYS_DEV_NVME] = "NVMe",
-       [SYS_DEV_VMD] = "VMD"
+       [SYS_DEV_VMD] = "VMD",
+       [SYS_DEV_SATA_VMD] = "SATA VMD"
 };
 
+static int no_platform = -1;
+
+static int check_no_platform(void)
+{
+       static const char search[] = "mdadm.imsm.test=1";
+       FILE *fp;
+
+       if (no_platform >= 0)
+               return no_platform;
+
+       if (check_env("IMSM_NO_PLATFORM")) {
+               no_platform = 1;
+               return 1;
+       }
+       fp = fopen("/proc/cmdline", "r");
+       if (fp) {
+               char *l = conf_line(fp);
+               char *w = l;
+
+               if (l == NULL) {
+                       fclose(fp);
+                       return 0;
+               }
+
+               do {
+                       if (strcmp(w, search) == 0)
+                               no_platform = 1;
+                       w = dl_next(w);
+               } while (w != l);
+               free_line(l);
+               fclose(fp);
+               if (no_platform >= 0)
+                       return no_platform;
+       }
+       no_platform = 0;
+       return 0;
+}
+
+void imsm_set_no_platform(int v)
+{
+       no_platform = v;
+}
+
 const char *get_sys_dev_type(enum sys_dev_type type)
 {
        if (type >= SYS_DEV_MAX)
@@ -670,22 +764,22 @@ static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
        if ((list = find_intel_devices()) == NULL)
                return 0;
 
-       if (fd < 0)
+       if (!is_fd_valid(fd))
                disk_path  = (char *) devname;
        else
-               disk_path = diskfd_to_devpath(fd);
+               disk_path = diskfd_to_devpath(fd, 1, NULL);
 
        if (!disk_path)
                return 0;
 
        for (elem = list; elem; elem = elem->next)
                if (path_attached_to_hba(disk_path, elem->path))
-                       return elem;
+                       break;
 
        if (disk_path != devname)
                free(disk_path);
 
-       return NULL;
+       return elem;
 }
 
 static int find_intel_hba_capability(int fd, struct intel_super *super,
@@ -830,6 +924,21 @@ static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
        return inf;
 }
 
+/**
+ * __get_imsm_dev() - Get device with index from imsm_super.
+ * @mpb: &imsm_super pointer, not NULL.
+ * @index: Device index.
+ *
+ * Function never returns NULL: it aborts in any case
+ * where NULL would otherwise be returned.
+ *
+ * Device index should be in range 0 up to num_raid_devs.
+ * Function assumes the index was already verified.
+ * Index must be valid, otherwise abort() is called.
+ *
+ * Return: Pointer to corresponding imsm_dev.
+ *
+ */
 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
 {
        int offset;
@@ -837,30 +946,47 @@ static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
        void *_mpb = mpb;
 
        if (index >= mpb->num_raid_devs)
-               return NULL;
+               goto error;
 
        /* devices start after all disks */
        offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
 
-       for (i = 0; i <= index; i++)
+       for (i = 0; i <= index; i++, offset += sizeof_imsm_dev(_mpb + offset, 0))
                if (i == index)
                        return _mpb + offset;
-               else
-                       offset += sizeof_imsm_dev(_mpb + offset, 0);
-
-       return NULL;
+error:
+       pr_err("cannot find imsm_dev with index %u in imsm_super\n", index);
+       abort();
 }
 
+/**
+ * get_imsm_dev() - Get device with index from intel_super.
+ * @super: &intel_super pointer, not NULL.
+ * @index: Device index.
+ *
+ * Function never returns NULL: it aborts in any case
+ * where NULL would otherwise be returned.
+ *
+ * Device index should be in range 0 up to num_raid_devs.
+ * Function assumes the index was already verified.
+ * Index must be valid, otherwise abort() is called.
+ *
+ * Return: Pointer to corresponding imsm_dev.
+ *
+ */
 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
 {
        struct intel_dev *dv;
 
        if (index >= super->anchor->num_raid_devs)
-               return NULL;
+               goto error;
+
        for (dv = super->devlist; dv; dv = dv->next)
                if (dv->index == index)
                        return dv->dev;
-       return NULL;
+error:
+       pr_err("cannot find imsm_dev with index %u in intel_super\n", index);
+       abort();
 }
 
 static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr
@@ -1130,7 +1256,7 @@ static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
        map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
 }
 
-static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
+static int get_imsm_disk_slot(struct imsm_map *map, const unsigned int idx)
 {
        int slot;
        __u32 ord;
@@ -1141,21 +1267,66 @@ static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
                        return slot;
        }
 
-       return -1;
+       return IMSM_STATUS_ERROR;
+}
+/**
+ * update_imsm_raid_level() - update raid level appropriately in &imsm_map.
+ * @map:       &imsm_map pointer.
+ * @new_level: MD style level.
+ *
+ * For backward compatibility reasons we need to differentiate RAID10.
+ * In the past IMSM RAID10 was presented as RAID1.
+ * Keep compatibility unless the level was explicitly updated by the UEFI driver.
+ *
+ * Routine needs num_members to be set and (optionally) raid_level.
+ */
+static void update_imsm_raid_level(struct imsm_map *map, int new_level)
+{
+       if (new_level != IMSM_T_RAID10) {
+               map->raid_level = new_level;
+               return;
+       }
+
+       if (map->num_members == 4) {
+               if (map->raid_level == IMSM_T_RAID10 || map->raid_level == IMSM_T_RAID1)
+                       return;
+
+               map->raid_level = IMSM_T_RAID1;
+               return;
+       }
+
+       map->raid_level = IMSM_T_RAID10;
 }
 
 static int get_imsm_raid_level(struct imsm_map *map)
 {
-       if (map->raid_level == 1) {
+       if (map->raid_level == IMSM_T_RAID1) {
                if (map->num_members == 2)
-                       return 1;
+                       return IMSM_T_RAID1;
                else
-                       return 10;
+                       return IMSM_T_RAID10;
        }
 
        return map->raid_level;
 }
 
+/**
+ * get_disk_slot_in_dev() - retrieve disk slot from &imsm_dev.
+ * @super: &intel_super pointer, not NULL.
+ * @dev_idx: imsm device index.
+ * @idx: disk index.
+ *
+ * Return: Slot on success, IMSM_STATUS_ERROR otherwise.
+ */
+static int get_disk_slot_in_dev(struct intel_super *super, const __u8 dev_idx,
+                               const unsigned int idx)
+{
+       struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
+       struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+       return get_imsm_disk_slot(map, idx);
+}
+
 static int cmp_extent(const void *av, const void *bv)
 {
        const struct extent *a = av;
@@ -1172,13 +1343,9 @@ static int count_memberships(struct dl *dl, struct intel_super *super)
        int memberships = 0;
        int i;
 
-       for (i = 0; i < super->anchor->num_raid_devs; i++) {
-               struct imsm_dev *dev = get_imsm_dev(super, i);
-               struct imsm_map *map = get_imsm_map(dev, MAP_0);
-
-               if (get_imsm_disk_slot(map, dl->index) >= 0)
+       for (i = 0; i < super->anchor->num_raid_devs; i++)
+               if (get_disk_slot_in_dev(super, i, dl->index) >= 0)
                        memberships++;
-       }
 
        return memberships;
 }
@@ -1207,6 +1374,33 @@ static unsigned long long total_blocks(struct imsm_disk *disk)
        return join_u32(disk->total_blocks_lo, disk->total_blocks_hi);
 }
 
+/**
+ * imsm_num_data_members() - get data drives count for an array.
+ * @map: Map to analyze.
+ *
+ * num_data_members value represents minimal count of drives for level.
+ * The name of the property could be misleading for RAID5 with asymmetric layout
+ * because some data is required to be calculated from parity.
+ * The property is extracted from level and num_members value.
+ *
+ * Return: num_data_members value on success, zero otherwise.
+ */
+static __u8 imsm_num_data_members(struct imsm_map *map)
+{
+       switch (get_imsm_raid_level(map)) {
+       case 0:
+               return map->num_members;
+       case 1:
+       case 10:
+               return map->num_members / 2;
+       case 5:
+               return map->num_members - 1;
+       default:
+               dprintf("unsupported raid level\n");
+               return 0;
+       }
+}
+
 static unsigned long long pba_of_lba0(struct imsm_map *map)
 {
        if (map == NULL)
@@ -1228,6 +1422,14 @@ static unsigned long long num_data_stripes(struct imsm_map *map)
        return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi);
 }
 
+static unsigned long long vol_curr_migr_unit(struct imsm_dev *dev)
+{
+       if (dev == NULL)
+               return 0;
+
+       return join_u32(dev->vol.curr_migr_unit_lo, dev->vol.curr_migr_unit_hi);
+}
+
 static unsigned long long imsm_dev_size(struct imsm_dev *dev)
 {
        if (dev == NULL)
@@ -1272,6 +1474,24 @@ static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
        split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
 }
 
+/**
+ * set_num_domains() - Set number of domains for an array.
+ * @map: Map to be updated.
+ *
+ * num_domains property represents copies count of each data drive, thus making
+ * it meaningful only for RAID1 and RAID10. IMSM supports two domains for
+ * raid1 and raid10.
+ */
+static void set_num_domains(struct imsm_map *map)
+{
+       int level = get_imsm_raid_level(map);
+
+       if (level == 1 || level == 10)
+               map->num_domains = 2;
+       else
+               map->num_domains = 1;
+}
+
 static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
 {
        split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi);
@@ -1287,6 +1507,32 @@ static void set_num_data_stripes(struct imsm_map *map, unsigned long long n)
        split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi);
 }
 
+/**
+ * update_num_data_stripes() - Calculate and update num_data_stripes value.
+ * @map: map to be updated.
+ * @dev_size: size of volume.
+ *
+ * num_data_stripes value is additionally divided by num_domains, therefore for
+ * levels where num_domains is not 1, nds is a part of real value.
+ */
+static void update_num_data_stripes(struct imsm_map *map,
+                                    unsigned long long dev_size)
+{
+       unsigned long long nds = dev_size / imsm_num_data_members(map);
+
+       nds /= map->num_domains;
+       nds /= map->blocks_per_strip;
+       set_num_data_stripes(map, nds);
+}
+
+static void set_vol_curr_migr_unit(struct imsm_dev *dev, unsigned long long n)
+{
+       if (dev == NULL)
+               return;
+
+       split_ull(n, &dev->vol.curr_migr_unit_lo, &dev->vol.curr_migr_unit_hi);
+}
+
 static void set_imsm_dev_size(struct imsm_dev *dev, unsigned long long n)
 {
        split_ull(n, &dev->size_low, &dev->size_high);
@@ -1337,9 +1583,10 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl,
                                  int get_minimal_reservation)
 {
        /* find a list of used extents on the given physical device */
-       struct extent *rv, *e;
-       int i;
        int memberships = count_memberships(dl, super);
+       struct extent *rv = xcalloc(memberships + 1, sizeof(struct extent));
+       struct extent *e = rv;
+       int i;
        __u32 reservation;
 
        /* trim the reserved area for spares, so they can join any array
@@ -1351,9 +1598,6 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl,
        else
                reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
 
-       rv = xcalloc(sizeof(struct extent), (memberships + 1));
-       e = rv;
-
        for (i = 0; i < super->anchor->num_raid_devs; i++) {
                struct imsm_dev *dev = get_imsm_dev(super, i);
                struct imsm_map *map = get_imsm_map(dev, MAP_0);
@@ -1361,6 +1605,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl,
                if (get_imsm_disk_slot(map, dl->index) >= 0) {
                        e->start = pba_of_lba0(map);
                        e->size = per_dev_array_size(map);
+                       e->vol = i;
                        e++;
                }
        }
@@ -1442,17 +1687,29 @@ static int is_journal(struct imsm_disk *disk)
        return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
 }
 
-/* round array size down to closest MB and ensure it splits evenly
- * between members
+/**
+ * round_member_size_to_mb() - Round given size down to closest MiB.
+ * @size: size to round in sectors.
  */
-static unsigned long long round_size_to_mb(unsigned long long size, unsigned int
-                                          disk_count)
+static inline unsigned long long round_member_size_to_mb(unsigned long long size)
 {
-       size /= disk_count;
-       size = (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
-       size *= disk_count;
+       return (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+}
 
-       return size;
+/**
+ * round_size_to_mb() - Round given size.
+ * @array_size: size to round in sectors.
+ * @disk_count: count of data members.
+ *
+ * Get size per each data member and round it to closest MiB to ensure that data
+ * splits evenly between members.
+ *
+ * Return: Array size, rounded down.
+ */
+static inline unsigned long long round_size_to_mb(unsigned long long array_size,
+                                                 unsigned int disk_count)
+{
+       return round_member_size_to_mb(array_size / disk_count) * disk_count;
 }
 
 static int able_to_resync(int raid_level, int missing_disks)
@@ -1516,49 +1773,7 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super)
        return  (remainder < rv) ? remainder : rv;
 }
 
-/*
- * Return minimum size of a spare and sector size
- * that can be used in this array
- */
-int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
-{
-       struct intel_super *super = st->sb;
-       struct dl *dl;
-       struct extent *e;
-       int i;
-       unsigned long long size = 0;
-
-       c->min_size = 0;
-       c->sector_size = 0;
-
-       if (!super)
-               return -EINVAL;
-       /* find first active disk in array */
-       dl = super->disks;
-       while (dl && (is_failed(&dl->disk) || dl->index == -1))
-               dl = dl->next;
-       if (!dl)
-               return -EINVAL;
-       /* find last lba used by subarrays */
-       e = get_extents(super, dl, 0);
-       if (!e)
-               return -EINVAL;
-       for (i = 0; e[i].size; i++)
-               continue;
-       if (i > 0)
-               size = e[i-1].start + e[i-1].size;
-       free(e);
-
-       /* add the amount of space needed for metadata */
-       size += imsm_min_reserved_sectors(super);
-
-       c->min_size = size * 512;
-       c->sector_size = super->sector_size;
-
-       return 0;
-}
-
-static int is_gen_migration(struct imsm_dev *dev);
+static bool is_gen_migration(struct imsm_dev *dev);
 
 #define IMSM_4K_DIV 8
 
@@ -1578,6 +1793,7 @@ static void print_imsm_dev(struct intel_super *super,
 
        printf("\n");
        printf("[%.16s]:\n", dev->volume);
+       printf("       Subarray : %d\n", super->current_vol);
        printf("           UUID : %s\n", uuid);
        printf("     RAID Level : %d", get_imsm_raid_level(map));
        if (map2)
@@ -1604,7 +1820,7 @@ static void print_imsm_dev(struct intel_super *super,
        printf("\n");
        printf("    Failed disk : ");
        if (map->failed_disk_num == 0xff)
-               printf("none");
+               printf(STR_COMMON_NONE);
        else
                printf("%i", map->failed_disk_num);
        printf("\n");
@@ -1625,7 +1841,7 @@ static void print_imsm_dev(struct intel_super *super,
                   (unsigned long long)sz * 512 / super->sector_size,
               human_size(sz * 512));
        printf("  Sector Offset : %llu\n",
-               pba_of_lba0(map));
+               pba_of_lba0(map) * 512 / super->sector_size);
        printf("    Num Stripes : %llu\n",
                num_data_stripes(map));
        printf("     Chunk Size : %u KiB",
@@ -1658,8 +1874,7 @@ static void print_imsm_dev(struct intel_super *super,
                struct imsm_map *map = get_imsm_map(dev, MAP_1);
 
                printf(" <-- %s", map_state_str[map->map_state]);
-               printf("\n     Checkpoint : %u ",
-                          __le32_to_cpu(dev->vol.curr_migr_unit));
+               printf("\n     Checkpoint : %llu ", vol_curr_migr_unit(dev));
                if (is_gen_migration(dev) && (slot > 1 || slot < 0))
                        printf("(N/A)");
                else
@@ -1680,8 +1895,12 @@ static void print_imsm_dev(struct intel_super *super,
                printf("Multiple distributed PPLs\n");
        else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE)
                printf("Multiple PPLs on journaling drive\n");
+       else if (dev->rwh_policy == RWH_BITMAP)
+               printf("Write-intent bitmap\n");
        else
                printf("<unknown:%d>\n", dev->rwh_policy);
+
+       printf("      Volume ID : %u\n", dev->my_vol_raid_dev_num);
 }
 
 static void print_imsm_disk(struct imsm_disk *disk,
@@ -1748,7 +1967,8 @@ void convert_to_4k(struct intel_super *super)
                struct imsm_map *map = get_imsm_map(dev, MAP_0);
                /* dev */
                set_imsm_dev_size(dev, imsm_dev_size(dev)/IMSM_4K_DIV);
-               dev->vol.curr_migr_unit /= IMSM_4K_DIV;
+               set_vol_curr_migr_unit(dev,
+                                      vol_curr_migr_unit(dev) / IMSM_4K_DIV);
 
                /* map0 */
                set_blocks_per_member(map, blocks_per_member(map)/IMSM_4K_DIV);
@@ -1797,13 +2017,14 @@ void examine_migr_rec_imsm(struct intel_super *super)
                struct imsm_map *map;
                int slot = -1;
 
-               if (is_gen_migration(dev) == 0)
+               if (is_gen_migration(dev) == false)
                                continue;
 
                printf("\nMigration Record Information:");
 
                /* first map under migration */
                map = get_imsm_map(dev, MAP_0);
+
                if (map)
                        slot = get_imsm_disk_slot(map, super->disks->index);
                if (map == NULL || slot > 1 || slot < 0) {
@@ -1876,7 +2097,8 @@ void convert_from_4k(struct intel_super *super)
                struct imsm_map *map = get_imsm_map(dev, MAP_0);
                /* dev */
                set_imsm_dev_size(dev, imsm_dev_size(dev)*IMSM_4K_DIV);
-               dev->vol.curr_migr_unit *= IMSM_4K_DIV;
+               set_vol_curr_migr_unit(dev,
+                                      vol_curr_migr_unit(dev) * IMSM_4K_DIV);
 
                /* map0 */
                set_blocks_per_member(map, blocks_per_member(map)*IMSM_4K_DIV);
@@ -1914,91 +2136,18 @@ void convert_from_4k(struct intel_super *super)
        mpb->check_sum = __gen_imsm_checksum(mpb);
 }
 
-/*******************************************************************************
- * function: imsm_check_attributes
- * Description: Function checks if features represented by attributes flags
- *             are supported by mdadm.
- * Parameters:
- *             attributes - Attributes read from metadata
- * Returns:
- *             0 - passed attributes contains unsupported features flags
- *             1 - all features are supported
- ******************************************************************************/
-static int imsm_check_attributes(__u32 attributes)
+/**
+ * imsm_check_attributes() - Check if features represented by attributes flags are supported.
+ *
+ * @attributes: attributes read from metadata.
+ * Returns: true if all features are supported, false otherwise.
+ */
+static bool imsm_check_attributes(__u32 attributes)
 {
-       int ret_val = 1;
-       __u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff;
-
-       not_supported &= ~MPB_ATTRIB_IGNORED;
-
-       not_supported &= attributes;
-       if (not_supported) {
-               pr_err("(IMSM): Unsupported attributes : %x\n",
-                       (unsigned)__le32_to_cpu(not_supported));
-               if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
-                       dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n");
-                       not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
-               }
-               if (not_supported & MPB_ATTRIB_2TB) {
-                       dprintf("\t\tMPB_ATTRIB_2TB\n");
-                       not_supported ^= MPB_ATTRIB_2TB;
-               }
-               if (not_supported & MPB_ATTRIB_RAID0) {
-                       dprintf("\t\tMPB_ATTRIB_RAID0\n");
-                       not_supported ^= MPB_ATTRIB_RAID0;
-               }
-               if (not_supported & MPB_ATTRIB_RAID1) {
-                       dprintf("\t\tMPB_ATTRIB_RAID1\n");
-                       not_supported ^= MPB_ATTRIB_RAID1;
-               }
-               if (not_supported & MPB_ATTRIB_RAID10) {
-                       dprintf("\t\tMPB_ATTRIB_RAID10\n");
-                       not_supported ^= MPB_ATTRIB_RAID10;
-               }
-               if (not_supported & MPB_ATTRIB_RAID1E) {
-                       dprintf("\t\tMPB_ATTRIB_RAID1E\n");
-                       not_supported ^= MPB_ATTRIB_RAID1E;
-               }
-               if (not_supported & MPB_ATTRIB_RAID5) {
-               dprintf("\t\tMPB_ATTRIB_RAID5\n");
-                       not_supported ^= MPB_ATTRIB_RAID5;
-               }
-               if (not_supported & MPB_ATTRIB_RAIDCNG) {
-                       dprintf("\t\tMPB_ATTRIB_RAIDCNG\n");
-                       not_supported ^= MPB_ATTRIB_RAIDCNG;
-               }
-               if (not_supported & MPB_ATTRIB_BBM) {
-                       dprintf("\t\tMPB_ATTRIB_BBM\n");
-               not_supported ^= MPB_ATTRIB_BBM;
-               }
-               if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
-                       dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY (== MPB_ATTRIB_LEGACY)\n");
-                       not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
-               }
-               if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) {
-                       dprintf("\t\tMPB_ATTRIB_EXP_STRIP_SIZE\n");
-                       not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE;
-               }
-               if (not_supported & MPB_ATTRIB_2TB_DISK) {
-                       dprintf("\t\tMPB_ATTRIB_2TB_DISK\n");
-                       not_supported ^= MPB_ATTRIB_2TB_DISK;
-               }
-               if (not_supported & MPB_ATTRIB_NEVER_USE2) {
-                       dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n");
-                       not_supported ^= MPB_ATTRIB_NEVER_USE2;
-               }
-               if (not_supported & MPB_ATTRIB_NEVER_USE) {
-                       dprintf("\t\tMPB_ATTRIB_NEVER_USE\n");
-                       not_supported ^= MPB_ATTRIB_NEVER_USE;
-               }
+       if ((attributes & (MPB_ATTRIB_SUPPORTED | MPB_ATTRIB_IGNORED)) == attributes)
+               return true;
 
-               if (not_supported)
-                       dprintf("(IMSM): Unknown attributes : %x\n", not_supported);
-
-               ret_val = 0;
-       }
-
-       return ret_val;
+       return false;
 }
 
 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
@@ -2014,6 +2163,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
        __u32 sum;
        __u32 reserved = imsm_reserved_sectors(super, super->disks);
        struct dl *dl;
+       time_t creation_time;
 
        strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
        str[MPB_SIG_LEN-1] = '\0';
@@ -2022,13 +2172,15 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
        printf("    Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
        printf("         Family : %08x\n", __le32_to_cpu(mpb->family_num));
        printf("     Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
-       printf("     Attributes : ");
-       if (imsm_check_attributes(mpb->attributes))
-               printf("All supported\n");
-       else
-               printf("not supported\n");
+       creation_time = __le64_to_cpu(mpb->creation_time);
+       printf("  Creation Time : %.24s\n",
+               creation_time ? ctime(&creation_time) : "Unknown");
+
+       printf("     Attributes : %08x (%s)\n", mpb->attributes,
+              imsm_check_attributes(mpb->attributes) ? "supported" : "not supported");
+
        getinfo_super_imsm(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
        printf("           UUID : %s\n", nbuf + 5);
        sum = __le32_to_cpu(mpb->check_sum);
        printf("       Checksum : %08x %s\n", sum,
@@ -2053,7 +2205,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
 
                super->current_vol = i;
                getinfo_super_imsm(st, &info, NULL);
-               fname_from_uuid(st, &info, nbuf, ':');
+               fname_from_uuid(&info, nbuf);
                print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
        }
        for (i = 0; i < mpb->num_disks; i++) {
@@ -2076,15 +2228,9 @@ static void brief_examine_super_imsm(struct supertype *st, int verbose)
        /* We just write a generic IMSM ARRAY entry */
        struct mdinfo info;
        char nbuf[64];
-       struct intel_super *super = st->sb;
-
-       if (!super->anchor->num_raid_devs) {
-               printf("ARRAY metadata=imsm\n");
-               return;
-       }
 
        getinfo_super_imsm(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
        printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
 }
 
@@ -2101,14 +2247,14 @@ static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
                return;
 
        getinfo_super_imsm(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
        for (i = 0; i < super->anchor->num_raid_devs; i++) {
                struct imsm_dev *dev = get_imsm_dev(super, i);
 
                super->current_vol = i;
                getinfo_super_imsm(st, &info, NULL);
-               fname_from_uuid(st, &info, nbuf1, ':');
-               printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
+               fname_from_uuid(&info, nbuf1);
+               printf("ARRAY " DEV_MD_DIR "%.16s container=%s member=%d UUID=%s\n",
                       dev->volume, nbuf + 5, i, nbuf1 + 5);
        }
 }
@@ -2121,66 +2267,12 @@ static void export_examine_super_imsm(struct supertype *st)
        char nbuf[64];
 
        getinfo_super_imsm(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
        printf("MD_METADATA=imsm\n");
        printf("MD_LEVEL=container\n");
        printf("MD_UUID=%s\n", nbuf+5);
        printf("MD_DEVICES=%u\n", mpb->num_disks);
-}
-
-static int copy_metadata_imsm(struct supertype *st, int from, int to)
-{
-       /* The second last sector of the device contains
-        * the "struct imsm_super" metadata.
-        * This contains mpb_size which is the size in bytes of the
-        * extended metadata.  This is located immediately before
-        * the imsm_super.
-        * We want to read all that, plus the last sector which
-        * may contain a migration record, and write it all
-        * to the target.
-        */
-       void *buf;
-       unsigned long long dsize, offset;
-       int sectors;
-       struct imsm_super *sb;
-       struct intel_super *super = st->sb;
-       unsigned int sector_size = super->sector_size;
-       unsigned int written = 0;
-
-       if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0)
-               return 1;
-
-       if (!get_dev_size(from, NULL, &dsize))
-               goto err;
-
-       if (lseek64(from, dsize-(2*sector_size), 0) < 0)
-               goto err;
-       if ((unsigned int)read(from, buf, sector_size) != sector_size)
-               goto err;
-       sb = buf;
-       if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
-               goto err;
-
-       sectors = mpb_sectors(sb, sector_size) + 2;
-       offset = dsize - sectors * sector_size;
-       if (lseek64(from, offset, 0) < 0 ||
-           lseek64(to, offset, 0) < 0)
-               goto err;
-       while (written < sectors * sector_size) {
-               int n = sectors*sector_size - written;
-               if (n > 4096)
-                       n = 4096;
-               if (read(from, buf, n) != n)
-                       goto err;
-               if (write(to, buf, n) != n)
-                       goto err;
-               written += n;
-       }
-       free(buf);
-       return 0;
-err:
-       free(buf);
-       return 1;
+       printf("MD_CREATION_TIME=%llu\n", __le64_to_cpu(mpb->creation_time));
 }
 
 static void detail_super_imsm(struct supertype *st, char *homehost,
@@ -2195,7 +2287,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost,
                super->current_vol = strtoul(subarray, NULL, 10);
 
        getinfo_super_imsm(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
        printf("\n              UUID : %s\n", nbuf + 5);
 
        super->current_vol = temp_vol;
@@ -2212,7 +2304,7 @@ static void brief_detail_super_imsm(struct supertype *st, char *subarray)
                super->current_vol = strtoul(subarray, NULL, 10);
 
        getinfo_super_imsm(st, &info, NULL);
-       fname_from_uuid(st, &info, nbuf, ':');
+       fname_from_uuid(&info, nbuf);
        printf(" UUID=%s", nbuf + 5);
 
        super->current_vol = temp_vol;
@@ -2222,12 +2314,41 @@ static int imsm_read_serial(int fd, char *devname, __u8 *serial,
                            size_t serial_buf_len);
 static void fd2devname(int fd, char *name);
 
-static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
+void print_encryption_information(int disk_fd, enum sys_dev_type hba_type)
+{
+       struct encryption_information information = {0};
+       mdadm_status_t status = MDADM_STATUS_SUCCESS;
+       const char *indent = "                  ";
+
+       switch (hba_type) {
+       case SYS_DEV_VMD:
+       case SYS_DEV_NVME:
+               status = get_nvme_opal_encryption_information(disk_fd, &information, 1);
+               break;
+       case SYS_DEV_SATA:
+       case SYS_DEV_SATA_VMD:
+               status = get_ata_encryption_information(disk_fd, &information, 1);
+               break;
+       default:
+               return;
+       }
+
+       if (status) {
+               pr_err("Failed to get drive encryption information.\n");
+               return;
+       }
+
+       printf("%sEncryption(Ability|Status): %s|%s\n", indent,
+              get_encryption_ability_string(information.ability),
+              get_encryption_status_string(information.status));
+}
+
+static int ahci_enumerate_ports(struct sys_dev *hba, int port_count, int host_base, int verbose)
 {
        /* dump an unsorted list of devices attached to AHCI Intel storage
         * controller, as well as non-connected ports
         */
-       int hba_len = strlen(hba_path) + 1;
+       int hba_len = strlen(hba->path) + 1;
        struct dirent *ent;
        DIR *dir;
        char *path = NULL;
@@ -2253,36 +2374,31 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
                char vendor[64];
                char buf[1024];
                int major, minor;
-               char *device;
+               char device[PATH_MAX];
                char *c;
                int port;
                int type;
 
                if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
                        continue;
-               path = devt_to_devpath(makedev(major, minor));
+               path = devt_to_devpath(makedev(major, minor), 1, NULL);
                if (!path)
                        continue;
-               if (!path_attached_to_hba(path, hba_path)) {
+               if (!path_attached_to_hba(path, hba->path)) {
                        free(path);
                        path = NULL;
                        continue;
                }
 
-               /* retrieve the scsi device type */
-               if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
+               /* retrieve the scsi device */
+               if (!devt_to_devpath(makedev(major, minor), 1, device)) {
                        if (verbose > 0)
-                               pr_err("failed to allocate 'device'\n");
+                               pr_err("failed to get device\n");
                        err = 2;
                        break;
                }
-               sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
-               if (load_sys(device, buf, sizeof(buf)) != 0) {
-                       if (verbose > 0)
-                               pr_err("failed to read device type for %s\n",
-                                       path);
+               if (devpath_to_char(device, "type", buf, sizeof(buf), 0)) {
                        err = 2;
-                       free(device);
                        break;
                }
                type = strtoul(buf, NULL, 10);
@@ -2291,8 +2407,9 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
                if (!(type == 0 || type == 7 || type == 14)) {
                        vendor[0] = '\0';
                        model[0] = '\0';
-                       sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
-                       if (load_sys(device, buf, sizeof(buf)) == 0) {
+
+                       if (devpath_to_char(device, "vendor", buf,
+                                           sizeof(buf), 0) == 0) {
                                strncpy(vendor, buf, sizeof(vendor));
                                vendor[sizeof(vendor) - 1] = '\0';
                                c = (char *) &vendor[sizeof(vendor) - 1];
@@ -2300,8 +2417,9 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
                                        *c-- = '\0';
 
                        }
-                       sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
-                       if (load_sys(device, buf, sizeof(buf)) == 0) {
+
+                       if (devpath_to_char(device, "model", buf,
+                                           sizeof(buf), 0) == 0) {
                                strncpy(model, buf, sizeof(model));
                                model[sizeof(model) - 1] = '\0';
                                c = (char *) &model[sizeof(model) - 1];
@@ -2326,7 +2444,6 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
                                }
                } else
                        buf[0] = '\0';
-               free(device);
 
                /* chop device path to 'host%d' and calculate the port number */
                c = strchr(&path[hba_len], '/');
@@ -2360,7 +2477,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
                }
 
                fd = dev_open(ent->d_name, O_RDONLY);
-               if (fd < 0)
+               if (!is_fd_valid(fd))
                        printf("          Port%d : - disk info unavailable -\n", port);
                else {
                        fd2devname(fd, buf);
@@ -2370,6 +2487,8 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
                                printf(" (%s)\n", buf);
                        else
                                printf(" ()\n");
+
+                       print_encryption_information(fd, hba->type);
                        close(fd);
                }
                free(path);
@@ -2392,43 +2511,52 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
 
 static int print_nvme_info(struct sys_dev *hba)
 {
-       char buf[1024];
        struct dirent *ent;
        DIR *dir;
-       char *rp;
-       int fd;
 
        dir = opendir("/sys/block/");
        if (!dir)
                return 1;
 
        for (ent = readdir(dir); ent; ent = readdir(dir)) {
-               if (strstr(ent->d_name, "nvme")) {
-                       sprintf(buf, "/sys/block/%s", ent->d_name);
-                       rp = realpath(buf, NULL);
-                       if (!rp)
-                               continue;
-                       if (path_attached_to_hba(rp, hba->path)) {
-                               fd = open_dev(ent->d_name);
-                               if (fd < 0) {
-                                       free(rp);
-                                       continue;
-                               }
+               char ns_path[PATH_MAX];
+               char cntrl_path[PATH_MAX];
+               char buf[PATH_MAX];
+               int fd = -1;
 
-                               fd2devname(fd, buf);
-                               if (hba->type == SYS_DEV_VMD)
-                                       printf(" NVMe under VMD : %s", buf);
-                               else if (hba->type == SYS_DEV_NVME)
-                                       printf("    NVMe Device : %s", buf);
-                               if (!imsm_read_serial(fd, NULL, (__u8 *)buf,
-                                                     sizeof(buf)))
-                                       printf(" (%s)\n", buf);
-                               else
-                                       printf("()\n");
-                               close(fd);
-                       }
-                       free(rp);
-               }
+               if (!strstr(ent->d_name, "nvme"))
+                       goto skip;
+
+               fd = open_dev(ent->d_name);
+               if (!is_fd_valid(fd))
+                       goto skip;
+
+               if (!diskfd_to_devpath(fd, 0, ns_path) ||
+                   !diskfd_to_devpath(fd, 1, cntrl_path))
+                       goto skip;
+
+               if (!path_attached_to_hba(cntrl_path, hba->path))
+                       goto skip;
+
+               if (!imsm_is_nvme_namespace_supported(fd, 0))
+                       goto skip;
+
+               fd2devname(fd, buf);
+               if (hba->type == SYS_DEV_VMD)
+                       printf(" NVMe under VMD : %s", buf);
+               else if (hba->type == SYS_DEV_NVME)
+                       printf("    NVMe Device : %s", buf);
+
+               if (!imsm_read_serial(fd, NULL, (__u8 *)buf,
+                                     sizeof(buf)))
+                       printf(" (%s)\n", buf);
+               else
+                       printf("()\n");
+
+               print_encryption_information(fd, hba->type);
+
+skip:
+               close_fd(&fd);
        }
 
        closedir(dir);
@@ -2448,6 +2576,8 @@ static void print_found_intel_controllers(struct sys_dev *elem)
 
                if (elem->type == SYS_DEV_VMD)
                        fprintf(stderr, "VMD domain");
+               else if (elem->type == SYS_DEV_SATA_VMD)
+                       fprintf(stderr, "SATA VMD domain");
                else
                        fprintf(stderr, "RAID controller");
 
@@ -2486,9 +2616,18 @@ static int ahci_get_port_count(const char *hba_path, int *port_count)
        return host_base;
 }
 
-static void print_imsm_capability(const struct imsm_orom *orom)
+static void print_imsm_level_capability(const struct imsm_orom *orom)
 {
-       printf("       Platform : Intel(R) ");
+       int idx;
+
+       for (idx = 0; imsm_level_ops[idx].name; idx++)
+               if (imsm_level_ops[idx].is_level_supported(orom))
+                       printf("%s ", imsm_level_ops[idx].name);
+}
+
+static void print_imsm_capability(const struct imsm_orom *orom)
+{
+       printf("       Platform : Intel(R) ");
        if (orom->capabilities == 0 && orom->driver_features == 0)
                printf("Matrix Storage Manager\n");
        else if (imsm_orom_is_enterprise(orom) && orom->major_ver >= 6)
@@ -2496,15 +2635,19 @@ static void print_imsm_capability(const struct imsm_orom *orom)
        else
                printf("Rapid Storage Technology%s\n",
                        imsm_orom_is_enterprise(orom) ? " enterprise" : "");
-       if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
-               printf("        Version : %d.%d.%d.%d\n", orom->major_ver,
-                               orom->minor_ver, orom->hotfix_ver, orom->build);
-       printf("    RAID Levels :%s%s%s%s%s\n",
-              imsm_orom_has_raid0(orom) ? " raid0" : "",
-              imsm_orom_has_raid1(orom) ? " raid1" : "",
-              imsm_orom_has_raid1e(orom) ? " raid1e" : "",
-              imsm_orom_has_raid10(orom) ? " raid10" : "",
-              imsm_orom_has_raid5(orom) ? " raid5" : "");
+       if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build) {
+               if (imsm_orom_is_vmd_without_efi(orom))
+                       printf("        Version : %d.%d\n", orom->major_ver,
+                              orom->minor_ver);
+               else
+                       printf("        Version : %d.%d.%d.%d\n", orom->major_ver,
+                              orom->minor_ver, orom->hotfix_ver, orom->build);
+       }
+
+       printf("    RAID Levels : ");
+       print_imsm_level_capability(orom);
+       printf("\n");
+
        printf("    Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
               imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
               imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
@@ -2539,12 +2682,11 @@ static void print_imsm_capability_export(const struct imsm_orom *orom)
        if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
                printf("IMSM_VERSION=%d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
                                orom->hotfix_ver, orom->build);
-       printf("IMSM_SUPPORTED_RAID_LEVELS=%s%s%s%s%s\n",
-                       imsm_orom_has_raid0(orom) ? "raid0 " : "",
-                       imsm_orom_has_raid1(orom) ? "raid1 " : "",
-                       imsm_orom_has_raid1e(orom) ? "raid1e " : "",
-                       imsm_orom_has_raid5(orom) ? "raid10 " : "",
-                       imsm_orom_has_raid10(orom) ? "raid5 " : "");
+
+       printf("IMSM_SUPPORTED_RAID_LEVELS=");
+       print_imsm_level_capability(orom);
+       printf("\n");
+
        printf("IMSM_SUPPORTED_CHUNK_SIZES=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
                        imsm_orom_has_chunk(orom, 2) ? "2k " : "",
                        imsm_orom_has_chunk(orom, 4) ? "4k " : "",
@@ -2588,7 +2730,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
        int result=1;
 
        if (enumerate_only) {
-               if (check_env("IMSM_NO_PLATFORM"))
+               if (check_no_platform())
                        return 0;
                list = find_intel_devices();
                if (!list)
@@ -2618,8 +2760,9 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
                if (!find_imsm_capability(hba)) {
                        char buf[PATH_MAX];
                        pr_err("imsm capabilities not found for controller: %s (type %s)\n",
-                                 hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path,
-                                 get_sys_dev_type(hba->type));
+                                 hba->type == SYS_DEV_VMD || hba->type == SYS_DEV_SATA_VMD ?
+                                 vmd_domain_to_controller(hba, buf) :
+                                 hba->path, get_sys_dev_type(hba->type));
                        continue;
                }
                result = 0;
@@ -2672,11 +2815,12 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
 
                        printf(" I/O Controller : %s (%s)\n",
                                hba->path, get_sys_dev_type(hba->type));
-                       if (hba->type == SYS_DEV_SATA) {
+                       if (hba->type == SYS_DEV_SATA || hba->type == SYS_DEV_SATA_VMD) {
                                host_base = ahci_get_port_count(hba->path, &port_count);
-                               if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
+                               if (ahci_enumerate_ports(hba, port_count, host_base, verbose)) {
                                        if (verbose > 0)
-                                               pr_err("failed to enumerate ports on SATA controller at %s.\n", hba->pci_id);
+                                               pr_err("failed to enumerate ports on %s controller at %s.\n",
+                                                       get_sys_dev_type(hba->type), hba->pci_id);
                                        result |= 2;
                                }
                        }
@@ -2706,7 +2850,8 @@ static int export_detail_platform_imsm(int verbose, char *controller_path)
                if (!find_imsm_capability(hba) && verbose > 0) {
                        char buf[PATH_MAX];
                        pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n",
-                       hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path);
+                               hba->type == SYS_DEV_VMD || hba->type == SYS_DEV_SATA_VMD ?
+                               vmd_domain_to_controller(hba, buf) : hba->path);
                }
                else
                        result = 0;
@@ -2715,7 +2860,7 @@ static int export_detail_platform_imsm(int verbose, char *controller_path)
        const struct orom_entry *entry;
 
        for (entry = orom_entries; entry; entry = entry->next) {
-               if (entry->type == SYS_DEV_VMD) {
+               if (entry->type == SYS_DEV_VMD || entry->type == SYS_DEV_SATA_VMD) {
                        for (hba = list; hba; hba = hba->next)
                                print_imsm_capability_export(&entry->orom);
                        continue;
@@ -2792,34 +2937,6 @@ static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
        memcpy(uuid, buf, 4*4);
 }
 
-#if 0
-static void
-get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
-{
-       __u8 *v = get_imsm_version(mpb);
-       __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
-       char major[] = { 0, 0, 0 };
-       char minor[] = { 0 ,0, 0 };
-       char patch[] = { 0, 0, 0 };
-       char *ver_parse[] = { major, minor, patch };
-       int i, j;
-
-       i = j = 0;
-       while (*v != '\0' && v < end) {
-               if (*v != '.' && j < 2)
-                       ver_parse[i][j++] = *v;
-               else {
-                       i++;
-                       j = 0;
-               }
-               v++;
-       }
-
-       *m = strtol(minor, NULL, 0);
-       *p = strtol(patch, NULL, 0);
-}
-#endif
-
 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
 {
        /* migr_strip_size when repairing or initializing parity */
@@ -2881,26 +2998,6 @@ static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
                return num_stripes_per_unit_resync(dev);
 }
 
-static __u8 imsm_num_data_members(struct imsm_map *map)
-{
-       /* named 'imsm_' because raid0, raid1 and raid10
-        * counter-intuitively have the same number of data disks
-        */
-       switch (get_imsm_raid_level(map)) {
-       case 0:
-               return map->num_members;
-               break;
-       case 1:
-       case 10:
-               return map->num_members/2;
-       case 5:
-               return map->num_members - 1;
-       default:
-               dprintf("unsupported raid level\n");
-               return 0;
-       }
-}
-
 static unsigned long long calc_component_size(struct imsm_map *map,
                                              struct imsm_dev *dev)
 {
@@ -3099,15 +3196,13 @@ static struct imsm_dev *imsm_get_device_during_migration(
  *             sector of disk)
  * Parameters:
  *     super   : imsm internal array info
- *     info    : general array info
  * Returns:
  *      0 : success
  *     -1 : fail
  *     -2 : no migration in progress
  ******************************************************************************/
-static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
+static int load_imsm_migr_rec(struct intel_super *super)
 {
-       struct mdinfo *sd;
        struct dl *dl;
        char nm[30];
        int retval = -1;
@@ -3115,6 +3210,7 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
        struct imsm_dev *dev;
        struct imsm_map *map;
        int slot = -1;
+       int keep_fd = 1;
 
        /* find map under migration */
        dev = imsm_get_device_during_migration(super);
@@ -3123,44 +3219,41 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
        if (dev == NULL)
                return -2;
 
-       if (info) {
-               for (sd = info->devs ; sd ; sd = sd->next) {
-                       /* read only from one of the first two slots */
-                       if ((sd->disk.raid_disk < 0) ||
-                           (sd->disk.raid_disk > 1))
-                               continue;
+       map = get_imsm_map(dev, MAP_0);
+       if (!map)
+               return -1;
 
-                       sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
-                       fd = dev_open(nm, O_RDONLY);
-                       if (fd >= 0)
-                               break;
-               }
-       }
-       if (fd < 0) {
-               map = get_imsm_map(dev, MAP_0);
-               for (dl = super->disks; dl; dl = dl->next) {
-                       /* skip spare and failed disks
-                       */
-                       if (dl->index < 0)
-                               continue;
-                       /* read only from one of the first two slots */
-                       if (map)
-                               slot = get_imsm_disk_slot(map, dl->index);
-                       if (map == NULL || slot > 1 || slot < 0)
-                               continue;
+       for (dl = super->disks; dl; dl = dl->next) {
+               /* skip spare and failed disks
+                */
+               if (dl->index < 0)
+                       continue;
+               /* read only from one of the first two slots
+                */
+               slot = get_imsm_disk_slot(map, dl->index);
+               if (slot > 1 || slot < 0)
+                       continue;
+
+               if (!is_fd_valid(dl->fd)) {
                        sprintf(nm, "%d:%d", dl->major, dl->minor);
                        fd = dev_open(nm, O_RDONLY);
-                       if (fd >= 0)
+
+                       if (is_fd_valid(fd)) {
+                               keep_fd = 0;
                                break;
+                       }
+               } else {
+                       fd = dl->fd;
+                       break;
                }
        }
-       if (fd < 0)
-               goto out;
-       retval = read_imsm_migr_rec(fd, super);
 
-out:
-       if (fd >= 0)
+       if (!is_fd_valid(fd))
+               return retval;
+       retval = read_imsm_migr_rec(fd, super);
+       if (!keep_fd)
                close(fd);
+
        return retval;
 }
 
@@ -3198,7 +3291,7 @@ static int imsm_create_metadata_checkpoint_update(
        }
        (*u)->type = update_general_migration_checkpoint;
        (*u)->curr_migr_unit = current_migr_unit(super->migr_rec);
-       dprintf("prepared for %u\n", (*u)->curr_migr_unit);
+       dprintf("prepared for %llu\n", (unsigned long long)(*u)->curr_migr_unit);
 
        return update_memory_size;
 }
@@ -3221,8 +3314,6 @@ static int write_imsm_migr_rec(struct supertype *st)
        struct intel_super *super = st->sb;
        unsigned int sector_size = super->sector_size;
        unsigned long long dsize;
-       char nm[30];
-       int fd = -1;
        int retval = -1;
        struct dl *sd;
        int len;
@@ -3255,26 +3346,21 @@ static int write_imsm_migr_rec(struct supertype *st)
                if (map == NULL || slot > 1 || slot < 0)
                        continue;
 
-               sprintf(nm, "%d:%d", sd->major, sd->minor);
-               fd = dev_open(nm, O_RDWR);
-               if (fd < 0)
-                       continue;
-               get_dev_size(fd, NULL, &dsize);
-               if (lseek64(fd, dsize - (MIGR_REC_SECTOR_POSITION*sector_size),
+               get_dev_size(sd->fd, NULL, &dsize);
+               if (lseek64(sd->fd, dsize - (MIGR_REC_SECTOR_POSITION *
+                   sector_size),
                    SEEK_SET) < 0) {
                        pr_err("Cannot seek to anchor block: %s\n",
                               strerror(errno));
                        goto out;
                }
-               if ((unsigned int)write(fd, super->migr_rec_buf,
+               if ((unsigned int)write(sd->fd, super->migr_rec_buf,
                    MIGR_REC_BUF_SECTORS*sector_size) !=
                    MIGR_REC_BUF_SECTORS*sector_size) {
                        pr_err("Cannot write migr record block: %s\n",
                               strerror(errno));
                        goto out;
                }
-               close(fd);
-               fd = -1;
        }
        if (sector_size == 4096)
                convert_from_4k_imsm_migr_rec(super);
@@ -3300,8 +3386,6 @@ static int write_imsm_migr_rec(struct supertype *st)
 
        retval = 0;
  out:
-       if (fd >= 0)
-               close(fd);
        return retval;
 }
 
@@ -3354,6 +3438,53 @@ static unsigned long long imsm_component_size_alignment_check(int level,
        return component_size;
 }
 
+/*******************************************************************************
+ * Function:   get_bitmap_header_sector
+ * Description:        Returns the sector where the bitmap header is placed.
+ * Parameters:
+ *     super           : imsm internal array info
+ *     dev_idx         : index of the device with bitmap
+ *
+ * Returns:
+ *      The sector where the bitmap header is placed
+ ******************************************************************************/
+static unsigned long long get_bitmap_header_sector(struct intel_super *super,
+                                                  int dev_idx)
+{
+       struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
+       struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+       if (!super->sector_size) {
+               dprintf("sector size is not set\n");
+               return 0;
+       }
+
+       return pba_of_lba0(map) + calc_component_size(map, dev) +
+              (IMSM_BITMAP_HEADER_OFFSET / super->sector_size);
+}
+
+/*******************************************************************************
+ * Function:   get_bitmap_sector
+ * Description:        Returns the sector where the bitmap is placed.
+ * Parameters:
+ *     super           : imsm internal array info
+ *     dev_idx         : index of the device with bitmap
+ *
+ * Returns:
+ *      The sector where the bitmap is placed
+ ******************************************************************************/
+static unsigned long long get_bitmap_sector(struct intel_super *super,
+                                           int dev_idx)
+{
+       if (!super->sector_size) {
+               dprintf("sector size is not set\n");
+               return 0;
+       }
+
+       return get_bitmap_header_sector(super, dev_idx) +
+              (IMSM_BITMAP_HEADER_SIZE / super->sector_size);
+}
+
 static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx)
 {
        struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
@@ -3394,6 +3525,12 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
        info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
 
        if (is_gen_migration(dev)) {
+               /*
+                * prev_map must be present when the device is in the middle
+                * of migration
+                */
+               assert(prev_map);
+
                info->reshape_active = 1;
                info->new_level = get_imsm_raid_level(map);
                info->new_layout = imsm_level_to_layout(info->new_level);
@@ -3474,7 +3611,12 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
        } else if (info->array.level <= 0) {
                info->consistency_policy = CONSISTENCY_POLICY_NONE;
        } else {
-               info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+               if (dev->rwh_policy == RWH_BITMAP) {
+                       info->bitmap_offset = get_bitmap_sector(super, super->current_vol);
+                       info->consistency_policy = CONSISTENCY_POLICY_BITMAP;
+               } else {
+                       info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+               }
        }
 
        info->reshape_progress = 0;
@@ -3490,7 +3632,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
                case MIGR_INIT: {
                        __u64 blocks_per_unit = blocks_per_migr_unit(super,
                                                                     dev);
-                       __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
+                       __u64 units = vol_curr_migr_unit(dev);
 
                        info->resync_start = blocks_per_unit * units;
                        break;
@@ -3499,7 +3641,6 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
                        __u64 blocks_per_unit = blocks_per_migr_unit(super,
                                                                     dev);
                        __u64 units = current_migr_unit(migr_rec);
-                       unsigned long long array_blocks;
                        int used_disks;
 
                        if (__le32_to_cpu(migr_rec->ascending_migr) &&
@@ -3518,12 +3659,8 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
 
                        used_disks = imsm_num_data_members(prev_map);
                        if (used_disks > 0) {
-                               array_blocks = per_dev_array_size(map) *
+                               info->custom_array_size = per_dev_array_size(map) *
                                        used_disks;
-                               info->custom_array_size =
-                                       round_size_to_mb(array_blocks,
-                                                        used_disks);
-
                        }
                }
                case MIGR_VERIFY:
@@ -3692,7 +3829,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
                 */
                max_enough = max(max_enough, enough);
        }
-       dprintf("enough: %d\n", max_enough);
+
        info->container_enough = max_enough;
 
        if (super->disks) {
@@ -3762,8 +3899,8 @@ struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
 }
 
 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
-                            char *update, char *devname, int verbose,
-                            int uuid_set, char *homehost)
+                            enum update_opt update, char *devname,
+                            int verbose, int uuid_set, char *homehost)
 {
        /* For 'assemble' and 'force' we need to return non-zero if any
         * change was made.  For others, the return value is ignored.
@@ -3799,7 +3936,8 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
 
        mpb = super->anchor;
 
-       if (strcmp(update, "uuid") == 0) {
+       switch (update) {
+       case UOPT_UUID:
                /* We take this to mean that the family_num should be updated.
                 * However that is much smaller than the uuid so we cannot really
                 * allow an explicit uuid to be given.  And it is hard to reliably
@@ -3823,10 +3961,14 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
                }
                if (rv == 0)
                        mpb->orig_family_num = info->uuid[0];
-       } else if (strcmp(update, "assemble") == 0)
+               break;
+       case UOPT_SPEC_ASSEMBLE:
                rv = 0;
-       else
+               break;
+       default:
                rv = -1;
+               break;
+       }
 
        /* successful update? recompute checksum */
        if (rv == 0)
@@ -3878,14 +4020,12 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
        memcpy(dest, src, sizeof_imsm_dev(src, 0));
 }
 
-static int compare_super_imsm(struct supertype *st, struct supertype *tst)
+static int compare_super_imsm(struct supertype *st, struct supertype *tst,
+                             int verbose)
 {
-       /*
-        * return:
+       /*  return:
         *  0 same, or first was empty, and second was copied
-        *  1 second had wrong number
-        *  2 wrong uuid
-        *  3 wrong other info
+        *  1 superblocks are different
         */
        struct intel_super *first = st->sb;
        struct intel_super *sec = tst->sb;
@@ -3895,29 +4035,30 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst)
                tst->sb = NULL;
                return 0;
        }
+
        /* in platform dependent environment test if the disks
         * use the same Intel hba
-        * If not on Intel hba at all, allow anything.
+        * if not on Intel hba at all, allow anything.
+        * doesn't check HBAs if num_raid_devs is not set, as it means
+        * it is a free floating spare, and all spares regardless of HBA type
+        * will fall into separate container during the assembly
         */
-       if (!check_env("IMSM_NO_PLATFORM") && first->hba && sec->hba) {
+       if (first->hba && sec->hba && first->anchor->num_raid_devs != 0) {
                if (first->hba->type != sec->hba->type) {
-                       fprintf(stderr,
-                               "HBAs of devices do not match %s != %s\n",
-                               get_sys_dev_type(first->hba->type),
-                               get_sys_dev_type(sec->hba->type));
-                       return 3;
+                       if (verbose)
+                               pr_err("HBAs of devices do not match %s != %s\n",
+                                      get_sys_dev_type(first->hba->type),
+                                      get_sys_dev_type(sec->hba->type));
+                       return 1;
                }
                if (first->orom != sec->orom) {
-                       fprintf(stderr,
-                               "HBAs of devices do not match %s != %s\n",
-                               first->hba->pci_id, sec->hba->pci_id);
-                       return 3;
+                       if (verbose)
+                               pr_err("HBAs of devices do not match %s != %s\n",
+                                      first->hba->pci_id, sec->hba->pci_id);
+                       return 1;
                }
        }
 
-       /* if an anchor does not have num_raid_devs set then it is a free
-        * floating spare
-        */
        if (first->anchor->num_raid_devs > 0 &&
            sec->anchor->num_raid_devs > 0) {
                /* Determine if these disks might ever have been
@@ -3929,7 +4070,7 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst)
 
                if (memcmp(first->anchor->sig, sec->anchor->sig,
                           MAX_SIGNATURE_LENGTH) != 0)
-                       return 3;
+                       return 1;
 
                if (first_family == 0)
                        first_family = first->anchor->family_num;
@@ -3937,76 +4078,35 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst)
                        sec_family = sec->anchor->family_num;
 
                if (first_family != sec_family)
-                       return 3;
+                       return 1;
 
        }
 
-       /* if 'first' is a spare promote it to a populated mpb with sec's
-        * family number
-        */
-       if (first->anchor->num_raid_devs == 0 &&
-           sec->anchor->num_raid_devs > 0) {
-               int i;
-               struct intel_dev *dv;
-               struct imsm_dev *dev;
-
-               /* we need to copy raid device info from sec if an allocation
-                * fails here we don't associate the spare
-                */
-               for (i = 0; i < sec->anchor->num_raid_devs; i++) {
-                       dv = xmalloc(sizeof(*dv));
-                       dev = xmalloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
-                       dv->dev = dev;
-                       dv->index = i;
-                       dv->next = first->devlist;
-                       first->devlist = dv;
-               }
-               if (i < sec->anchor->num_raid_devs) {
-                       /* allocation failure */
-                       free_devlist(first);
-                       pr_err("imsm: failed to associate spare\n");
-                       return 3;
-               }
-               first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
-               first->anchor->orig_family_num = sec->anchor->orig_family_num;
-               first->anchor->family_num = sec->anchor->family_num;
-               memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
-               for (i = 0; i < sec->anchor->num_raid_devs; i++)
-                       imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
-       }
+       /* if an anchor does not have num_raid_devs set then it is a free
+       * floating spare. don't associate spare with any array, as during assembly
+       * spares shall fall into separate container, from which they can be moved
+       * when necessary
+       */
+       if (first->anchor->num_raid_devs ^ sec->anchor->num_raid_devs)
+               return 1;
 
        return 0;
 }
 
 static void fd2devname(int fd, char *name)
 {
-       struct stat st;
-       char path[256];
-       char dname[PATH_MAX];
        char *nm;
-       int rv;
 
-       name[0] = '\0';
-       if (fstat(fd, &st) != 0)
+       nm = fd2kname(fd);
+       if (!nm)
                return;
-       sprintf(path, "/sys/dev/block/%d:%d",
-               major(st.st_rdev), minor(st.st_rdev));
 
-       rv = readlink(path, dname, sizeof(dname)-1);
-       if (rv <= 0)
-               return;
-
-       dname[rv] = '\0';
-       nm = strrchr(dname, '/');
-       if (nm) {
-               nm++;
-               snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
-       }
+       snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
 }
 
 static int nvme_get_serial(int fd, void *buf, size_t buf_len)
 {
-       char path[60];
+       char path[PATH_MAX];
        char *name = fd2kname(fd);
 
        if (!name)
@@ -4015,9 +4115,10 @@ static int nvme_get_serial(int fd, void *buf, size_t buf_len)
        if (strncmp(name, "nvme", 4) != 0)
                return 1;
 
-       snprintf(path, sizeof(path) - 1, "/sys/block/%s/device/serial", name);
+       if (!diskfd_to_devpath(fd, 1, path))
+               return 1;
 
-       return load_sys(path, buf, buf_len);
+       return devpath_to_char(path, "serial", buf, buf_len, 0);
 }
 
 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
@@ -4034,17 +4135,17 @@ static int imsm_read_serial(int fd, char *devname,
 
        memset(buf, 0, sizeof(buf));
 
+       if (check_env("IMSM_DEVNAME_AS_SERIAL")) {
+               memset(serial, 0, serial_buf_len);
+               fd2devname(fd, (char *) serial);
+               return 0;
+       }
+
        rv = nvme_get_serial(fd, buf, sizeof(buf));
 
        if (rv)
                rv = scsi_get_serial(fd, buf, sizeof(buf));
 
-       if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
-               memset(serial, 0, MAX_RAID_SERIAL_LEN);
-               fd2devname(fd, (char *) serial);
-               return 0;
-       }
-
        if (rv != 0) {
                if (devname)
                        pr_err("Failed to retrieve serial for %s\n",
@@ -4194,7 +4295,7 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super,
 
        dev->vol.migr_state = 1;
        set_migr_type(dev, migr_type);
-       dev->vol.curr_migr_unit = 0;
+       set_vol_curr_migr_unit(dev, 0);
        dest = get_imsm_map(dev, MAP_1);
 
        /* duplicate and then set the target end state in map[0] */
@@ -4229,7 +4330,7 @@ static void end_migration(struct imsm_dev *dev, struct intel_super *super,
         *
         * FIXME add support for raid-level-migration
         */
-       if (map_state != map->map_state && (is_gen_migration(dev) == 0) &&
+       if (map_state != map->map_state && (is_gen_migration(dev) == false) &&
            prev->map_state != IMSM_T_STATE_UNINITIALIZED) {
                /* when final map state is other than expected
                 * merge maps (not for migration)
@@ -4254,7 +4355,7 @@ static void end_migration(struct imsm_dev *dev, struct intel_super *super,
 
        dev->vol.migr_state = 0;
        set_migr_type(dev, 0);
-       dev->vol.curr_migr_unit = 0;
+       set_vol_curr_migr_unit(dev, 0);
        map->map_state = map_state;
 }
 
@@ -4332,8 +4433,7 @@ int check_mpb_migr_compatibility(struct intel_super *super)
        for (i = 0; i < super->anchor->num_raid_devs; i++) {
                struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
 
-               if (dev_iter &&
-                   dev_iter->vol.migr_state == 1 &&
+               if (dev_iter->vol.migr_state == 1 &&
                    dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
                        /* This device is migrating */
                        map0 = get_imsm_map(dev_iter, MAP_0);
@@ -4421,6 +4521,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
            MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) {
                pr_err("could not allocate migr_rec buffer\n");
                free(super->buf);
+               super->buf = NULL;
                return 2;
        }
        super->clean_migration_record_by_mdmon = 0;
@@ -4482,8 +4583,6 @@ static void clear_hi(struct intel_super *super)
        }
        for (i = 0; i < mpb->num_raid_devs; ++i) {
                struct imsm_dev *dev = get_imsm_dev(super, i);
-               if (!dev)
-                       return;
                for (n = 0; n < 2; ++n) {
                        struct imsm_map *map = get_imsm_map(dev, n);
                        if (!map)
@@ -4516,10 +4615,10 @@ load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd
        return err;
 }
 
-static void __free_imsm_disk(struct dl *d)
+static void __free_imsm_disk(struct dl *d, int do_close)
 {
-       if (d->fd >= 0)
-               close(d->fd);
+       if (do_close)
+               close_fd(&d->fd);
        if (d->devname)
                free(d->devname);
        if (d->e)
@@ -4535,17 +4634,17 @@ static void free_imsm_disks(struct intel_super *super)
        while (super->disks) {
                d = super->disks;
                super->disks = d->next;
-               __free_imsm_disk(d);
+               __free_imsm_disk(d, 1);
        }
        while (super->disk_mgmt_list) {
                d = super->disk_mgmt_list;
                super->disk_mgmt_list = d->next;
-               __free_imsm_disk(d);
+               __free_imsm_disk(d, 1);
        }
        while (super->missing) {
                d = super->missing;
                super->missing = d->next;
-               __free_imsm_disk(d);
+               __free_imsm_disk(d, 1);
        }
 
 }
@@ -4624,12 +4723,12 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de
        struct sys_dev *hba_name;
        int rv = 0;
 
-       if (fd >= 0 && test_partition(fd)) {
+       if (is_fd_valid(fd) && test_partition(fd)) {
                pr_err("imsm: %s is a partition, cannot be used in IMSM\n",
                       devname);
                return 1;
        }
-       if (fd < 0 || check_env("IMSM_NO_PLATFORM")) {
+       if (!is_fd_valid(fd) || check_no_platform()) {
                super->orom = NULL;
                super->hba = NULL;
                return 0;
@@ -4650,10 +4749,12 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de
                                "    but the container is assigned to Intel(R) %s %s (",
                                devname,
                                get_sys_dev_type(hba_name->type),
-                               hba_name->type == SYS_DEV_VMD ? "domain" : "RAID controller",
+                               hba_name->type == SYS_DEV_VMD || hba_name->type == SYS_DEV_SATA_VMD ?
+                                       "domain" : "RAID controller",
                                hba_name->pci_id ? : "Err!",
                                get_sys_dev_type(super->hba->type),
-                               hba->type == SYS_DEV_VMD ? "domain" : "RAID controller");
+                               hba->type == SYS_DEV_VMD || hba_name->type == SYS_DEV_SATA_VMD ?
+                                       "domain" : "RAID controller");
 
                        while (hba) {
                                fprintf(stderr, "%s", hba->pci_id ? : "Err!");
@@ -5038,7 +5139,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
        int err = 0;
        int i = 0;
 
-       if (fd >= 0)
+       if (is_fd_valid(fd))
                /* 'fd' is an opened container */
                err = get_sra_super_block(fd, &super_list, devname, &i, keep_fd);
        else
@@ -5060,7 +5161,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
        }
 
        /* load migration record */
-       err = load_imsm_migr_rec(super, NULL);
+       err = load_imsm_migr_rec(super);
        if (err == -1) {
                /* migration is in progress,
                 * but migr_rec cannot be loaded,
@@ -5095,7 +5196,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
                return err;
 
        *sbp = super;
-       if (fd >= 0)
+       if (is_fd_valid(fd))
                strcpy(st->container_devnm, fd2devnm(fd));
        else
                st->container_devnm[0] = 0;
@@ -5121,7 +5222,7 @@ get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list
                if (tmpdev->container == 1) {
                        int lmax = 0;
                        int fd = dev_open(tmpdev->devname, O_RDONLY|O_EXCL);
-                       if (fd < 0) {
+                       if (!is_fd_valid(fd)) {
                                pr_err("cannot open device %s: %s\n",
                                        tmpdev->devname, strerror(errno));
                                err = 8;
@@ -5173,19 +5274,22 @@ static int get_super_block(struct intel_super **super_list, char *devnm, char *d
 
        sprintf(nm, "%d:%d", major, minor);
        dfd = dev_open(nm, O_RDWR);
-       if (dfd < 0) {
+       if (!is_fd_valid(dfd)) {
                err = 2;
                goto error;
        }
 
-       get_dev_sector_size(dfd, NULL, &s->sector_size);
+       if (!get_dev_sector_size(dfd, NULL, &s->sector_size)) {
+               err = 2;
+               goto error;
+       }
        find_intel_hba_capability(dfd, s, devname);
        err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
 
        /* retry the load if we might have raced against mdmon */
        if (err == 3 && devnm && mdmon_running(devnm))
                for (retry = 0; retry < 3; retry++) {
-                       usleep(3000);
+                       sleep_for(0, MSEC_TO_NSEC(3), true);
                        err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
                        if (err != 3)
                                break;
@@ -5197,11 +5301,10 @@ static int get_super_block(struct intel_super **super_list, char *devnm, char *d
        } else {
                if (s)
                        free_imsm(s);
-               if (dfd >= 0)
-                       close(dfd);
+               close_fd(&dfd);
        }
-       if (dfd >= 0 && !keep_fd)
-               close(dfd);
+       if (!keep_fd)
+               close_fd(&dfd);
        return err;
 
 }
@@ -5257,9 +5360,13 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
        free_super_imsm(st);
 
        super = alloc_super();
-       get_dev_sector_size(fd, NULL, &super->sector_size);
        if (!super)
                return 1;
+
+       if (!get_dev_sector_size(fd, NULL, &super->sector_size)) {
+               free_imsm(super);
+               return 1;
+       }
        /* Load hba and capabilities if they exist.
         * But do not preclude loading metadata in case capabilities or hba are
         * non-compliant and ignore_hw_compat is set.
@@ -5284,7 +5391,7 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
 
                if (mdstat && mdmon_running(mdstat->devnm) && getpid() != mdmon_pid(mdstat->devnm)) {
                        for (retry = 0; retry < 3; retry++) {
-                               usleep(3000);
+                               sleep_for(0, MSEC_TO_NSEC(3), true);
                                rv = load_and_parse_mpb(fd, super, devname, 0);
                                if (rv != 3)
                                        break;
@@ -5309,7 +5416,7 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
        }
 
        /* load migration record */
-       if (load_imsm_migr_rec(super, NULL) == 0) {
+       if (load_imsm_migr_rec(super) == 0) {
                /* Check for unsupported migration features */
                if (check_mpb_migr_compatibility(super) != 0) {
                        pr_err("Unsupported migration detected");
@@ -5340,87 +5447,81 @@ static unsigned long long info_to_blocks_per_member(mdu_array_info_t *info,
                return (size * 2) & ~(info_to_blocks_per_strip(info) - 1);
 }
 
+static void imsm_write_signature(struct imsm_super *mpb)
+{
+       /* It is safer to truncate the version than to leave it without a NUL terminator */
+       snprintf((char *) mpb->sig, MAX_SIGNATURE_LENGTH, MPB_SIGNATURE MPB_VERSION_ATTRIBS);
+}
+
 static void imsm_update_version_info(struct intel_super *super)
 {
        /* update the version and attributes */
        struct imsm_super *mpb = super->anchor;
-       char *version;
        struct imsm_dev *dev;
        struct imsm_map *map;
        int i;
 
+       mpb->attributes |= MPB_ATTRIB_CHECKSUM_VERIFY;
+
        for (i = 0; i < mpb->num_raid_devs; i++) {
                dev = get_imsm_dev(super, i);
                map = get_imsm_map(dev, MAP_0);
+
                if (__le32_to_cpu(dev->size_high) > 0)
                        mpb->attributes |= MPB_ATTRIB_2TB;
 
-               /* FIXME detect when an array spans a port multiplier */
-               #if 0
-               mpb->attributes |= MPB_ATTRIB_PM;
-               #endif
-
-               if (mpb->num_raid_devs > 1 ||
-                   mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
-                       version = MPB_VERSION_ATTRIBS;
-                       switch (get_imsm_raid_level(map)) {
-                       case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
-                       case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
-                       case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
-                       case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
-                       }
-               } else {
-                       if (map->num_members >= 5)
-                               version = MPB_VERSION_5OR6_DISK_ARRAY;
-                       else if (dev->status == DEV_CLONE_N_GO)
-                               version = MPB_VERSION_CNG;
-                       else if (get_imsm_raid_level(map) == 5)
-                               version = MPB_VERSION_RAID5;
-                       else if (map->num_members >= 3)
-                               version = MPB_VERSION_3OR4_DISK_ARRAY;
-                       else if (get_imsm_raid_level(map) == 1)
-                               version = MPB_VERSION_RAID1;
-                       else
-                               version = MPB_VERSION_RAID0;
+               switch (get_imsm_raid_level(map)) {
+               case IMSM_T_RAID0:
+                       mpb->attributes |= MPB_ATTRIB_RAID0;
+                       break;
+               case IMSM_T_RAID1:
+                       mpb->attributes |= MPB_ATTRIB_RAID1;
+                       break;
+               case IMSM_T_RAID5:
+                       mpb->attributes |= MPB_ATTRIB_RAID5;
+                       break;
+               case IMSM_T_RAID10:
+                       mpb->attributes |= MPB_ATTRIB_RAID10;
+                       if (map->num_members > 4)
+                               mpb->attributes |= MPB_ATTRIB_RAID10_EXT;
+                       break;
                }
-               strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
        }
+
+       imsm_write_signature(mpb);
 }
 
-static int check_name(struct intel_super *super, char *name, int quiet)
+/**
+ * imsm_is_name_allowed() - check imsm naming criteria.
+ * @super: &intel_super pointer, not NULL.
+ * @name: name to check.
+ * @verbose: verbose level.
+ *
+ * Name must be no longer than &MAX_RAID_SERIAL_LEN and must be unique across volumes.
+ *
+ * Returns: &true if @name meets the criteria, &false otherwise.
+ */
+static bool imsm_is_name_allowed(struct intel_super *super, const char * const name,
+                                const int verbose)
 {
        struct imsm_super *mpb = super->anchor;
-       char *reason = NULL;
-       char *start = name;
-       size_t len = strlen(name);
        int i;
 
-       if (len > 0) {
-               while (isspace(start[len - 1]))
-                       start[--len] = 0;
-               while (*start && isspace(*start))
-                       ++start, --len;
-               memmove(name, start, len + 1);
+       if (is_string_lq(name, MAX_RAID_SERIAL_LEN + 1) == false) {
+               pr_vrb("imsm: Name \"%s\" is too long\n", name);
+               return false;
        }
 
-       if (len > MAX_RAID_SERIAL_LEN)
-               reason = "must be 16 characters or less";
-       else if (len == 0)
-               reason = "must be a non-empty string";
-
        for (i = 0; i < mpb->num_raid_devs; i++) {
                struct imsm_dev *dev = get_imsm_dev(super, i);
 
                if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
-                       reason = "already exists";
-                       break;
+                       pr_vrb("imsm: Name \"%s\" already exists\n", name);
+                       return false;
                }
        }
 
-       if (reason && !quiet)
-               pr_err("imsm volume name %s\n", reason);
-
-       return !reason;
+       return true;
 }
 
 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
@@ -5443,7 +5544,6 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
        int namelen;
        unsigned long long array_blocks;
        size_t size_old, size_new;
-       unsigned long long num_data_stripes;
        unsigned int data_disks;
        unsigned long long size_per_member;
 
@@ -5516,8 +5616,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
                }
        }
 
-       if (!check_name(super, name, 0))
+       if (imsm_is_name_allowed(super, name, 1) == false)
                return 0;
+
        dv = xmalloc(sizeof(*dv));
        dev = xcalloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
        /*
@@ -5540,12 +5641,12 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
        vol->migr_state = 0;
        set_migr_type(dev, MIGR_INIT);
        vol->dirty = !info->state;
-       vol->curr_migr_unit = 0;
+       set_vol_curr_migr_unit(dev, 0);
        map = get_imsm_map(dev, MAP_0);
        set_pba_of_lba0(map, super->create_offset);
        map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
        map->failed_disk_num = ~0;
-       if (info->level > 0)
+       if (info->level > IMSM_T_RAID0)
                map->map_state = (info->state ? IMSM_T_STATE_NORMAL
                                  : IMSM_T_STATE_UNINITIALIZED);
        else
@@ -5553,33 +5654,23 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
                                                      IMSM_T_STATE_NORMAL;
        map->ddf = 1;
 
-       if (info->level == 1 && info->raid_disks > 2) {
+       if (info->level == IMSM_T_RAID1 && info->raid_disks > 2) {
                free(dev);
                free(dv);
-               pr_err("imsm does not support more than 2 disksin a raid1 volume\n");
+               pr_err("imsm does not support more than 2 disks in a raid1 volume\n");
                return 0;
        }
+       map->num_members = info->raid_disks;
 
-       map->raid_level = info->level;
-       if (info->level == 10) {
-               map->raid_level = 1;
-               map->num_domains = info->raid_disks / 2;
-       } else if (info->level == 1)
-               map->num_domains = info->raid_disks;
-       else
-               map->num_domains = 1;
-
-       /* info->size is only int so use the 'size' parameter instead */
-       num_data_stripes = size_per_member / info_to_blocks_per_strip(info);
-       num_data_stripes /= map->num_domains;
-       set_num_data_stripes(map, num_data_stripes);
+       update_imsm_raid_level(map, info->level);
+       set_num_domains(map);
 
        size_per_member += NUM_BLOCKS_DIRTY_STRIPE_REGION;
        set_blocks_per_member(map, info_to_blocks_per_member(info,
                                                             size_per_member /
                                                             BLOCKS_PER_KB));
 
-       map->num_members = info->raid_disks;
+       update_num_data_stripes(map, array_blocks);
        for (i = 0; i < map->num_members; i++) {
                /* initialized in add_to_super */
                set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
@@ -5596,7 +5687,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
                free(dev);
                free(dv);
                pr_err("imsm does not support consistency policy %s\n",
-                      map_num(consistency_policies, s->consistency_policy));
+                      map_num_s(consistency_policies, s->consistency_policy));
                return 0;
        }
 
@@ -5626,7 +5717,6 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
        struct intel_super *super;
        struct imsm_super *mpb;
        size_t mpb_size;
-       char *version;
 
        if (data_offset != INVALID_SECTORS) {
                pr_err("data-offset not supported by imsm\n");
@@ -5669,13 +5759,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
                return 0;
        }
 
-       mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
-
-       version = (char *) mpb->sig;
-       strcpy(version, MPB_SIGNATURE);
-       version += strlen(MPB_SIGNATURE);
-       strcpy(version, MPB_VERSION_RAID0);
-
+       imsm_update_version_info(super);
        return 1;
 }
 
@@ -5683,7 +5767,7 @@ static int drive_validate_sector_size(struct intel_super *super, struct dl *dl)
 {
        unsigned int member_sector_size;
 
-       if (dl->fd < 0) {
+       if (!is_fd_valid(dl->fd)) {
                pr_err("Invalid file descriptor for %s\n", dl->devname);
                return 0;
        }
@@ -5705,6 +5789,10 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
        struct imsm_map *map;
        struct dl *dl, *df;
        int slot;
+       int autolayout = 0;
+
+       if (!is_fd_valid(fd))
+               autolayout = 1;
 
        dev = get_imsm_dev(super, super->current_vol);
        map = get_imsm_map(dev, MAP_0);
@@ -5715,25 +5803,32 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
                return 1;
        }
 
-       if (fd == -1) {
-               /* we're doing autolayout so grab the pre-marked (in
-                * validate_geometry) raid_disk
-                */
-               for (dl = super->disks; dl; dl = dl->next)
+       for (dl = super->disks; dl ; dl = dl->next) {
+               if (autolayout) {
                        if (dl->raiddisk == dk->raid_disk)
                                break;
-       } else {
-               for (dl = super->disks; dl ; dl = dl->next)
-                       if (dl->major == dk->major &&
-                           dl->minor == dk->minor)
-                               break;
+               } else if (dl->major == dk->major && dl->minor == dk->minor)
+                       break;
        }
 
        if (!dl) {
-               pr_err("%s is not a member of the same container\n", devname);
+               if (!autolayout)
+                       pr_err("%s is not a member of the same container.\n",
+                              devname);
                return 1;
        }
 
+       if (!autolayout && super->current_vol > 0) {
+               int _slot = get_disk_slot_in_dev(super, 0, dl->index);
+
+               if (_slot != dk->raid_disk) {
+                       pr_err("Member %s is in %d slot for the first volume, but is in %d slot for a new volume.\n",
+                              dl->devname, _slot, dk->raid_disk);
+                       pr_err("Raid members are in different order than for the first volume, aborting.\n");
+                       return 1;
+               }
+       }
+
        if (mpb->num_disks == 0)
                if (!get_dev_sector_size(dl->fd, dl->devname,
                                         &super->sector_size))
@@ -5807,7 +5902,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
                struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
 
                _disk = __get_imsm_disk(mpb, dl->index);
-               if (!_dev || !_disk) {
+               if (!_disk) {
                        pr_err("BUG mpb setup error\n");
                        return 1;
                }
@@ -5817,6 +5912,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
                sum += __gen_imsm_checksum(mpb);
                mpb->family_num = __cpu_to_le32(sum);
                mpb->orig_family_num = mpb->family_num;
+               mpb->creation_time = __cpu_to_le64((__u64)time(NULL));
        }
        super->current_disk = dl;
        return 0;
@@ -5854,6 +5950,9 @@ int mark_spare(struct dl *disk)
        return ret_val;
 }
 
+
+static int write_super_imsm_spare(struct intel_super *super, struct dl *d);
+
 static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
                             int fd, char *devname,
                             unsigned long long data_offset)
@@ -5893,29 +5992,30 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
        rv = imsm_read_serial(fd, devname, dd->serial, MAX_RAID_SERIAL_LEN);
        if (rv) {
                pr_err("failed to retrieve scsi serial, aborting\n");
-               if (dd->devname)
-                       free(dd->devname);
-               free(dd);
+               __free_imsm_disk(dd, 0);
                abort();
        }
+
        if (super->hba && ((super->hba->type == SYS_DEV_NVME) ||
           (super->hba->type == SYS_DEV_VMD))) {
                int i;
-               char *devpath = diskfd_to_devpath(fd);
-               char controller_path[PATH_MAX];
-
-               if (!devpath) {
-                       pr_err("failed to get devpath, aborting\n");
-                       if (dd->devname)
-                               free(dd->devname);
-                       free(dd);
+               char cntrl_path[PATH_MAX];
+               char *cntrl_name;
+               char pci_dev_path[PATH_MAX];
+
+               if (!diskfd_to_devpath(fd, 2, pci_dev_path) ||
+                   !diskfd_to_devpath(fd, 1, cntrl_path)) {
+                       pr_err("failed to get dev paths, aborting\n");
+                       __free_imsm_disk(dd, 0);
                        return 1;
                }
 
-               snprintf(controller_path, PATH_MAX-1, "%s/device", devpath);
-               free(devpath);
+               cntrl_name = basename(cntrl_path);
+               if (is_multipath_nvme(fd))
+                       pr_err("%s controller supports Multi-Path I/O, Intel (R) VROC does not support multipathing\n",
+                              cntrl_name);
 
-               if (devpath_to_vendor(controller_path) == 0x8086) {
+               if (devpath_to_vendor(pci_dev_path) == 0x8086) {
                        /*
                         * If Intel's NVMe drive has serial ended with
                         * "-A","-B","-1" or "-2" it means that this is "x8"
@@ -5942,14 +6042,16 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
                    !imsm_orom_has_tpv_support(super->orom)) {
                        pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n"
                               "\tPlease refer to Intel(R) RSTe/VROC user guide.\n");
-                       free(dd->devname);
-                       free(dd);
+                       __free_imsm_disk(dd, 0);
                        return 1;
                }
        }
 
        get_dev_size(fd, NULL, &size);
-       get_dev_sector_size(fd, NULL, &member_sector_size);
+       if (!get_dev_sector_size(fd, NULL, &member_sector_size)) {
+               __free_imsm_disk(dd, 0);
+               return 1;
+       }
 
        if (super->sector_size == 0) {
                /* this a first device, so sector_size is not set yet */
@@ -5983,9 +6085,13 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
                dd->next = super->disk_mgmt_list;
                super->disk_mgmt_list = dd;
        } else {
+               /* this is called outside of mdmon
+                * write initial spare metadata
+                * mdmon will overwrite it.
+                */
                dd->next = super->disks;
                super->disks = dd;
-               super->updates_pending++;
+               write_super_imsm_spare(super, dd);
        }
 
        return 0;
@@ -6024,53 +6130,60 @@ static union {
        struct imsm_super anchor;
 } spare_record __attribute__ ((aligned(MAX_SECTOR_SIZE)));
 
-/* spare records have their own family number and do not have any defined raid
- * devices
- */
-static int write_super_imsm_spares(struct intel_super *super, int doclose)
+
+static int write_super_imsm_spare(struct intel_super *super, struct dl *d)
 {
-       struct imsm_super *mpb = super->anchor;
        struct imsm_super *spare = &spare_record.anchor;
        __u32 sum;
-       struct dl *d;
+
+       if (d->index != -1)
+               return 1;
 
        spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super));
        spare->generation_num = __cpu_to_le32(1UL);
-       spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
        spare->num_disks = 1;
        spare->num_raid_devs = 0;
-       spare->cache_size = mpb->cache_size;
        spare->pwr_cycle_count = __cpu_to_le32(1);
 
-       snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
-                MPB_SIGNATURE MPB_VERSION_RAID0);
+       imsm_write_signature(spare);
 
-       for (d = super->disks; d; d = d->next) {
-               if (d->index != -1)
-                       continue;
+       spare->disk[0] = d->disk;
+       if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
+               spare->attributes |= MPB_ATTRIB_2TB_DISK;
+
+       if (super->sector_size == 4096)
+               convert_to_4k_imsm_disk(&spare->disk[0]);
+
+       sum = __gen_imsm_checksum(spare);
+       spare->family_num = __cpu_to_le32(sum);
+       spare->orig_family_num = 0;
+       sum = __gen_imsm_checksum(spare);
+       spare->check_sum = __cpu_to_le32(sum);
 
-               spare->disk[0] = d->disk;
-               if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
-                       spare->attributes |= MPB_ATTRIB_2TB_DISK;
+       if (store_imsm_mpb(d->fd, spare)) {
+               pr_err("failed for device %d:%d %s\n",
+                       d->major, d->minor, strerror(errno));
+               return 1;
+       }
 
-               if (super->sector_size == 4096)
-                       convert_to_4k_imsm_disk(&spare->disk[0]);
+       return 0;
+}
+/* spare records have their own family number and do not have any defined raid
+ * devices
+ */
+static int write_super_imsm_spares(struct intel_super *super, int doclose)
+{
+       struct dl *d;
 
-               sum = __gen_imsm_checksum(spare);
-               spare->family_num = __cpu_to_le32(sum);
-               spare->orig_family_num = 0;
-               sum = __gen_imsm_checksum(spare);
-               spare->check_sum = __cpu_to_le32(sum);
+       for (d = super->disks; d; d = d->next) {
+               if (d->index != -1)
+                       continue;
 
-               if (store_imsm_mpb(d->fd, spare)) {
-                       pr_err("failed for device %d:%d %s\n",
-                               d->major, d->minor, strerror(errno));
+               if (write_super_imsm_spare(super, d))
                        return 1;
-               }
-               if (doclose) {
-                       close(d->fd);
-                       d->fd = -1;
-               }
+
+               if (doclose)
+                       close_fd(&d->fd);
        }
 
        return 0;
@@ -6120,10 +6233,10 @@ static int write_super_imsm(struct supertype *st, int doclose)
        for (i = 0; i < mpb->num_raid_devs; i++) {
                struct imsm_dev *dev = __get_imsm_dev(mpb, i);
                struct imsm_dev *dev2 = get_imsm_dev(super, i);
-               if (dev && dev2) {
-                       imsm_copy_dev(dev, dev2);
-                       mpb_size += sizeof_imsm_dev(dev, 0);
-               }
+
+               imsm_copy_dev(dev, dev2);
+               mpb_size += sizeof_imsm_dev(dev, 0);
+
                if (is_gen_migration(dev2))
                        clear_migration_record = 0;
        }
@@ -6184,10 +6297,8 @@ static int write_super_imsm(struct supertype *st, int doclose)
                                d->major, d->minor,
                                d->fd, strerror(errno));
 
-               if (doclose) {
-                       close(d->fd);
-                       d->fd = -1;
-               }
+               if (doclose)
+                       close_fd(&d->fd);
        }
 
        if (spares)
@@ -6418,7 +6529,7 @@ static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
                if (mdmon_running(st->container_devnm))
                        st->update_tail = &st->updates;
 
-               if (st->ss->update_subarray(st, subarray, "ppl", NULL)) {
+               if (st->ss->update_subarray(st, subarray, UOPT_PPL, NULL)) {
                        pr_err("Failed to update subarray %s\n",
                              subarray);
                } else {
@@ -6441,7 +6552,7 @@ static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
                   (map->map_state == IMSM_T_STATE_NORMAL &&
                   !(dev->vol.dirty & RAIDVOL_DIRTY)) ||
                   (is_rebuilding(dev) &&
-                   dev->vol.curr_migr_unit == 0 &&
+                   vol_curr_migr_unit(dev) == 0 &&
                    get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_1) != idx))
                        ret = st->ss->write_init_ppl(st, info, d->fd);
                else
@@ -6486,6 +6597,60 @@ static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info)
        return ret;
 }
 
+/*******************************************************************************
+ * Function:   write_init_bitmap_imsm_vol
+ * Description:        Writes a bitmap header and prepares the area for the bitmap.
+ * Parameters:
+ *     st      : supertype information
+ *     vol_idx : the volume index to use
+ *
+ * Returns:
+ *      0 : success
+ *     -1 : fail
+ ******************************************************************************/
+static int write_init_bitmap_imsm_vol(struct supertype *st, int vol_idx)
+{
+       struct intel_super *super = st->sb;
+       int prev_current_vol = super->current_vol;
+       struct dl *d;
+       int ret = 0;
+
+       super->current_vol = vol_idx;
+       for (d = super->disks; d; d = d->next) {
+               if (d->index < 0 || is_failed(&d->disk))
+                       continue;
+               ret = st->ss->write_bitmap(st, d->fd, NoUpdate);
+               if (ret)
+                       break;
+       }
+       super->current_vol = prev_current_vol;
+       return ret;
+}
+
+/*******************************************************************************
+ * Function:   write_init_bitmap_imsm_all
+ * Description:        Writes a bitmap header and prepares the area for the bitmap.
+ *             Operation is executed for volumes with CONSISTENCY_POLICY_BITMAP.
+ * Parameters:
+ *     st      : supertype information
+ *     info    : info about the volume where the bitmap should be written
+ *     vol_idx : the volume index to use
+ *
+ * Returns:
+ *      0 : success
+ *     -1 : fail
+ ******************************************************************************/
+static int write_init_bitmap_imsm_all(struct supertype *st, struct mdinfo *info,
+                                     int vol_idx)
+{
+       int ret = 0;
+
+       if (info && (info->consistency_policy == CONSISTENCY_POLICY_BITMAP))
+               ret = write_init_bitmap_imsm_vol(st, vol_idx);
+
+       return ret;
+}
+
 static int write_init_super_imsm(struct supertype *st)
 {
        struct intel_super *super = st->sb;
@@ -6509,7 +6674,10 @@ static int write_init_super_imsm(struct supertype *st)
                         */
                        rv = mgmt_disk(st);
                } else {
+                       /* adding the second volume to the array */
                        rv = write_init_ppl_imsm_all(st, &info);
+                       if (!rv)
+                               rv = write_init_bitmap_imsm_all(st, &info, current_vol);
                        if (!rv)
                                rv = create_array(st, current_vol);
                }
@@ -6517,8 +6685,12 @@ static int write_init_super_imsm(struct supertype *st)
                struct dl *d;
                for (d = super->disks; d; d = d->next)
                        Kill(d->devname, NULL, 0, -1, 1);
-               if (current_vol >= 0)
+               if (current_vol >= 0) {
                        rv = write_init_ppl_imsm_all(st, &info);
+                       if (!rv)
+                               rv = write_init_bitmap_imsm_all(st, &info, current_vol);
+               }
+
                if (!rv)
                        rv = write_super_imsm(st, 1);
        }
@@ -6540,8 +6712,7 @@ static int store_super_imsm(struct supertype *st, int fd)
 }
 
 static int validate_geometry_imsm_container(struct supertype *st, int level,
-                                           int layout, int raiddisks, int chunk,
-                                           unsigned long long size,
+                                           int raiddisks,
                                            unsigned long long data_offset,
                                            char *dev,
                                            unsigned long long *freesize,
@@ -6549,39 +6720,31 @@ static int validate_geometry_imsm_container(struct supertype *st, int level,
 {
        int fd;
        unsigned long long ldsize;
-       struct intel_super *super;
+       struct intel_super *super = NULL;
        int rv = 0;
 
-       if (level != LEVEL_CONTAINER)
+       if (!is_container(level))
                return 0;
        if (!dev)
                return 1;
 
-       fd = open(dev, O_RDONLY|O_EXCL, 0);
-       if (fd < 0) {
-               if (verbose > 0)
-                       pr_err("imsm: Cannot open %s: %s\n",
-                               dev, strerror(errno));
-               return 0;
-       }
-       if (!get_dev_size(fd, dev, &ldsize)) {
-               close(fd);
+       fd = dev_open(dev, O_RDONLY|O_EXCL);
+       if (!is_fd_valid(fd)) {
+               pr_vrb("imsm: Cannot open %s: %s\n", dev, strerror(errno));
                return 0;
        }
+       if (!get_dev_size(fd, dev, &ldsize))
+               goto exit;
 
        /* capabilities retrieve could be possible
         * note that there is no fd for the disks in array.
         */
        super = alloc_super();
-       if (!super) {
-               close(fd);
-               return 0;
-       }
-       if (!get_dev_sector_size(fd, NULL, &super->sector_size)) {
-               close(fd);
-               free_imsm(super);
-               return 0;
-       }
+       if (!super)
+               goto exit;
+
+       if (!get_dev_sector_size(fd, NULL, &super->sector_size))
+               goto exit;
 
        rv = find_intel_hba_capability(fd, super, verbose > 0 ? dev : NULL);
        if (rv != 0) {
@@ -6592,32 +6755,42 @@ static int validate_geometry_imsm_container(struct supertype *st, int level,
                        fd, str, super->orom, rv, raiddisks);
 #endif
                /* no orom/efi or non-intel hba of the disk */
-               close(fd);
-               free_imsm(super);
-               return 0;
+               rv = 0;
+               goto exit;
        }
-       close(fd);
        if (super->orom) {
                if (raiddisks > super->orom->tds) {
                        if (verbose)
                                pr_err("%d exceeds maximum number of platform supported disks: %d\n",
                                        raiddisks, super->orom->tds);
-                       free_imsm(super);
-                       return 0;
+                       goto exit;
                }
                if ((super->orom->attr & IMSM_OROM_ATTR_2TB_DISK) == 0 &&
                    (ldsize >> 9) >> 32 > 0) {
                        if (verbose)
                                pr_err("%s exceeds maximum platform supported size\n", dev);
-                       free_imsm(super);
-                       return 0;
+                       goto exit;
                }
-       }
 
-       *freesize = avail_size_imsm(st, ldsize >> 9, data_offset);
-       free_imsm(super);
+               if (super->hba->type == SYS_DEV_VMD ||
+                   super->hba->type == SYS_DEV_NVME) {
+                       if (!imsm_is_nvme_namespace_supported(fd, 1)) {
+                               if (verbose)
+                                       pr_err("NVMe namespace %s is not supported by IMSM\n",
+                                               basename(dev));
+                               goto exit;
+                       }
+               }
+       }
+       if (freesize)
+               *freesize = avail_size_imsm(st, ldsize >> 9, data_offset);
+       rv = 1;
+exit:
+       if (super)
+               free_imsm(super);
+       close(fd);
 
-       return 1;
+       return rv;
 }
 
 static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
@@ -6647,20 +6820,35 @@ static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
        return end - base_start;
 }
 
-static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
+/** merge_extents() - analyze extents and get free size.
+ * @super: Intel metadata, not NULL.
+ * @expanding: if set, we are expanding &super->current_vol.
+ *
+ * Build a composite disk with all known extents and generate a size given the
+ * "all disks in an array must share a common start offset" constraint.
+ * If a volume is expanded, then return free space after the volume.
+ *
+ * Return: Free space or 0 on failure.
+ */
+static unsigned long long merge_extents(struct intel_super *super, const bool expanding)
 {
-       /* build a composite disk with all known extents and generate a new
-        * 'maxsize' given the "all disks in an array must share a common start
-        * offset" constraint
-        */
-       struct extent *e = xcalloc(sum_extents, sizeof(*e));
+       struct extent *e;
        struct dl *dl;
-       int i, j;
-       int start_extent;
-       unsigned long long pos;
+       int i, j, pos_vol_idx = -1;
+       int extent_idx = 0;
+       int sum_extents = 0;
+       unsigned long long pos = 0;
        unsigned long long start = 0;
-       unsigned long long maxsize;
-       unsigned long reserve;
+       unsigned long long free_size = 0;
+
+       unsigned long pre_reservation = 0;
+       unsigned long post_reservation = IMSM_RESERVED_SECTORS;
+       unsigned long reservation_size;
+
+       for (dl = super->disks; dl; dl = dl->next)
+               if (dl->e)
+                       sum_extents += dl->extent_cnt;
+       e = xcalloc(sum_extents, sizeof(struct extent));
 
        /* coalesce and sort all extents. also, check to see if we need to
         * reserve space between member arrays
@@ -6679,72 +6867,94 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten
        j = 0;
        while (i < sum_extents) {
                e[j].start = e[i].start;
+               e[j].vol = e[i].vol;
                e[j].size = find_size(e, &i, sum_extents);
                j++;
                if (e[j-1].size == 0)
                        break;
        }
 
-       pos = 0;
-       maxsize = 0;
-       start_extent = 0;
        i = 0;
        do {
-               unsigned long long esize;
+               unsigned long long esize = e[i].start - pos;
 
-               esize = e[i].start - pos;
-               if (esize >= maxsize) {
-                       maxsize = esize;
+               if (expanding ? pos_vol_idx == super->current_vol : esize >= free_size) {
+                       free_size = esize;
                        start = pos;
-                       start_extent = i;
+                       extent_idx = i;
                }
+
                pos = e[i].start + e[i].size;
+               pos_vol_idx = e[i].vol;
+
                i++;
        } while (e[i-1].size);
-       free(e);
 
-       if (maxsize == 0)
+       if (free_size == 0) {
+               dprintf("imsm: Cannot find free size.\n");
+               free(e);
                return 0;
+       }
 
-       /* FIXME assumes volume at offset 0 is the first volume in a
-        * container
-        */
-       if (start_extent > 0)
-               reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
-       else
-               reserve = 0;
+       if (!expanding && extent_idx != 0)
+               /*
+                * The volume being created is not the first one in the container, pre_reservation is needed.
+                */
+               pre_reservation = IMSM_RESERVED_SECTORS;
 
-       if (maxsize < reserve)
-               return 0;
+       if (e[extent_idx].size == 0)
+               /*
+                * extent_idx points to the metadata, post_reservation is already done.
+                */
+               post_reservation = 0;
+       free(e);
 
-       super->create_offset = ~((unsigned long long) 0);
-       if (start + reserve > super->create_offset)
-               return 0; /* start overflows create_offset */
-       super->create_offset = start + reserve;
+       reservation_size = pre_reservation + post_reservation;
 
-       return maxsize - reserve;
+       if (free_size < reservation_size) {
+               dprintf("imsm: Reservation size is greater than free space.\n");
+               return 0;
+       }
+
+       super->create_offset = start + pre_reservation;
+       return free_size - reservation_size;
 }
 
-static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
+/**
+ * is_raid_level_supported() - check if this count of drives and level is supported by platform.
+ * @orom: hardware properties, could be NULL.
+ * @level: requested raid level.
+ * @raiddisks: requested disk count.
+ *
+ * IMSM UEFI/OROM does not provide information about supported count of raid disks
+ * for particular level. That is why it is hardcoded.
+ * It is recommended not to allow usage of levels other than the supported ones,
+ * IMSM code is not tested against different level implementations.
+ *
+ * Return: true if supported, false otherwise.
+ */
+static bool is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
 {
-       if (level < 0 || level == 6 || level == 4)
-               return 0;
+       int idx;
 
-       /* if we have an orom prevent invalid raid levels */
-       if (orom)
-               switch (level) {
-               case 0: return imsm_orom_has_raid0(orom);
-               case 1:
-                       if (raiddisks > 2)
-                               return imsm_orom_has_raid1e(orom);
-                       return imsm_orom_has_raid1(orom) && raiddisks == 2;
-               case 10: return imsm_orom_has_raid10(orom) && raiddisks == 4;
-               case 5: return imsm_orom_has_raid5(orom) && raiddisks > 2;
-               }
-       else
-               return 1; /* not on an Intel RAID platform so anything goes */
+       for (idx = 0; imsm_level_ops[idx].name; idx++) {
+               if (imsm_level_ops[idx].level == level)
+                       break;
+       }
 
-       return 0;
+       if (!imsm_level_ops[idx].name)
+               return false;
+
+       if (!imsm_level_ops[idx].is_raiddisks_count_supported(raiddisks))
+               return false;
+
+       if (!orom)
+               return true;
+
+       if (imsm_level_ops[idx].is_level_supported(orom))
+               return true;
+
+       return false;
 }
 
 static int
@@ -6766,12 +6976,12 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
                    memb->members) {
                        struct dev_member *dev = memb->members;
                        int fd = -1;
-                       while(dev && (fd < 0)) {
+                       while (dev && !is_fd_valid(fd)) {
                                char *path = xmalloc(strlen(dev->name) + strlen("/dev/") + 1);
-                               num = sprintf(path, "%s%s", "/dev/", dev->name);
+                               num = snprintf(path, PATH_MAX, "%s%s", "/dev/", dev->name);
                                if (num > 0)
                                        fd = open(path, O_RDONLY, 0);
-                               if (num <= 0 || fd < 0) {
+                               if (num <= 0 || !is_fd_valid(fd)) {
                                        pr_vrb("Cannot open %s: %s\n",
                                               dev->name, strerror(errno));
                                }
@@ -6779,7 +6989,7 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
                                dev = dev->next;
                        }
                        found = 0;
-                       if (fd >= 0 && disk_attached_to_hba(fd, hba)) {
+                       if (is_fd_valid(fd) && disk_attached_to_hba(fd, hba)) {
                                struct mdstat_ent *vol;
                                for (vol = mdstat ; vol ; vol = vol->next) {
                                        if (vol->active > 0 &&
@@ -6799,8 +7009,7 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
                                        *devlist = dv;
                                }
                        }
-                       if (fd >= 0)
-                               close(fd);
+                       close_fd(&fd);
                }
        }
        free_mdstat(mdstat);
@@ -6850,7 +7059,7 @@ get_devices(const char *hba_path)
                char *path = NULL;
                if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
                        continue;
-               path = devt_to_devpath(makedev(major, minor));
+               path = devt_to_devpath(makedev(major, minor), 1, NULL);
                if (!path)
                        continue;
                if (!path_attached_to_hba(path, hba_path)) {
@@ -6861,7 +7070,7 @@ get_devices(const char *hba_path)
                free(path);
                path = NULL;
                fd = dev_open(ent->d_name, O_RDONLY);
-               if (fd >= 0) {
+               if (is_fd_valid(fd)) {
                        fd2devname(fd, buf);
                        close(fd);
                } else {
@@ -6920,7 +7129,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
                }
                tmpdev->container = 0;
                dfd = dev_open(devname, O_RDONLY|O_EXCL);
-               if (dfd < 0) {
+               if (!is_fd_valid(dfd)) {
                        dprintf("cannot open device %s: %s\n",
                                devname, strerror(errno));
                        tmpdev->used = 2;
@@ -6957,8 +7166,8 @@ count_volumes_list(struct md_list *devlist, char *homehost,
                                tmpdev->used = 2;
                        }
                }
-               if (dfd >= 0)
-                       close(dfd);
+               close_fd(&dfd);
+
                if (tmpdev->used == 2 || tmpdev->used == 4) {
                        /* Ignore unrecognised devices during auto-assembly */
                        goto loop;
@@ -6983,7 +7192,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
 
                        if (st->ss != tst->ss ||
                            st->minor_version != tst->minor_version ||
-                           st->ss->compare_super(st, tst) != 0) {
+                           st->ss->compare_super(st, tst, 1) != 0) {
                                /* Some mismatch. If exactly one array matches this host,
                                 * we can resolve on that one.
                                 * Or, if we are auto assembling, we just ignore the second
@@ -7321,13 +7530,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
                return 0;
        }
 
-       /* count total number of extents for merge */
-       i = 0;
-       for (dl = super->disks; dl; dl = dl->next)
-               if (dl->e)
-                       i += dl->extent_cnt;
-
-       maxsize = merge_extents(super, i);
+       maxsize = merge_extents(super, false);
 
        if (mpb->num_raid_devs > 0 && size && size != maxsize)
                pr_err("attempting to create a second volume with size less then remaining space.\n");
@@ -7357,25 +7560,41 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
        return 1;
 }
 
-static int imsm_get_free_size(struct supertype *st, int raiddisks,
-                        unsigned long long size, int chunk,
-                        unsigned long long *freesize)
+/**
+ * imsm_get_free_size() - get the biggest, common free space from members.
+ * @super: &intel_super pointer, not NULL.
+ * @raiddisks: number of raid disks.
+ * @size: requested size, could be 0 (means max size).
+ * @chunk: requested chunk size in KiB.
+ * @freesize: pointer for returned size value.
+ *
+ * Return: &IMSM_STATUS_OK or &IMSM_STATUS_ERROR.
+ *
+ * @freesize is set to meaningful value, this can be @size, or calculated
+ * max free size.
+ * super->create_offset value is modified and set appropriately in
+ * merge_extents() for further creation.
+ */
+static imsm_status_t imsm_get_free_size(struct intel_super *super,
+                                       const int raiddisks,
+                                       unsigned long long size,
+                                       const int chunk,
+                                       unsigned long long *freesize,
+                                       bool expanding)
 {
-       struct intel_super *super = st->sb;
        struct imsm_super *mpb = super->anchor;
        struct dl *dl;
        int i;
-       int extent_cnt;
        struct extent *e;
+       int cnt = 0;
+       int used = 0;
        unsigned long long maxsize;
-       unsigned long long minsize;
-       int cnt;
-       int used;
+       unsigned long long minsize = size;
+
+       if (minsize == 0)
+               minsize = chunk * 2;
 
        /* find the largest common start free region of the possible disks */
-       used = 0;
-       extent_cnt = 0;
-       cnt = 0;
        for (dl = super->disks; dl; dl = dl->next) {
                dl->raiddisk = -1;
 
@@ -7395,22 +7614,19 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
                        ;
                dl->e = e;
                dl->extent_cnt = i;
-               extent_cnt += i;
                cnt++;
        }
 
-       maxsize = merge_extents(super, extent_cnt);
-       minsize = size;
-       if (size == 0)
-               /* chunk is in K */
-               minsize = chunk * 2;
+       maxsize = merge_extents(super, expanding);
+       if (maxsize < minsize)  {
+               pr_err("imsm: Free space is %llu but must be equal or larger than %llu.\n",
+                      maxsize, minsize);
+               return IMSM_STATUS_ERROR;
+       }
 
-       if (cnt < raiddisks ||
-           (super->orom && used && used != raiddisks) ||
-           maxsize < minsize ||
-           maxsize == 0) {
-               pr_err("not enough devices with space to create array.\n");
-               return 0; /* No enough free spaces large enough */
+       if (cnt < raiddisks || (super->orom && used && used != raiddisks)) {
+               pr_err("imsm: Not enough devices with space to create array.\n");
+               return IMSM_STATUS_ERROR;
        }
 
        if (size == 0) {
@@ -7423,37 +7639,69 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
        }
        if (mpb->num_raid_devs > 0 && size && size != maxsize)
                pr_err("attempting to create a second volume with size less then remaining space.\n");
-       cnt = 0;
-       for (dl = super->disks; dl; dl = dl->next)
-               if (dl->e)
-                       dl->raiddisk = cnt++;
-
        *freesize = size;
 
        dprintf("imsm: imsm_get_free_size() returns : %llu\n", size);
 
-       return 1;
+       return IMSM_STATUS_OK;
 }
 
-static int reserve_space(struct supertype *st, int raiddisks,
-                        unsigned long long size, int chunk,
-                        unsigned long long *freesize)
+/**
+ * autolayout_imsm() - automatically layout a new volume.
+ * @super: &intel_super pointer, not NULL.
+ * @raiddisks: number of raid disks.
+ * @size: requested size, could be 0 (means max size).
+ * @chunk: requested chunk.
+ * @freesize: pointer for returned size value.
+ *
+ * We are being asked to automatically layout a new volume based on the current
+ * contents of the container. If the parameters can be satisfied autolayout_imsm
+ * will record the disks, start offset, and will return size of the volume to
+ * be created. See imsm_get_free_size() for details.
+ * add_to_super() and getinfo_super() detect when autolayout is in progress.
+ * If first volume exists, slots are set consistently to it.
+ *
+ * Return: &IMSM_STATUS_OK on success, &IMSM_STATUS_ERROR otherwise.
+ *
+ * Disks are marked for creation via dl->raiddisk.
+ */
+static imsm_status_t autolayout_imsm(struct intel_super *super,
+                                    const int raiddisks,
+                                    unsigned long long size, const int chunk,
+                                    unsigned long long *freesize)
 {
-       struct intel_super *super = st->sb;
-       struct dl *dl;
-       int cnt;
-       int rv = 0;
+       int curr_slot = 0;
+       struct dl *disk;
+       int vol_cnt = super->anchor->num_raid_devs;
+       imsm_status_t rv;
 
-       rv = imsm_get_free_size(st, raiddisks, size, chunk, freesize);
-       if (rv) {
-               cnt = 0;
-               for (dl = super->disks; dl; dl = dl->next)
-                       if (dl->e)
-                               dl->raiddisk = cnt++;
-               rv = 1;
+       rv = imsm_get_free_size(super, raiddisks, size, chunk, freesize, false);
+       if (rv != IMSM_STATUS_OK)
+               return IMSM_STATUS_ERROR;
+
+       for (disk = super->disks; disk; disk = disk->next) {
+               if (!disk->e)
+                       continue;
+
+               if (curr_slot == raiddisks)
+                       break;
+
+               if (vol_cnt == 0) {
+                       disk->raiddisk = curr_slot;
+               } else {
+                       int _slot = get_disk_slot_in_dev(super, 0, disk->index);
+
+                       if (_slot == -1) {
+                               pr_err("Disk %s is not used in first volume, aborting\n",
+                                      disk->devname);
+                               return IMSM_STATUS_ERROR;
+                       }
+                       disk->raiddisk = _slot;
+               }
+               curr_slot++;
        }
 
-       return rv;
+       return IMSM_STATUS_OK;
 }
 
 static int validate_geometry_imsm(struct supertype *st, int level, int layout,
@@ -7470,17 +7718,16 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
         * if given unused devices create a container
         * if given devices in a container create a member volume
         */
-       if (level == LEVEL_CONTAINER) {
+       if (is_container(level))
                /* Must be a fresh device to add to a container */
-               return validate_geometry_imsm_container(st, level, layout,
-                                                       raiddisks,
-                                                       *chunk,
-                                                       size, data_offset,
-                                                       dev, freesize,
-                                                       verbose);
-       }
+               return validate_geometry_imsm_container(st, level, raiddisks,
+                                                       data_offset, dev,
+                                                       freesize, verbose);
 
-       if (size && (size < 1024)) {
+       /*
+        * Size is given in sectors.
+        */
+       if (size && (size < 2048)) {
                pr_err("Given size must be greater than 1M.\n");
                /* Depends on algorithm in Create.c :
                 * if container was given (dev == NULL) return -1,
@@ -7490,35 +7737,35 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
        }
 
        if (!dev) {
-               if (st->sb) {
-                       struct intel_super *super = st->sb;
-                       if (!validate_geometry_imsm_orom(st->sb, level, layout,
-                                                        raiddisks, chunk, size,
-                                                        verbose))
+               struct intel_super *super = st->sb;
+
+               /*
+                * Autolayout mode, st->sb must be set.
+                */
+               if (!super) {
+                       pr_vrb("superblock must be set for autolayout, aborting\n");
+                       return 0;
+               }
+
+               if (!validate_geometry_imsm_orom(st->sb, level, layout,
+                                                raiddisks, chunk, size,
+                                                verbose))
+                       return 0;
+
+               if (super->orom && freesize) {
+                       imsm_status_t rv;
+                       int count = count_volumes(super->hba, super->orom->dpa,
+                                             verbose);
+                       if (super->orom->vphba <= count) {
+                               pr_vrb("platform does not support more than %d raid volumes.\n",
+                                      super->orom->vphba);
                                return 0;
-                       /* we are being asked to automatically layout a
-                        * new volume based on the current contents of
-                        * the container.  If the the parameters can be
-                        * satisfied reserve_space will record the disks,
-                        * start offset, and size of the volume to be
-                        * created.  add_to_super and getinfo_super
-                        * detect when autolayout is in progress.
-                        */
-                       /* assuming that freesize is always given when array is
-                          created */
-                       if (super->orom && freesize) {
-                               int count;
-                               count = count_volumes(super->hba,
-                                                     super->orom->dpa, verbose);
-                               if (super->orom->vphba <= count) {
-                                       pr_vrb("platform does not support more than %d raid volumes.\n",
-                                              super->orom->vphba);
-                                       return 0;
-                               }
                        }
-                       if (freesize)
-                               return reserve_space(st, raiddisks, size,
-                                                    *chunk, freesize);
+
+                       rv = autolayout_imsm(super, raiddisks, size, *chunk,
+                                            freesize);
+                       if (rv != IMSM_STATUS_OK)
+                               return 0;
                }
                return 1;
        }
@@ -7532,26 +7779,26 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
 
        /* This device needs to be a device in an 'imsm' container */
        fd = open(dev, O_RDONLY|O_EXCL, 0);
-       if (fd >= 0) {
-               if (verbose)
-                       pr_err("Cannot create this array on device %s\n",
-                              dev);
+
+       if (is_fd_valid(fd)) {
+               pr_vrb("Cannot create this array on device %s\n", dev);
                close(fd);
                return 0;
        }
-       if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
-               if (verbose)
-                       pr_err("Cannot open %s: %s\n",
-                               dev, strerror(errno));
+       if (errno == EBUSY)
+               fd = open(dev, O_RDONLY, 0);
+
+       if (!is_fd_valid(fd)) {
+               pr_vrb("Cannot open %s: %s\n", dev, strerror(errno));
                return 0;
        }
+
        /* Well, it is in use by someone, maybe an 'imsm' container. */
        cfd = open_container(fd);
-       close(fd);
-       if (cfd < 0) {
-               if (verbose)
-                       pr_err("Cannot use %s: It is busy\n",
-                               dev);
+       close_fd(&fd);
+
+       if (!is_fd_valid(cfd)) {
+               pr_vrb("Cannot use %s: It is busy\n", dev);
                return 0;
        }
        sra = sysfs_read(cfd, NULL, GET_VERSION);
@@ -7623,7 +7870,7 @@ static int kill_subarray_imsm(struct supertype *st, char *subarray_id)
 
                if (i < current_vol)
                        continue;
-               sprintf(subarray, "%u", i);
+               snprintf(subarray, sizeof(subarray), "%u", i);
                if (is_subarray_active(subarray, st->devnm)) {
                        pr_err("deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
                               current_vol, i);
@@ -7668,24 +7915,40 @@ static int kill_subarray_imsm(struct supertype *st, char *subarray_id)
        return 0;
 }
 
+/**
+ * get_rwh_policy_from_update() - Get the rwh policy for update option.
+ * @update: Update option.
+ */
+static int get_rwh_policy_from_update(enum update_opt update)
+{
+       switch (update) {
+       case UOPT_PPL:
+               return RWH_MULTIPLE_DISTRIBUTED;
+       case UOPT_NO_PPL:
+               return RWH_MULTIPLE_OFF;
+       case UOPT_BITMAP:
+               return RWH_BITMAP;
+       case UOPT_NO_BITMAP:
+               return RWH_OFF;
+       default:
+               break;
+       }
+       return UOPT_UNDEFINED;
+}
+
 static int update_subarray_imsm(struct supertype *st, char *subarray,
-                               char *update, struct mddev_ident *ident)
+                               enum update_opt update, struct mddev_ident *ident)
 {
        /* update the subarray currently referenced by ->current_vol */
        struct intel_super *super = st->sb;
        struct imsm_super *mpb = super->anchor;
 
-       if (strcmp(update, "name") == 0) {
+       if (update == UOPT_NAME) {
                char *name = ident->name;
                char *ep;
                int vol;
 
-               if (is_subarray_active(subarray, st->devnm)) {
-                       pr_err("Unable to update name of active subarray\n");
-                       return 2;
-               }
-
-               if (!check_name(super, name, 0))
+               if (imsm_is_name_allowed(super, name, 1) == false)
                        return 2;
 
                vol = strtoul(subarray, &ep, 10);
@@ -7714,8 +7977,7 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
                        }
                        super->updates_pending++;
                }
-       } else if (strcmp(update, "ppl") == 0 ||
-                  strcmp(update, "no-ppl") == 0) {
+       } else if (get_rwh_policy_from_update(update) != UOPT_UNDEFINED) {
                int new_policy;
                char *ep;
                int vol = strtoul(subarray, &ep, 10);
@@ -7723,10 +7985,7 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
                if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
                        return 2;
 
-               if (strcmp(update, "ppl") == 0)
-                       new_policy = RWH_MULTIPLE_DISTRIBUTED;
-               else
-                       new_policy = RWH_MULTIPLE_OFF;
+               new_policy = get_rwh_policy_from_update(update);
 
                if (st->update_tail) {
                        struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u));
@@ -7742,24 +8001,21 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
                        dev->rwh_policy = new_policy;
                        super->updates_pending++;
                }
+               if (new_policy == RWH_BITMAP)
+                       return write_init_bitmap_imsm_vol(st, vol);
        } else
                return 2;
 
        return 0;
 }
 
-static int is_gen_migration(struct imsm_dev *dev)
+static bool is_gen_migration(struct imsm_dev *dev)
 {
-       if (dev == NULL)
-               return 0;
-
-       if (!dev->vol.migr_state)
-               return 0;
-
-       if (migr_type(dev) == MIGR_GEN_MIGR)
-               return 1;
+       if (dev && dev->vol.migr_state &&
+           migr_type(dev) == MIGR_GEN_MIGR)
+               return true;
 
-       return 0;
+       return false;
 }
 
 static int is_rebuilding(struct imsm_dev *dev)
@@ -7826,7 +8082,7 @@ static void update_recovery_start(struct intel_super *super,
                return;
        }
 
-       units = __le32_to_cpu(dev->vol.curr_migr_unit);
+       units = vol_curr_migr_unit(dev);
        rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
 }
 
@@ -7853,9 +8109,9 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
        int current_vol = super->current_vol;
 
        /* do not assemble arrays when not all attributes are supported */
-       if (imsm_check_attributes(mpb->attributes) == 0) {
+       if (imsm_check_attributes(mpb->attributes) == false) {
                sb_errors = 1;
-               pr_err("Unsupported attributes in IMSM metadata.Arrays activation is blocked.\n");
+               pr_err("Unsupported attributes in IMSM metadata. Arrays activation is blocked.\n");
        }
 
        /* count spare devices, not used in maps
@@ -7959,10 +8215,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                                if ((!able_to_resync(level, missing) ||
                                     recovery_start == 0))
                                        this->resync_start = MaxSector;
-                       } else {
-                               /*
-                                * FIXME handle dirty degraded
-                                */
                        }
 
                        if (skip)
@@ -7993,7 +8245,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                        info_d->data_offset = pba_of_lba0(map);
                        info_d->component_size = calc_component_size(map, dev);
 
-                       if (map->raid_level == 5) {
+                       if (map->raid_level == IMSM_T_RAID5) {
                                info_d->ppl_sector = this->ppl_sector;
                                info_d->ppl_size = this->ppl_size;
                                if (this->consistency_policy == CONSISTENCY_POLICY_PPL &&
@@ -8145,19 +8397,19 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
 }
 
 static int imsm_open_new(struct supertype *c, struct active_array *a,
-                        char *inst)
+                        int inst)
 {
        struct intel_super *super = c->sb;
        struct imsm_super *mpb = super->anchor;
        struct imsm_update_prealloc_bb_mem u;
 
-       if (atoi(inst) >= mpb->num_raid_devs) {
-               pr_err("subarry index %d, out of range\n", atoi(inst));
+       if (inst >= mpb->num_raid_devs) {
+               pr_err("subarry index %d, out of range\n", inst);
                return -ENODEV;
        }
 
-       dprintf("imsm: open_new %s\n", inst);
-       a->info.container_member = atoi(inst);
+       dprintf("imsm: open_new %d\n", inst);
+       a->info.container_member = inst;
 
        u.type = update_prealloc_badblocks_mem;
        imsm_update_metadata_locally(c, &u, sizeof(u));
@@ -8267,7 +8519,7 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
        dprintf("imsm: mark missing\n");
        /* end process for initialization and rebuild only
         */
-       if (is_gen_migration(dev) == 0) {
+       if (is_gen_migration(dev) == false) {
                int failed = imsm_count_failed(super, dev, MAP_0);
 
                if (failed) {
@@ -8373,7 +8625,7 @@ static void imsm_progress_container_reshape(struct intel_super *super)
                prev_num_members = map->num_members;
                map->num_members = prev_disks;
                dev->vol.migr_state = 1;
-               dev->vol.curr_migr_unit = 0;
+               set_vol_curr_migr_unit(dev, 0);
                set_migr_type(dev, MIGR_GEN_MIGR);
                for (i = prev_num_members;
                     i < map->num_members; i++)
@@ -8410,29 +8662,12 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
                 * We might need to
                 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
                 * - finish the reshape (if last_checkpoint is big and action != reshape)
-                * - update curr_migr_unit
+                * - update vol_curr_migr_unit
                 */
                if (a->curr_action == reshape) {
-                       /* still reshaping, maybe update curr_migr_unit */
+                       /* still reshaping, maybe update vol_curr_migr_unit */
                        goto mark_checkpoint;
                } else {
-                       if (a->last_checkpoint == 0 && a->prev_action == reshape) {
-                               /* for some reason we aborted the reshape.
-                                *
-                                * disable automatic metadata rollback
-                                * user action is required to recover process
-                                */
-                               if (0) {
-                                       struct imsm_map *map2 =
-                                               get_imsm_map(dev, MAP_1);
-                                       dev->vol.migr_state = 0;
-                                       set_migr_type(dev, 0);
-                                       dev->vol.curr_migr_unit = 0;
-                                       memcpy(map, map2,
-                                              sizeof_imsm_map(map2));
-                                       super->updates_pending++;
-                               }
-                       }
                        if (a->last_checkpoint >= a->info.component_size) {
                                unsigned long long array_blocks;
                                int used_disks;
@@ -8496,6 +8731,9 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
                super->updates_pending++;
        }
 
+       if (a->prev_action == idle)
+               goto skip_mark_checkpoint;
+
 mark_checkpoint:
        /* skip checkpointing for general migration,
         * it is controlled in mdadm
@@ -8503,25 +8741,16 @@ mark_checkpoint:
        if (is_gen_migration(dev))
                goto skip_mark_checkpoint;
 
-       /* check if we can update curr_migr_unit from resync_start, recovery_start */
+       /* check if we can update vol_curr_migr_unit from resync_start,
+        * recovery_start
+        */
        blocks_per_unit = blocks_per_migr_unit(super, dev);
        if (blocks_per_unit) {
-               __u32 units32;
-               __u64 units;
-
-               units = a->last_checkpoint / blocks_per_unit;
-               units32 = units;
-
-               /* check that we did not overflow 32-bits, and that
-                * curr_migr_unit needs updating
-                */
-               if (units32 == units &&
-                   units32 != 0 &&
-                   __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
-                       dprintf("imsm: mark checkpoint (%u)\n", units32);
-                       dev->vol.curr_migr_unit = __cpu_to_le32(units32);
-                       super->updates_pending++;
-               }
+               set_vol_curr_migr_unit(dev,
+                                      a->last_checkpoint / blocks_per_unit);
+               dprintf("imsm: mark checkpoint (%llu)\n",
+                       vol_curr_migr_unit(dev));
+               super->updates_pending++;
        }
 
 skip_mark_checkpoint:
@@ -8625,7 +8854,6 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
                                break;
                        }
                        end_migration(dev, super, map_state);
-                       map = get_imsm_map(dev, MAP_0);
                        map->failed_disk_num = ~0;
                        super->updates_pending++;
                        a->last_checkpoint = 0;
@@ -8637,7 +8865,6 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
                                end_migration(dev, super, map_state);
                        else
                                map->map_state = map_state;
-                       map = get_imsm_map(dev, MAP_0);
                        map->failed_disk_num = ~0;
                        super->updates_pending++;
                        break;
@@ -8749,7 +8976,8 @@ static int store_imsm_mpb(int fd, struct imsm_super *mpb)
        unsigned long long sectors;
        unsigned int sector_size;
 
-       get_dev_sector_size(fd, NULL, &sector_size);
+       if (!get_dev_sector_size(fd, NULL, &sector_size))
+               return 1;
        get_dev_size(fd, NULL, &dsize);
 
        if (mpb_size > sector_size) {
@@ -8829,7 +9057,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
        for (dl = super->disks; dl; dl = dl->next) {
                /* If in this array, skip */
                for (d = a->info.devs ; d ; d = d->next)
-                       if (d->state_fd >= 0 &&
+                       if (is_fd_valid(d->state_fd) &&
                            d->disk.major == dl->major &&
                            d->disk.minor == dl->minor) {
                                dprintf("%x:%x already in array\n",
@@ -8932,29 +9160,26 @@ static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
        __u8 state;
 
        dev2 = get_imsm_dev(cont->sb, dev_idx);
-       if (dev2) {
-               state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0);
-               if (state == IMSM_T_STATE_FAILED) {
-                       map = get_imsm_map(dev2, MAP_0);
-                       if (!map)
-                               return 1;
-                       for (slot = 0; slot < map->num_members; slot++) {
-                               /*
-                                * Check if failed disks are deleted from intel
-                                * disk list or are marked to be deleted
-                                */
-                               idx = get_imsm_disk_idx(dev2, slot, MAP_X);
-                               idisk = get_imsm_dl_disk(cont->sb, idx);
-                               /*
-                                * Do not rebuild the array if failed disks
-                                * from failed sub-array are not removed from
-                                * container.
-                                */
-                               if (idisk &&
-                                   is_failed(&idisk->disk) &&
-                                   (idisk->action != DISK_REMOVE))
-                                       return 0;
-                       }
+
+       state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0);
+       if (state == IMSM_T_STATE_FAILED) {
+               map = get_imsm_map(dev2, MAP_0);
+               for (slot = 0; slot < map->num_members; slot++) {
+                       /*
+                        * Check if failed disks are deleted from intel
+                        * disk list or are marked to be deleted
+                        */
+                       idx = get_imsm_disk_idx(dev2, slot, MAP_X);
+                       idisk = get_imsm_dl_disk(cont->sb, idx);
+                       /*
+                        * Do not rebuild the array if failed disks
+                        * from failed sub-array are not removed from
+                        * container.
+                        */
+                       if (idisk &&
+                           is_failed(&idisk->disk) &&
+                           (idisk->action != DISK_REMOVE))
+                               return 0;
                }
        }
        return 1;
@@ -8989,13 +9214,15 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
        int i;
        int allowed;
 
-       for (d = a->info.devs ; d ; d = d->next) {
-               if ((d->curr_state & DS_FAULTY) &&
-                       d->state_fd >= 0)
+       for (d = a->info.devs ; d; d = d->next) {
+               if (!is_fd_valid(d->state_fd))
+                       continue;
+
+               if (d->curr_state & DS_FAULTY)
                        /* wait for Removal to happen */
                        return NULL;
-               if (d->state_fd >= 0)
-                       failed--;
+
+               failed--;
        }
 
        dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
@@ -9051,7 +9278,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
                        if (d->disk.raid_disk == i)
                                break;
                dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
-               if (d && (d->state_fd >= 0))
+               if (d && is_fd_valid(d->state_fd))
                        continue;
 
                /*
@@ -9181,7 +9408,7 @@ static int remove_disk_super(struct intel_super *super, int major, int minor)
                        else
                                super->disks = dl->next;
                        dl->next = NULL;
-                       __free_imsm_disk(dl);
+                       __free_imsm_disk(dl, 1);
                        dprintf("removed %x:%x\n", major, minor);
                        break;
                }
@@ -9231,7 +9458,7 @@ static int add_remove_disk_update(struct intel_super *super)
                                }
                        }
                        /* release allocate disk structure */
-                       __free_imsm_disk(disk_cfg);
+                       __free_imsm_disk(disk_cfg, 1);
                }
        }
        return check_degraded;
@@ -9276,7 +9503,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
                        }
 
                        to_state = map->map_state;
-                       if ((u->new_level == 5) && (map->raid_level == 0)) {
+                       if ((u->new_level == IMSM_T_RAID5) && (map->raid_level == IMSM_T_RAID0)) {
                                map->num_members++;
                                /* this should not happen */
                                if (u->new_disks[0] < 0) {
@@ -9287,11 +9514,13 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
                                        to_state = IMSM_T_STATE_NORMAL;
                        }
                        migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
+
                        if (u->new_level > -1)
-                               map->raid_level = u->new_level;
+                               update_imsm_raid_level(map, u->new_level);
+
                        migr_map = get_imsm_map(new_dev, MAP_1);
-                       if ((u->new_level == 5) &&
-                           (migr_map->raid_level == 0)) {
+                       if ((u->new_level == IMSM_T_RAID5) &&
+                           (migr_map->raid_level == IMSM_T_RAID0)) {
                                int ord = map->num_members - 1;
                                migr_map->num_members--;
                                if (u->new_disks[0] < 0)
@@ -9306,7 +9535,6 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
                        /* update chunk size
                         */
                        if (u->new_chunksize > 0) {
-                               unsigned long long num_data_stripes;
                                struct imsm_map *dest_map =
                                        get_imsm_map(dev, MAP_0);
                                int used_disks =
@@ -9317,11 +9545,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
 
                                map->blocks_per_strip =
                                        __cpu_to_le16(u->new_chunksize * 2);
-                               num_data_stripes =
-                                       imsm_dev_size(dev) / used_disks;
-                               num_data_stripes /= map->blocks_per_strip;
-                               num_data_stripes /= map->num_domains;
-                               set_num_data_stripes(map, num_data_stripes);
+                               update_num_data_stripes(map, imsm_dev_size(dev));
                        }
 
                        /* ensure blocks_per_member has valid value
@@ -9332,7 +9556,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration *
 
                        /* add disk
                         */
-                       if (u->new_level != 5 || migr_map->raid_level != 0 ||
+                       if (u->new_level != IMSM_T_RAID5 || migr_map->raid_level != IMSM_T_RAID0 ||
                            migr_map->raid_level == map->raid_level)
                                goto skip_disk_add;
 
@@ -9395,7 +9619,6 @@ static int apply_size_change_update(struct imsm_update_size_change *u,
                        struct imsm_map *map = get_imsm_map(dev, MAP_0);
                        int used_disks = imsm_num_data_members(map);
                        unsigned long long blocks_per_member;
-                       unsigned long long num_data_stripes;
                        unsigned long long new_size_per_disk;
 
                        if (used_disks == 0)
@@ -9406,16 +9629,10 @@ static int apply_size_change_update(struct imsm_update_size_change *u,
                        new_size_per_disk = u->new_size / used_disks;
                        blocks_per_member = new_size_per_disk +
                                            NUM_BLOCKS_DIRTY_STRIPE_REGION;
-                       num_data_stripes = new_size_per_disk /
-                                          map->blocks_per_strip;
-                       num_data_stripes /= map->num_domains;
-                       dprintf("(size: %llu, blocks per member: %llu, num_data_stipes: %llu)\n",
-                               u->new_size, new_size_per_disk,
-                               num_data_stripes);
-                       set_blocks_per_member(map, blocks_per_member);
-                       set_num_data_stripes(map, num_data_stripes);
-                       imsm_set_array_size(dev, u->new_size);
 
+                       imsm_set_array_size(dev, u->new_size);
+                       set_blocks_per_member(map, blocks_per_member);
+                       update_num_data_stripes(map, u->new_size);
                        ret_val = 1;
                        break;
                }
@@ -9424,6 +9641,39 @@ static int apply_size_change_update(struct imsm_update_size_change *u,
        return ret_val;
 }
 
+static int prepare_spare_to_activate(struct supertype *st,
+                                    struct imsm_update_activate_spare *u)
+{
+       struct intel_super *super = st->sb;
+       int prev_current_vol = super->current_vol;
+       struct active_array *a;
+       int ret = 1;
+
+       for (a = st->arrays; a; a = a->next)
+               /*
+                * Additional initialization (adding bitmap header, filling
+                * the bitmap area with '1's to force initial rebuild for a whole
+                * data-area) is required when adding the spare to the volume
+                * with write-intent bitmap.
+                */
+               if (a->info.container_member == u->array &&
+                   a->info.consistency_policy == CONSISTENCY_POLICY_BITMAP) {
+                       struct dl *dl;
+
+                       for (dl = super->disks; dl; dl = dl->next)
+                               if (dl == u->dl)
+                                       break;
+                       if (!dl)
+                               break;
+
+                       super->current_vol = u->array;
+                       if (st->ss->write_bitmap(st, dl->fd, NoUpdate))
+                               ret = 0;
+                       super->current_vol = prev_current_vol;
+               }
+       return ret;
+}
+
 static int apply_update_activate_spare(struct imsm_update_activate_spare *u,
                                       struct intel_super *super,
                                       struct active_array *active_array)
@@ -9505,10 +9755,9 @@ static int apply_update_activate_spare(struct imsm_update_activate_spare *u,
                /* count arrays using the victim in the metadata */
                found = 0;
                for (a = active_array; a ; a = a->next) {
-                       dev = get_imsm_dev(super, a->info.container_member);
-                       map = get_imsm_map(dev, MAP_0);
+                       int dev_idx = a->info.container_member;
 
-                       if (get_imsm_disk_slot(map, victim) >= 0)
+                       if (get_disk_slot_in_dev(super, dev_idx, victim) >= 0)
                                found++;
                }
 
@@ -9608,7 +9857,7 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
                                id->index);
                        devices_to_reshape--;
                        newdev->vol.migr_state = 1;
-                       newdev->vol.curr_migr_unit = 0;
+                       set_vol_curr_migr_unit(newdev, 0);
                        set_migr_type(newdev, MIGR_GEN_MIGR);
                        newmap->num_members = u->new_raid_disks;
                        for (i = 0; i < delta_disks; i++) {
@@ -9664,8 +9913,6 @@ static int apply_takeover_update(struct imsm_update_takeover *u,
        map = get_imsm_map(dev, MAP_0);
 
        if (u->direction == R10_TO_R0) {
-               unsigned long long num_data_stripes;
-
                /* Number of failed disks must be half of initial disk number */
                if (imsm_count_failed(super, dev, MAP_0) !=
                                (map->num_members / 2))
@@ -9686,19 +9933,16 @@ static int apply_takeover_update(struct imsm_update_takeover *u,
                        }
                }
                /* update map */
-               map->num_members = map->num_members / 2;
+               map->num_members /= map->num_domains;
                map->map_state = IMSM_T_STATE_NORMAL;
-               map->num_domains = 1;
-               map->raid_level = 0;
+               update_imsm_raid_level(map, IMSM_T_RAID0);
+               set_num_domains(map);
+               update_num_data_stripes(map, imsm_dev_size(dev));
                map->failed_disk_num = -1;
-               num_data_stripes = imsm_dev_size(dev) / 2;
-               num_data_stripes /= map->blocks_per_strip;
-               set_num_data_stripes(map, num_data_stripes);
        }
 
        if (u->direction == R0_TO_R10) {
                void **space;
-               unsigned long long num_data_stripes;
 
                /* update slots in current disk list */
                for (dm = super->disks; dm; dm = dm->next) {
@@ -9733,14 +9977,12 @@ static int apply_takeover_update(struct imsm_update_takeover *u,
                memcpy(dev_new, dev, sizeof(*dev));
                /* update new map */
                map = get_imsm_map(dev_new, MAP_0);
-               map->num_members = map->num_members * 2;
+
                map->map_state = IMSM_T_STATE_DEGRADED;
-               map->num_domains = 2;
-               map->raid_level = 1;
-               num_data_stripes = imsm_dev_size(dev) / 2;
-               num_data_stripes /= map->blocks_per_strip;
-               num_data_stripes /= map->num_domains;
-               set_num_data_stripes(map, num_data_stripes);
+               update_imsm_raid_level(map, IMSM_T_RAID10);
+               set_num_domains(map);
+               map->num_members = map->num_members * map->num_domains;
+               update_num_data_stripes(map, imsm_dev_size(dev));
 
                /* replace dev<->dev_new */
                dv->dev = dev_new;
@@ -9810,8 +10052,8 @@ static void imsm_process_update(struct supertype *st,
                /* find device under general migration */
                for (id = super->devlist ; id; id = id->next) {
                        if (is_gen_migration(id->dev)) {
-                               id->dev->vol.curr_migr_unit =
-                                       __cpu_to_le32(u->curr_migr_unit);
+                               set_vol_curr_migr_unit(id->dev,
+                                                  u->curr_migr_unit);
                                super->updates_pending++;
                        }
                }
@@ -9848,7 +10090,9 @@ static void imsm_process_update(struct supertype *st,
        }
        case update_activate_spare: {
                struct imsm_update_activate_spare *u = (void *) update->buf;
-               if (apply_update_activate_spare(u, super, st->arrays))
+
+               if (prepare_spare_to_activate(st, u) &&
+                   apply_update_activate_spare(u, super, st->arrays))
                        super->updates_pending++;
                break;
        }
@@ -9970,7 +10214,6 @@ static void imsm_process_update(struct supertype *st,
                int victim = u->dev_idx;
                struct active_array *a;
                struct intel_dev **dp;
-               struct imsm_dev *dev;
 
                /* sanity check that we are not affecting the uuid of
                 * active arrays, or deleting an active array
@@ -9986,8 +10229,7 @@ static void imsm_process_update(struct supertype *st,
                 * is active in the container, so checking
                 * mpb->num_raid_devs is just extra paranoia
                 */
-               dev = get_imsm_dev(super, victim);
-               if (a || !dev || mpb->num_raid_devs == 1) {
+               if (a || mpb->num_raid_devs == 1 || victim >= super->anchor->num_raid_devs) {
                        dprintf("failed to delete subarray-%d\n", victim);
                        break;
                }
@@ -10021,7 +10263,8 @@ static void imsm_process_update(struct supertype *st,
                        if (a->info.container_member == target)
                                break;
                dev = get_imsm_dev(super, u->dev_idx);
-               if (a || !dev || !check_name(super, name, 1)) {
+
+               if (a || !dev || imsm_is_name_allowed(super, name, 0) == false) {
                        dprintf("failed to rename subarray-%d\n", target);
                        break;
                }
@@ -10050,10 +10293,6 @@ static void imsm_process_update(struct supertype *st,
                struct imsm_update_rwh_policy *u = (void *)update->buf;
                int target = u->dev_idx;
                struct imsm_dev *dev = get_imsm_dev(super, target);
-               if (!dev) {
-                       dprintf("could not find subarray-%d\n", target);
-                       break;
-               }
 
                if (dev->rwh_policy != u->new_policy) {
                        dev->rwh_policy = u->new_policy;
@@ -10397,22 +10636,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
                struct dl *dl = *dlp;
 
                *dlp = (*dlp)->next;
-               __free_imsm_disk(dl);
-       }
-}
-
-static void close_targets(int *targets, int new_disks)
-{
-       int i;
-
-       if (!targets)
-               return;
-
-       for (i = 0; i < new_disks; i++) {
-               if (targets[i] >= 0) {
-                       close(targets[i]);
-                       targets[i] = -1;
-               }
+               __free_imsm_disk(dl, 1);
        }
 }
 
@@ -10469,62 +10693,6 @@ static int imsm_get_allowed_degradation(int level, int raid_disks,
        }
 }
 
-/*******************************************************************************
- * Function:   open_backup_targets
- * Description:        Function opens file descriptors for all devices given in
- *             info->devs
- * Parameters:
- *     info            : general array info
- *     raid_disks      : number of disks
- *     raid_fds        : table of device's file descriptors
- *     super           : intel super for raid10 degradation check
- *     dev             : intel device for raid10 degradation check
- * Returns:
- *      0 : success
- *     -1 : fail
- ******************************************************************************/
-int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds,
-                       struct intel_super *super, struct imsm_dev *dev)
-{
-       struct mdinfo *sd;
-       int i;
-       int opened = 0;
-
-       for (i = 0; i < raid_disks; i++)
-               raid_fds[i] = -1;
-
-       for (sd = info->devs ; sd ; sd = sd->next) {
-               char *dn;
-
-               if (sd->disk.state & (1<<MD_DISK_FAULTY)) {
-                       dprintf("disk is faulty!!\n");
-                       continue;
-               }
-
-               if (sd->disk.raid_disk >= raid_disks || sd->disk.raid_disk < 0)
-                       continue;
-
-               dn = map_dev(sd->disk.major,
-                            sd->disk.minor, 1);
-               raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR);
-               if (raid_fds[sd->disk.raid_disk] < 0) {
-                       pr_err("cannot open component\n");
-                       continue;
-               }
-               opened++;
-       }
-       /* check if maximum array degradation level is not exceeded
-       */
-       if ((raid_disks - opened) >
-           imsm_get_allowed_degradation(info->new_level, raid_disks,
-                                        super, dev)) {
-               pr_err("Not enough disks can be opened.\n");
-               close_targets(raid_fds, raid_disks);
-               return -2;
-       }
-       return 0;
-}
-
 /*******************************************************************************
  * Function:   validate_container_imsm
  * Description: This routine validates container after assemble,
@@ -10538,14 +10706,14 @@ int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds,
  ******************************************************************************/
 int validate_container_imsm(struct mdinfo *info)
 {
-       if (check_env("IMSM_NO_PLATFORM"))
+       if (check_no_platform())
                return 0;
 
        struct sys_dev *idev;
        struct sys_dev *hba = NULL;
        struct sys_dev *intel_devices = find_intel_devices();
        char *dev_path = devt_to_devpath(makedev(info->disk.major,
-                                                                       info->disk.minor));
+                                                info->disk.minor), 1, NULL);
 
        for (idev = intel_devices; idev; idev = idev->next) {
                if (dev_path && strstr(dev_path, idev->path)) {
@@ -10566,7 +10734,8 @@ int validate_container_imsm(struct mdinfo *info)
        struct mdinfo *dev;
 
        for (dev = info->next; dev; dev = dev->next) {
-               dev_path = devt_to_devpath(makedev(dev->disk.major, dev->disk.minor));
+               dev_path = devt_to_devpath(makedev(dev->disk.major,
+                                                  dev->disk.minor), 1, NULL);
 
                struct sys_dev *hba2 = NULL;
                for (idev = intel_devices; idev; idev = idev->next) {
@@ -10765,13 +10934,11 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
        int new_data_disks;
        unsigned long long dsize, dev_sectors;
        long long unsigned min_dev_sectors = -1LLU;
-       struct mdinfo *sd;
-       char nm[30];
-       int fd;
        struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
        struct imsm_map *map_src = get_imsm_map(dev, MAP_1);
        unsigned long long num_migr_units;
        unsigned long long array_blocks;
+       struct dl *dl_disk = NULL;
 
        memset(migr_rec, 0, sizeof(struct migr_record));
        migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);
@@ -10800,16 +10967,14 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
        migr_rec->post_migr_vol_cap_hi = dev->size_high;
 
        /* Find the smallest dev */
-       for (sd = info->devs ; sd ; sd = sd->next) {
-               sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
-               fd = dev_open(nm, O_RDONLY);
-               if (fd < 0)
+       for (dl_disk =  super->disks; dl_disk ; dl_disk = dl_disk->next) {
+               /* ignore spares in container */
+               if (dl_disk->index < 0)
                        continue;
-               get_dev_size(fd, NULL, &dsize);
+               get_dev_size(dl_disk->fd, NULL, &dsize);
                dev_sectors = dsize / 512;
                if (dev_sectors < min_dev_sectors)
                        min_dev_sectors = dev_sectors;
-               close(fd);
        }
        set_migr_chkp_area_pba(migr_rec, min_dev_sectors -
                                        RAID_DISK_RESERVED_BLOCKS_IMSM_HI);
@@ -10843,22 +11008,21 @@ int save_backup_imsm(struct supertype *st,
 {
        int rv = -1;
        struct intel_super *super = st->sb;
-       unsigned long long *target_offsets;
-       int *targets;
        int i;
        struct imsm_map *map_dest = get_imsm_map(dev, MAP_0);
        int new_disks = map_dest->num_members;
        int dest_layout = 0;
-       int dest_chunk;
-       unsigned long long start;
+       int dest_chunk, targets[new_disks];
+       unsigned long long start, target_offsets[new_disks];
        int data_disks = imsm_num_data_members(map_dest);
 
-       targets = xmalloc(new_disks * sizeof(int));
-
-       for (i = 0; i < new_disks; i++)
-               targets[i] = -1;
-
-       target_offsets = xcalloc(new_disks, sizeof(unsigned long long));
+       for (i = 0; i < new_disks; i++) {
+               struct dl *dl_disk = get_imsm_dl_disk(super, i);
+               if (dl_disk && is_fd_valid(dl_disk->fd))
+                       targets[i] = dl_disk->fd;
+               else
+                       goto abort;
+       }
 
        start = info->reshape_progress * 512;
        for (i = 0; i < new_disks; i++) {
@@ -10869,10 +11033,6 @@ int save_backup_imsm(struct supertype *st,
                target_offsets[i] -= start/data_disks;
        }
 
-       if (open_backup_targets(info, new_disks, targets,
-                               super, dev))
-               goto abort;
-
        dest_layout = imsm_level_to_layout(map_dest->raid_level);
        dest_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
 
@@ -10895,12 +11055,6 @@ int save_backup_imsm(struct supertype *st,
        rv = 0;
 
 abort:
-       if (targets) {
-               close_targets(targets, new_disks);
-               free(targets);
-       }
-       free(target_offsets);
-
        return rv;
 }
 
@@ -10923,7 +11077,7 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
        unsigned long long blocks_per_unit;
        unsigned long long curr_migr_unit;
 
-       if (load_imsm_migr_rec(super, info) != 0) {
+       if (load_imsm_migr_rec(super) != 0) {
                dprintf("imsm: ERROR: Cannot read migration record for checkpoint save.\n");
                return 1;
        }
@@ -10974,17 +11128,17 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
        unsigned long long read_offset;
        unsigned long long write_offset;
        unsigned unit_len;
-       int *targets = NULL;
-       int new_disks, i, err;
+       int new_disks, err;
        char *buf = NULL;
        int retval = 1;
        unsigned int sector_size = super->sector_size;
-       unsigned long curr_migr_unit = current_migr_unit(migr_rec);
-       unsigned long num_migr_units = get_num_migr_units(migr_rec);
-       char buffer[20];
+       unsigned long long curr_migr_unit = current_migr_unit(migr_rec);
+       unsigned long long num_migr_units = get_num_migr_units(migr_rec);
+       char buffer[SYSFS_MAX_BUF_SIZE];
        int skipped_disks = 0;
+       struct dl *dl_disk;
 
-       err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
+       err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, sizeof(buffer));
        if (err < 1)
                return 1;
 
@@ -11015,37 +11169,34 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
        unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
        if (posix_memalign((void **)&buf, sector_size, unit_len) != 0)
                goto abort;
-       targets = xcalloc(new_disks, sizeof(int));
 
-       if (open_backup_targets(info, new_disks, targets, super, id->dev)) {
-               pr_err("Cannot open some devices belonging to array.\n");
-               goto abort;
-       }
+       for (dl_disk = super->disks; dl_disk; dl_disk = dl_disk->next) {
+               if (dl_disk->index < 0)
+                       continue;
 
-       for (i = 0; i < new_disks; i++) {
-               if (targets[i] < 0) {
+               if (!is_fd_valid(dl_disk->fd)) {
                        skipped_disks++;
                        continue;
                }
-               if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
+               if (lseek64(dl_disk->fd, read_offset, SEEK_SET) < 0) {
                        pr_err("Cannot seek to block: %s\n",
                               strerror(errno));
                        skipped_disks++;
                        continue;
                }
-               if ((unsigned)read(targets[i], buf, unit_len) != unit_len) {
+               if (read(dl_disk->fd, buf, unit_len) != (ssize_t)unit_len) {
                        pr_err("Cannot read copy area block: %s\n",
                               strerror(errno));
                        skipped_disks++;
                        continue;
                }
-               if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
+               if (lseek64(dl_disk->fd, write_offset, SEEK_SET) < 0) {
                        pr_err("Cannot seek to block: %s\n",
                               strerror(errno));
                        skipped_disks++;
                        continue;
                }
-               if ((unsigned)write(targets[i], buf, unit_len) != unit_len) {
+               if (write(dl_disk->fd, buf, unit_len) != (ssize_t)unit_len) {
                        pr_err("Cannot restore block: %s\n",
                               strerror(errno));
                        skipped_disks++;
@@ -11069,49 +11220,251 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
                retval = 0;
 
 abort:
-       if (targets) {
-               for (i = 0; i < new_disks; i++)
-                       if (targets[i])
-                               close(targets[i]);
-               free(targets);
-       }
        free(buf);
        return retval;
 }
 
-static char disk_by_path[] = "/dev/disk/by-path/";
-
-static const char *imsm_get_disk_controller_domain(const char *path)
-{
-       char disk_path[PATH_MAX];
-       char *drv=NULL;
-       struct stat st;
-
-       strcpy(disk_path, disk_by_path);
-       strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
-       if (stat(disk_path, &st) == 0) {
-               struct sys_dev* hba;
-               char *path;
-
-               path = devt_to_devpath(st.st_rdev);
-               if (path == NULL)
-                       return "unknown";
-               hba = find_disk_attached_hba(-1, path);
-               if (hba && hba->type == SYS_DEV_SAS)
-                       drv = "isci";
-               else if (hba && hba->type == SYS_DEV_SATA)
-                       drv = "ahci";
-               else if (hba && hba->type == SYS_DEV_VMD)
-                       drv = "vmd";
-               else if (hba && hba->type == SYS_DEV_NVME)
-                       drv = "nvme";
-               else
-                       drv = "unknown";
-               dprintf("path: %s hba: %s attached: %s\n",
-                       path, (hba) ? hba->path : "NULL", drv);
-               free(path);
+/**
+ * test_and_add_drive_controller_policy_imsm() - add disk controller to policies list.
+ * @type: Policy type to search on list.
+ * @pols: List of currently recorded policies.
+ * @disk_fd: File descriptor of the device to check.
+ * @hba: The hba the disk is attached to, could be NULL if verification is disabled.
+ * @verbose: verbose flag.
+ *
+ * IMSM cares about drive physical placement. If @hba is not set, it adds unknown policy.
+ * If there is no controller policy on pols we are free to add the first one. If there is a policy,
+ * then the new one must be the same - no controller mixing allowed.
+ */
+static mdadm_status_t
+test_and_add_drive_controller_policy_imsm(const char * const type, dev_policy_t **pols, int disk_fd,
+                                         struct sys_dev *hba, const int verbose)
+{
+       const char *controller_policy = get_sys_dev_type(SYS_DEV_UNKNOWN);
+       struct dev_policy *pol = pol_find(*pols, (char *)type);
+       char devname[MAX_RAID_SERIAL_LEN];
+
+       if (hba)
+               controller_policy = get_sys_dev_type(hba->type);
+
+       if (!pol) {
+               pol_add(pols, (char *)type, (char *)controller_policy, "imsm");
+               return MDADM_STATUS_SUCCESS;
+       }
+
+       if (strcmp(pol->value, controller_policy) == 0)
+               return MDADM_STATUS_SUCCESS;
+
+       fd2devname(disk_fd, devname);
+       pr_vrb("Intel(R) raid controller \"%s\" found for %s, but \"%s\" was detected earlier\n",
+              controller_policy, devname, pol->value);
+       pr_vrb("Disks under different controllers cannot be used, aborting\n");
+
+       return MDADM_STATUS_ERROR;
+}
+
+/**
+ * test_and_add_drive_encryption_policy_imsm() - add disk encryption to policies list.
+ * @type: policy type to search in the list.
+ * @pols: list of currently recorded policies.
+ * @disk_fd: file descriptor of the device to check.
+ * @hba: The hba to which the drive is attached, could be NULL if verification is disabled.
+ * @verbose: verbose flag.
+ *
+ * IMSM cares about drive encryption state. It is not allowed to mix disks with different
+ * encryption state within one md device.
+ * If there is no encryption policy on pols we are free to add first one.
+ * If there is a policy, then the new one must be the same.
+ */
+static mdadm_status_t
+test_and_add_drive_encryption_policy_imsm(const char * const type, dev_policy_t **pols, int disk_fd,
+                                         struct sys_dev *hba, const int verbose)
+{
+       struct dev_policy *expected_policy = pol_find(*pols, (char *)type);
+       struct encryption_information information = {0};
+       char *encryption_state = "Unknown";
+       int status = MDADM_STATUS_SUCCESS;
+       bool encryption_checked = true;
+       char devname[PATH_MAX];
+
+       if (!hba)
+               goto check_policy;
+
+       switch (hba->type) {
+       case SYS_DEV_NVME:
+       case SYS_DEV_VMD:
+               status = get_nvme_opal_encryption_information(disk_fd, &information, verbose);
+               break;
+       case SYS_DEV_SATA:
+       case SYS_DEV_SATA_VMD:
+               status = get_ata_encryption_information(disk_fd, &information, verbose);
+               break;
+       default:
+               encryption_checked = false;
+       }
+
+       if (status) {
+               fd2devname(disk_fd, devname);
+               pr_vrb("Failed to read encryption information of device %s\n", devname);
+               return MDADM_STATUS_ERROR;
+       }
+
+       if (encryption_checked) {
+               if (information.status == ENC_STATUS_LOCKED) {
+                       fd2devname(disk_fd, devname);
+                       pr_vrb("Device %s is in Locked state, cannot use. Aborting.\n", devname);
+                       return MDADM_STATUS_ERROR;
+               }
+               encryption_state = (char *)get_encryption_status_string(information.status);
+       }
+
+check_policy:
+       if (expected_policy) {
+               if (strcmp(expected_policy->value, encryption_state) == 0)
+                       return MDADM_STATUS_SUCCESS;
+
+               fd2devname(disk_fd, devname);
+               pr_vrb("Encryption status \"%s\" detected for disk %s, but \"%s\" status was detected earlier.\n",
+                      encryption_state, devname, expected_policy->value);
+               pr_vrb("Disks with different encryption status cannot be used.\n");
+               return MDADM_STATUS_ERROR;
+       }
+
+       pol_add(pols, (char *)type, encryption_state, "imsm");
+
+       return MDADM_STATUS_SUCCESS;
+}
+
+struct imsm_drive_policy {
+       char *type;
+       mdadm_status_t (*test_and_add_drive_policy)(const char * const type,
+                                                   struct dev_policy **pols, int disk_fd,
+                                                   struct sys_dev *hba, const int verbose);
+};
+
+struct imsm_drive_policy imsm_policies[] = {
+       {"controller", test_and_add_drive_controller_policy_imsm},
+       {"encryption", test_and_add_drive_encryption_policy_imsm}
+};
+
+mdadm_status_t test_and_add_drive_policies_imsm(struct dev_policy **pols, int disk_fd,
+                                               const int verbose)
+{
+       struct imsm_drive_policy *imsm_pol;
+       struct sys_dev *hba = NULL;
+       char path[PATH_MAX];
+       mdadm_status_t ret;
+       unsigned int i;
+
+       /* If imsm platform verification is disabled, do not search for hba. */
+       if (check_no_platform() != 1) {
+               if (!diskfd_to_devpath(disk_fd, 1, path)) {
+                       pr_vrb("IMSM: Failed to retrieve device path by file descriptor.\n");
+                       return MDADM_STATUS_ERROR;
+               }
+
+               hba = find_disk_attached_hba(disk_fd, path);
+               if (!hba) {
+                       pr_vrb("IMSM: Failed to find hba for %s\n", path);
+                       return MDADM_STATUS_ERROR;
+               }
+       }
+
+       for (i = 0; i < ARRAY_SIZE(imsm_policies); i++) {
+               imsm_pol = &imsm_policies[i];
+
+               ret = imsm_pol->test_and_add_drive_policy(imsm_pol->type, pols, disk_fd, hba,
+                                                         verbose);
+               if (ret != MDADM_STATUS_SUCCESS)
+                       /* Inherit error code */
+                       return ret;
+       }
+
+       return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * get_spare_criteria_imsm() - set spare criteria.
+ * @st: supertype.
+ * @mddev_path: path to md device devnode, it must be container.
+ * @c: spare_criteria struct to fill, not NULL.
+ *
+ * If superblock is not loaded, use mddev_path to load_container. It must be given in this case.
+ * Fills size and sector size according to the superblock.
+ */
+mdadm_status_t get_spare_criteria_imsm(struct supertype *st, char *mddev_path,
+                                      struct spare_criteria *c)
+{
+       mdadm_status_t ret = MDADM_STATUS_ERROR;
+       bool free_superblock = false;
+       unsigned long long size = 0;
+       struct intel_super *super;
+       struct extent *e;
+       struct dl *dl;
+       int i;
+
+       /* If no superblock and no mddev_path, we cannot load superblock. */
+       assert(st->sb || mddev_path);
+
+       if (mddev_path) {
+               int fd = open(mddev_path, O_RDONLY);
+               mdadm_status_t rv;
+
+               if (!is_fd_valid(fd))
+                       return MDADM_STATUS_ERROR;
+
+               if (!st->sb) {
+                       if (load_container_imsm(st, fd, st->devnm)) {
+                               close(fd);
+                               return MDADM_STATUS_ERROR;
+                       }
+                       free_superblock = true;
+               }
+
+               rv = mddev_test_and_add_drive_policies(st, &c->pols, fd, 0);
+               close(fd);
+
+               if (rv != MDADM_STATUS_SUCCESS)
+                       goto out;
        }
-       return drv;
+
+       super = st->sb;
+
+       /* find first active disk in array */
+       dl = super->disks;
+       while (dl && (is_failed(&dl->disk) || dl->index == -1))
+               dl = dl->next;
+
+       if (!dl)
+               goto out;
+
+       /* find last lba used by subarrays */
+       e = get_extents(super, dl, 0);
+       if (!e)
+               goto out;
+
+       for (i = 0; e[i].size; i++)
+               continue;
+       if (i > 0)
+               size = e[i - 1].start + e[i - 1].size;
+       free(e);
+
+       /* add the amount of space needed for metadata */
+       size += imsm_min_reserved_sectors(super);
+
+       c->min_size = size * 512;
+       c->sector_size = super->sector_size;
+       c->criteria_set = true;
+       ret = MDADM_STATUS_SUCCESS;
+
+out:
+       if (free_superblock)
+               free_super_imsm(st);
+
+       if (ret != MDADM_STATUS_SUCCESS)
+               c->criteria_set = false;
+
+       return ret;
 }
 
 static char *imsm_find_array_devnm_by_subdev(int subdev, char *container)
@@ -11242,10 +11595,15 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st,
  */
 static struct mdinfo *get_spares_for_grow(struct supertype *st)
 {
-       struct spare_criteria sc;
+       struct spare_criteria sc = {0};
+       struct mdinfo *spares;
+
+       get_spare_criteria_imsm(st, NULL, &sc);
+       spares = container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
 
-       get_spare_criteria_imsm(st, &sc);
-       return container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
+       dev_policy_free(sc.pols);
+
+       return spares;
 }
 
 /******************************************************************************
@@ -11372,8 +11730,10 @@ static int imsm_create_metadata_update_for_migration(
 {
        struct intel_super *super = st->sb;
        int update_memory_size;
+       int current_chunk_size;
        struct imsm_update_reshape_migration *u;
-       struct imsm_dev *dev;
+       struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+       struct imsm_map *map = get_imsm_map(dev, MAP_0);
        int previous_level = -1;
 
        dprintf("(enter) New Level = %i\n", geo->level);
@@ -11390,23 +11750,15 @@ static int imsm_create_metadata_update_for_migration(
        u->new_disks[0] = -1;
        u->new_chunksize = -1;
 
-       dev = get_imsm_dev(super, u->subdev);
-       if (dev) {
-               struct imsm_map *map;
+       current_chunk_size = __le16_to_cpu(map->blocks_per_strip) / 2;
 
-               map = get_imsm_map(dev, MAP_0);
-               if (map) {
-                       int current_chunk_size =
-                               __le16_to_cpu(map->blocks_per_strip) / 2;
-
-                       if (geo->chunksize != current_chunk_size) {
-                               u->new_chunksize = geo->chunksize / 1024;
-                               dprintf("imsm: chunk size change from %i to %i\n",
-                                       current_chunk_size, u->new_chunksize);
-                       }
-                       previous_level = map->raid_level;
-               }
+       if (geo->chunksize != current_chunk_size) {
+               u->new_chunksize = geo->chunksize / 1024;
+               dprintf("imsm: chunk size change from %i to %i\n",
+                       current_chunk_size, u->new_chunksize);
        }
+       previous_level = map->raid_level;
+
        if (geo->level == 5 && previous_level == 0) {
                struct mdinfo *spares = NULL;
 
@@ -11447,6 +11799,96 @@ static void imsm_update_metadata_locally(struct supertype *st,
        }
 }
 
+/**
+ * imsm_analyze_expand() - check expand properties and calculate new size.
+ * @st: imsm supertype.
+ * @geo: new geometry params.
+ * @array: array info.
+ * @direction: reshape direction.
+ *
+ * Obtain free space after the &array and verify if expand to requested size is
+ * possible. If geo->size is set to %MAX_SIZE, assume that max free size is
+ * requested.
+ *
+ * Return:
+ * On success %IMSM_STATUS_OK is returned, geo->size and geo->raid_disks are
+ * updated.
+ * On error, %IMSM_STATUS_ERROR is returned.
+ */
+static imsm_status_t imsm_analyze_expand(struct supertype *st,
+                                        struct geo_params *geo,
+                                        struct mdinfo *array,
+                                        int direction)
+{
+       struct intel_super *super = st->sb;
+       struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+       struct imsm_map *map = get_imsm_map(dev, MAP_0);
+       int data_disks = imsm_num_data_members(map);
+
+       unsigned long long current_size;
+       unsigned long long free_size;
+       unsigned long long new_size;
+       unsigned long long max_size;
+
+       const int chunk_kib = geo->chunksize / 1024;
+       imsm_status_t rv;
+
+       if (direction == ROLLBACK_METADATA_CHANGES) {
+               /**
+                * Accept size for rollback only.
+                */
+               new_size = geo->size * 2;
+               goto success;
+       }
+
+       if (data_disks == 0) {
+               pr_err("imsm: Cannot retrieve data disks.\n");
+               return IMSM_STATUS_ERROR;
+       }
+       current_size = array->custom_array_size / data_disks;
+
+       rv = imsm_get_free_size(super, dev->vol.map->num_members, 0, chunk_kib, &free_size, true);
+       if (rv != IMSM_STATUS_OK) {
+               pr_err("imsm: Cannot find free space for expand.\n");
+               return IMSM_STATUS_ERROR;
+       }
+       max_size = round_member_size_to_mb(free_size + current_size);
+
+       if (geo->size == MAX_SIZE)
+               new_size = max_size;
+       else
+               new_size = round_member_size_to_mb(geo->size * 2);
+
+       if (new_size == 0) {
+               pr_err("imsm: Rounded requested size is 0.\n");
+               return IMSM_STATUS_ERROR;
+       }
+
+       if (new_size > max_size) {
+               pr_err("imsm: Rounded requested size (%llu) is larger than free space available (%llu).\n",
+                      new_size, max_size);
+               return IMSM_STATUS_ERROR;
+       }
+
+       if (new_size == current_size) {
+               pr_err("imsm: Rounded requested size (%llu) is same as current size (%llu).\n",
+                      new_size, current_size);
+               return IMSM_STATUS_ERROR;
+       }
+
+       if (new_size < current_size) {
+               pr_err("imsm: Size reduction is not supported, rounded requested size (%llu) is smaller than current (%llu).\n",
+                      new_size, current_size);
+               return IMSM_STATUS_ERROR;
+       }
+
+success:
+       dprintf("imsm: New size per member is %llu.\n", new_size);
+       geo->size = data_disks * new_size;
+       geo->raid_disks = dev->vol.map->num_members;
+       return IMSM_STATUS_OK;
+}
+
 /***************************************************************************
 * Function:    imsm_analyze_change
 * Description: Function analyze change for single volume
@@ -11457,31 +11899,23 @@ static void imsm_update_metadata_locally(struct supertype *st,
 ****************************************************************************/
 enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
                                           struct geo_params *geo,
-                                          int direction)
+                                          int direction, struct context *c)
 {
        struct mdinfo info;
        int change = -1;
        int check_devs = 0;
        int chunk;
-       /* number of added/removed disks in operation result */
-       int devNumChange = 0;
        /* imsm compatible layout value for array geometry verification */
        int imsm_layout = -1;
-       int data_disks;
-       struct imsm_dev *dev;
-       struct imsm_map *map;
-       struct intel_super *super;
-       unsigned long long current_size;
-       unsigned long long free_size;
-       unsigned long long max_size;
-       int rv;
+       int raid_disks = geo->raid_disks;
+       imsm_status_t rv;
 
        getinfo_super_imsm_volume(st, &info, NULL);
-       if (geo->level != info.array.level && geo->level >= 0 &&
+       if (geo->level != info.array.level && geo->level >= IMSM_T_RAID0 &&
            geo->level != UnSet) {
                switch (info.array.level) {
-               case 0:
-                       if (geo->level == 5) {
+               case IMSM_T_RAID0:
+                       if (geo->level == IMSM_T_RAID5) {
                                change = CH_MIGRATION;
                                if (geo->layout != ALGORITHM_LEFT_ASYMMETRIC) {
                                        pr_err("Error. Requested Layout not supported (left-asymmetric layout is supported only)!\n");
@@ -11490,20 +11924,28 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
                                }
                                imsm_layout =  geo->layout;
                                check_devs = 1;
-                               devNumChange = 1; /* parity disk added */
-                       } else if (geo->level == 10) {
+                               raid_disks += 1; /* parity disk added */
+                       } else if (geo->level == IMSM_T_RAID10) {
+                               if (geo->level == IMSM_T_RAID10 && geo->raid_disks > 2 &&
+                                   !c->force) {
+                                       pr_err("Warning! VROC UEFI driver does not support RAID10 in requested layout.\n");
+                                       pr_err("Array won't be suitable as boot device.\n");
+                                       pr_err("Note: You can omit this check with \"--force\"\n");
+                                       if (ask("Do you want to continue") < 1)
+                                               return CH_ABORT;
+                               }
                                change = CH_TAKEOVER;
                                check_devs = 1;
-                               devNumChange = 2; /* two mirrors added */
+                               raid_disks *= 2; /* mirrors added */
                                imsm_layout = 0x102; /* imsm supported layout */
                        }
                        break;
-               case 1:
-               case 10:
+               case IMSM_T_RAID1:
+               case IMSM_T_RAID10:
                        if (geo->level == 0) {
                                change = CH_TAKEOVER;
                                check_devs = 1;
-                               devNumChange = -(geo->raid_disks/2);
+                               raid_disks /= 2;
                                imsm_layout = 0; /* imsm raid0 layout */
                        }
                        break;
@@ -11519,10 +11961,10 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
        if (geo->layout != info.array.layout &&
            (geo->layout != UnSet && geo->layout != -1)) {
                change = CH_MIGRATION;
-               if (info.array.layout == 0 && info.array.level == 5 &&
+               if (info.array.layout == 0 && info.array.level == IMSM_T_RAID5 &&
                    geo->layout == 5) {
                        /* reshape 5 -> 4 */
-               } else if (info.array.layout == 5 && info.array.level == 5 &&
+               } else if (info.array.layout == 5 && info.array.level == IMSM_T_RAID5 &&
                           geo->layout == 0) {
                        /* reshape 4 -> 5 */
                        geo->layout = 0;
@@ -11541,7 +11983,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
 
        if (geo->chunksize > 0 && geo->chunksize != UnSet &&
            geo->chunksize != info.array.chunk_size) {
-               if (info.array.level == 10) {
+               if (info.array.level == IMSM_T_RAID10) {
                        pr_err("Error. Chunk size change for RAID 10 is not supported.\n");
                        change = -1;
                        goto analyse_change_exit;
@@ -11556,98 +11998,26 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
                geo->chunksize = info.array.chunk_size;
        }
 
-       chunk = geo->chunksize / 1024;
-
-       super = st->sb;
-       dev = get_imsm_dev(super, super->current_vol);
-       map = get_imsm_map(dev, MAP_0);
-       data_disks = imsm_num_data_members(map);
-       /* compute current size per disk member
-        */
-       current_size = info.custom_array_size / data_disks;
-
-       if (geo->size > 0 && geo->size != MAX_SIZE) {
-               /* align component size
-                */
-               geo->size = imsm_component_size_alignment_check(
-                                   get_imsm_raid_level(dev->vol.map),
-                                   chunk * 1024, super->sector_size,
-                                   geo->size * 2);
-               if (geo->size == 0) {
-                       pr_err("Error. Size expansion is supported only (current size is %llu, requested size /rounded/ is 0).\n",
-                                  current_size);
-                       goto analyse_change_exit;
-               }
-       }
-
-       if (current_size != geo->size && geo->size > 0) {
+       if (geo->size > 0) {
                if (change != -1) {
                        pr_err("Error. Size change should be the only one at a time.\n");
                        change = -1;
                        goto analyse_change_exit;
                }
-               if ((super->current_vol + 1) != super->anchor->num_raid_devs) {
-                       pr_err("Error. The last volume in container can be expanded only (%i/%s).\n",
-                              super->current_vol, st->devnm);
-                       goto analyse_change_exit;
-               }
-               /* check the maximum available size
-                */
-               rv =  imsm_get_free_size(st, dev->vol.map->num_members,
-                                        0, chunk, &free_size);
-               if (rv == 0)
-                       /* Cannot find maximum available space
-                        */
-                       max_size = 0;
-               else {
-                       max_size = free_size + current_size;
-                       /* align component size
-                        */
-                       max_size = imsm_component_size_alignment_check(
-                                       get_imsm_raid_level(dev->vol.map),
-                                       chunk * 1024, super->sector_size,
-                                       max_size);
-               }
-               if (geo->size == MAX_SIZE) {
-                       /* requested size change to the maximum available size
-                        */
-                       if (max_size == 0) {
-                               pr_err("Error. Cannot find maximum available space.\n");
-                               change = -1;
-                               goto analyse_change_exit;
-                       } else
-                               geo->size = max_size;
-               }
 
-               if (direction == ROLLBACK_METADATA_CHANGES) {
-                       /* accept size for rollback only
-                       */
-               } else {
-                       /* round size due to metadata compatibility
-                       */
-                       geo->size = (geo->size >> SECT_PER_MB_SHIFT)
-                                   << SECT_PER_MB_SHIFT;
-                       dprintf("Prepare update for size change to %llu\n",
-                               geo->size );
-                       if (current_size >= geo->size) {
-                               pr_err("Error. Size expansion is supported only (current size is %llu, requested size /rounded/ is %llu).\n",
-                                      current_size, geo->size);
-                               goto analyse_change_exit;
-                       }
-                       if (max_size && geo->size > max_size) {
-                               pr_err("Error. Requested size is larger than maximum available size (maximum available size is %llu, requested size /rounded/ is %llu).\n",
-                                      max_size, geo->size);
-                               goto analyse_change_exit;
-                       }
-               }
-               geo->size *= data_disks;
-               geo->raid_disks = dev->vol.map->num_members;
+               rv = imsm_analyze_expand(st, geo, &info, direction);
+               if (rv != IMSM_STATUS_OK)
+                       goto analyse_change_exit;
+               raid_disks = geo->raid_disks;
                change = CH_ARRAY_SIZE;
        }
+
+       chunk = geo->chunksize / 1024;
+
        if (!validate_geometry_imsm(st,
                                    geo->level,
                                    imsm_layout,
-                                   geo->raid_disks + devNumChange,
+                                   raid_disks,
                                    &chunk,
                                    geo->size, INVALID_SECTORS,
                                    0, 0, info.consistency_policy, 1))
@@ -11658,8 +12028,8 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
                struct imsm_super *mpb = super->anchor;
 
                if (mpb->num_raid_devs > 1) {
-                       pr_err("Error. Cannot perform operation on %s- for this operation it MUST be single array in container\n",
-                              geo->dev_name);
+                       pr_err("Error. Cannot perform operation on %s- for this operation "
+                              "it MUST be single array in container\n", geo->dev_name);
                        change = -1;
                }
        }
@@ -11704,28 +12074,96 @@ int imsm_takeover(struct supertype *st, struct geo_params *geo)
        return 0;
 }
 
-static int imsm_reshape_super(struct supertype *st, unsigned long long size,
-                             int level,
-                             int layout, int chunksize, int raid_disks,
-                             int delta_disks, char *backup, char *dev,
-                             int direction, int verbose)
+/* Flush a size update if the size calculated from num_data_stripes is higher
+ * than imsm_dev_size, to eliminate differences during reshape.
+ * Mdmon will recalculate them correctly.
+ * If subarray_index is negative then the whole container is checked.
+ * Returns:
+ *     0 - no error occurred
+ *     1 - error detected (imsm_dev_size is larger than the calculated size)
+ */
+static int imsm_fix_size_mismatch(struct supertype *st, int subarray_index)
 {
+       struct intel_super *super = st->sb;
+       int tmp = super->current_vol;
        int ret_val = 1;
-       struct geo_params geo;
+       int i;
 
-       dprintf("(enter)\n");
+       for (i = 0; i < super->anchor->num_raid_devs; i++) {
+               if (subarray_index >= 0 && i != subarray_index)
+                       continue;
+               super->current_vol = i;
+               struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
+               struct imsm_map *map = get_imsm_map(dev, MAP_0);
+               unsigned int disc_count = imsm_num_data_members(map);
+               struct geo_params geo;
+               struct imsm_update_size_change *update;
+               unsigned long long calc_size = per_dev_array_size(map) * disc_count;
+               unsigned long long d_size = imsm_dev_size(dev);
+               int u_size;
+
+               if (calc_size == d_size)
+                       continue;
 
-       memset(&geo, 0, sizeof(struct geo_params));
+               /* There is a difference, confirm that imsm_dev_size is
+                * smaller and push update.
+                */
+               if (d_size > calc_size) {
+                       pr_err("imsm: dev size of subarray %d is incorrect\n",
+                               i);
+                       goto exit;
+               }
+               memset(&geo, 0, sizeof(struct geo_params));
+               geo.size = d_size;
+               u_size = imsm_create_metadata_update_for_size_change(st, &geo,
+                                                                    &update);
+               imsm_update_metadata_locally(st, update, u_size);
+               if (st->update_tail) {
+                       append_metadata_update(st, update, u_size);
+                       flush_metadata_updates(st);
+                       st->update_tail = &st->updates;
+               } else {
+                       imsm_sync_metadata(st);
+                       free(update);
+               }
+       }
+       ret_val = 0;
+exit:
+       super->current_vol = tmp;
+       return ret_val;
+}
 
-       geo.dev_name = dev;
+/**
+ * shape_to_geo() - fill geo_params from shape.
+ *
+ * @shape: array details.
+ * @geo: new geometry params.
+ * Both pointers must be non-NULL; this is enforced with assert().
+ */
+static void shape_to_geo(struct shape *shape, struct geo_params *geo)
+{
+       assert(shape);
+       assert(geo);
+
+       geo->dev_name = shape->dev;
+       geo->size = shape->size;
+       geo->level = shape->level;
+       geo->layout = shape->layout;
+       geo->chunksize = shape->chunk;
+       geo->raid_disks = shape->raiddisks;
+}
+
+static int imsm_reshape_super(struct supertype *st, struct shape *shape, struct context *c)
+{
+       int ret_val = 1;
+       struct geo_params geo = {0};
+
+       dprintf("(enter)\n");
+
+       shape_to_geo(shape, &geo);
        strcpy(geo.devnm, st->devnm);
-       geo.size = size;
-       geo.level = level;
-       geo.layout = layout;
-       geo.chunksize = chunksize;
-       geo.raid_disks = raid_disks;
-       if (delta_disks != UnSet)
-               geo.raid_disks += delta_disks;
+       if (shape->delta_disks != UnSet)
+               geo.raid_disks += shape->delta_disks;
 
        dprintf("for level      : %i\n", geo.level);
        dprintf("for raid_disks : %i\n", geo.raid_disks);
@@ -11736,10 +12174,15 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size,
                int old_raid_disks = 0;
 
                if (imsm_reshape_is_allowed_on_container(
-                           st, &geo, &old_raid_disks, direction)) {
+                           st, &geo, &old_raid_disks, shape->direction)) {
                        struct imsm_update_reshape *u = NULL;
                        int len;
 
+                       if (imsm_fix_size_mismatch(st, -1)) {
+                               dprintf("imsm: Cannot fix size mismatch\n");
+                               goto exit_imsm_reshape_super;
+                       }
+
                        len = imsm_create_metadata_update_for_reshape(
                                st, &geo, old_raid_disks, &u);
 
@@ -11785,7 +12228,7 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size,
                        goto exit_imsm_reshape_super;
                }
                super->current_vol = dev->index;
-               change = imsm_analyze_change(st, &geo, direction);
+               change = imsm_analyze_change(st, &geo, shape->direction, c);
                switch (change) {
                case CH_TAKEOVER:
                        ret_val = imsm_takeover(st, &geo);
@@ -11828,6 +12271,7 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size,
                                free(u);
                }
                break;
+               case CH_ABORT:
                default:
                        ret_val = 1;
                }
@@ -11845,14 +12289,14 @@ exit_imsm_reshape_super:
 static int read_completed(int fd, unsigned long long *val)
 {
        int ret;
-       char buf[50];
+       char buf[SYSFS_MAX_BUF_SIZE];
 
-       ret = sysfs_fd_get_str(fd, buf, 50);
+       ret = sysfs_fd_get_str(fd, buf, sizeof(buf));
        if (ret < 0)
                return ret;
 
        ret = COMPLETED_OK;
-       if (strncmp(buf, "none", 4) == 0) {
+       if (str_is_none(buf) == true) {
                ret = COMPLETED_NONE;
        } else if (strncmp(buf, "delayed", 7) == 0) {
                ret = COMPLETED_DELAYED;
@@ -11886,7 +12330,7 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
        unsigned long long to_complete = sra->reshape_progress;
        unsigned long long position_to_set = to_complete / ndata;
 
-       if (fd < 0) {
+       if (!is_fd_valid(fd)) {
                dprintf("cannot open reshape_position\n");
                return 1;
        }
@@ -11898,7 +12342,7 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
                                close(fd);
                                return 1;
                        }
-                       usleep(30000);
+                       sleep_for(0, MSEC_TO_NSEC(30), true);
                } else
                        break;
        } while (retry--);
@@ -11920,12 +12364,12 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
 
        do {
                int rc;
-               char action[20];
+               char action[SYSFS_MAX_BUF_SIZE];
                int timeout = 3000;
 
                sysfs_wait(fd, &timeout);
                if (sysfs_get_str(sra, NULL, "sync_action",
-                                 action, 20) > 0 &&
+                                 action, sizeof(action)) > 0 &&
                                strncmp(action, "reshape", 7) != 0) {
                        if (strncmp(action, "idle", 4) == 0)
                                break;
@@ -11972,7 +12416,8 @@ int check_degradation_change(struct mdinfo *info,
                        if (sd->disk.state & (1<<MD_DISK_FAULTY))
                                continue;
                        if (sd->disk.state & (1<<MD_DISK_SYNC)) {
-                               char sbuf[100];
+                               char sbuf[SYSFS_MAX_BUF_SIZE];
+                               int raid_disk = sd->disk.raid_disk;
 
                                if (sysfs_get_str(info,
                                        sd, "state", sbuf, sizeof(sbuf)) < 0 ||
@@ -11980,13 +12425,8 @@ int check_degradation_change(struct mdinfo *info,
                                        strstr(sbuf, "in_sync") == NULL) {
                                        /* this device is dead */
                                        sd->disk.state = (1<<MD_DISK_FAULTY);
-                                       if (sd->disk.raid_disk >= 0 &&
-                                           sources[sd->disk.raid_disk] >= 0) {
-                                               close(sources[
-                                                       sd->disk.raid_disk]);
-                                               sources[sd->disk.raid_disk] =
-                                                       -1;
-                                       }
+                                       if (raid_disk >= 0)
+                                               close_fd(&sources[raid_disk]);
                                        new_degraded++;
                                }
                        }
@@ -12042,6 +12482,7 @@ static int imsm_manage_reshape(
        unsigned long long start_buf_shift; /* [bytes] */
        int degraded = 0;
        int source_layout = 0;
+       int subarray_index = -1;
 
        if (!sra)
                return ret_val;
@@ -12055,6 +12496,7 @@ static int imsm_manage_reshape(
                    dv->dev->vol.migr_state == 1) {
                        dev = dv->dev;
                        migr_vol_qan++;
+                       subarray_index = dv->index;
                }
        }
        /* Only one volume can migrate at the same time */
@@ -12239,6 +12681,14 @@ static int imsm_manage_reshape(
 
        /* return '1' if done */
        ret_val = 1;
+
+       /* After the reshape eliminate size mismatch in metadata.
+        * Don't update md/component_size here, the volume doesn't have
+        * to take the whole space. It is allowed by the kernel.
+        * md/component_size will be set properly after the next assembly.
+        */
+       imsm_fix_size_mismatch(st, subarray_index);
+
 abort:
        free(buf);
        /* See Grow.c: abort_reshape() for further explanation */
@@ -12249,6 +12699,474 @@ abort:
        return ret_val;
 }
 
+/*******************************************************************************
+ * Function:   calculate_bitmap_min_chunksize
+ * Description:        Finds the smallest bitmap chunk size whose bit count fits
+ * Parameters:
+ *     max_bits        : upper limit on the number of bits in the bitmap
+ *     data_area_size  : the size of the data area covered by the bitmap
+ *
+ * Returns:
+ *      The bitmap chunk size
+ ******************************************************************************/
+static unsigned long long
+calculate_bitmap_min_chunksize(unsigned long long max_bits,
+                              unsigned long long data_area_size)
+{
+       /* sub-page chunks don't work yet, so start from 4096 */
+       unsigned long long chunk = 4096;
+       unsigned long long nbits;
+
+       /* doubling the chunk halves (rounding up) the bits needed */
+       for (nbits = data_area_size / chunk + 1; nbits > max_bits;
+            nbits = (nbits + 1) / 2)
+               chunk *= 2;
+       return chunk;
+}
+
+/*******************************************************************************
+ * Function:   calculate_bitmap_chunksize
+ * Description:        Calculates the bitmap chunk size for the given device
+ * Parameters:
+ *     st      : supertype information
+ *     dev     : device for the bitmap
+ *
+ * Returns:
+ *      The bitmap chunk size (never below IMSM_DEFAULT_BITMAP_CHUNKSIZE)
+ ******************************************************************************/
+static unsigned long long calculate_bitmap_chunksize(struct supertype *st,
+                                                    struct imsm_dev *dev)
+{
+       struct intel_super *super = st->sb;
+       unsigned long long min_chunksize;
+       unsigned long long result = IMSM_DEFAULT_BITMAP_CHUNKSIZE;
+       /* keep the full 64-bit size; size_t would truncate it on 32-bit */
+       unsigned long long dev_size = imsm_dev_size(dev);
+
+       min_chunksize = calculate_bitmap_min_chunksize(
+               IMSM_BITMAP_AREA_SIZE * super->sector_size, dev_size);
+       if (result < min_chunksize)
+               result = min_chunksize;
+
+       return result;
+}
+
+/*******************************************************************************
+ * Function:   init_bitmap_header
+ * Description:        Initialize the bitmap header structure
+ * Parameters:
+ *     st      : supertype information
+ *     bms     : bitmap header struct to initialize
+ *     dev     : device for the bitmap
+ *
+ * Returns:
+ *      0 : success
+ *     -1 : fail (bms or dev is NULL)
+ ******************************************************************************/
+static int init_bitmap_header(struct supertype *st, struct bitmap_super_s *bms,
+                             struct imsm_dev *dev)
+{
+       int vol_uuid[4];
+
+       if (!bms || !dev)
+               return -1;
+
+       bms->magic = __cpu_to_le32(BITMAP_MAGIC);
+       bms->version = __cpu_to_le32(BITMAP_MAJOR_HI);
+       bms->daemon_sleep = __cpu_to_le32(IMSM_DEFAULT_BITMAP_DAEMON_SLEEP);
+       bms->sync_size = __cpu_to_le64(IMSM_BITMAP_AREA_SIZE);
+       bms->write_behind = __cpu_to_le32(0);
+
+       uuid_from_super_imsm(st, vol_uuid);
+       memcpy(bms->uuid, vol_uuid, 16);
+       /* converted to little-endian like the other numeric header fields */
+       bms->chunksize = __cpu_to_le32(calculate_bitmap_chunksize(st, dev));
+
+       return 0;
+}
+
+/*******************************************************************************
+ * Function:   validate_internal_bitmap_for_drive
+ * Description:        Reads the bitmap header from a drive and verifies it.
+ * Parameters:
+ *     st      : supertype information
+ *     offset  : The offset from the beginning of the drive where to look for
+ *               the bitmap header.
+ *     d       : the drive info
+ *
+ * Returns:
+ *      0 : success
+ *     -1 : fail
+ ******************************************************************************/
+static int validate_internal_bitmap_for_drive(struct supertype *st,
+                                             unsigned long long offset,
+                                             struct dl *d)
+{
+       struct intel_super *super = st->sb;
+       bitmap_super_t *hdr;
+       void *hdr_buf;
+       int uuid[4];
+       int rv = -1;
+       int fd;
+
+       if (!d)
+               return -1;
+
+       if (posix_memalign(&hdr_buf, MAX_SECTOR_SIZE,
+                          IMSM_BITMAP_HEADER_SIZE) != 0)
+               return -1;
+
+       /* use the already opened descriptor if there is one */
+       fd = d->fd;
+       if (!is_fd_valid(fd)) {
+               fd = open(d->devname, O_RDONLY, 0);
+               if (!is_fd_valid(fd)) {
+                       dprintf("cannot open the device %s\n", d->devname);
+                       goto out;
+               }
+       }
+
+       if (lseek64(fd, offset * super->sector_size, SEEK_SET) < 0)
+               goto out;
+       if (read(fd, hdr_buf, IMSM_BITMAP_HEADER_SIZE) !=
+           IMSM_BITMAP_HEADER_SIZE)
+               goto out;
+
+       uuid_from_super_imsm(st, uuid);
+
+       /* magic, version and volume uuid all have to match */
+       hdr = hdr_buf;
+       if (hdr->magic == __cpu_to_le32(BITMAP_MAGIC) &&
+           hdr->version == __cpu_to_le32(BITMAP_MAJOR_HI) &&
+           same_uuid((int *)hdr->uuid, uuid, st->ss->swapuuid))
+               rv = 0;
+       else
+               dprintf("wrong bitmap header detected\n");
+
+out:
+       /* close only a descriptor that was opened here */
+       if (!is_fd_valid(d->fd))
+               close_fd(&fd);
+
+       free(hdr_buf);
+
+       return rv;
+}
+
+/*******************************************************************************
+ * Function:   validate_internal_bitmap_imsm
+ * Description:        Verifies the bitmap header on each non-failed drive with
+ *             index >= 0, for the currently selected volume.
+ * Parameters:
+ *     st      : supertype information
+ * Returns:
+ *      0 : success or device w/o RWH_BITMAP
+ *     -1 : fail
+ ******************************************************************************/
+static int validate_internal_bitmap_imsm(struct supertype *st)
+{
+       struct intel_super *super = st->sb;
+       int vol = super->current_vol;
+       struct imsm_dev *dev = get_imsm_dev(super, vol);
+       unsigned long long hdr_sector;
+       struct dl *disk;
+
+       if (dev->rwh_policy != RWH_BITMAP)
+               return 0;
+
+       hdr_sector = get_bitmap_header_sector(super, vol);
+       for (disk = super->disks; disk; disk = disk->next) {
+               if (disk->index < 0 || is_failed(&disk->disk))
+                       continue;
+               if (!validate_internal_bitmap_for_drive(st, hdr_sector, disk))
+                       continue;
+               pr_err("imsm: bitmap validation failed\n");
+               return -1;
+       }
+       return 0;
+}
+
+/*******************************************************************************
+ * Function:   add_internal_bitmap_imsm
+ * Description:        Marks the current volume as RWH_BITMAP and reports the chunk size.
+ * Parameters:
+ *     st              : supertype information
+ *     chunkp          : output, receives the calculated bitmap chunk size
+ *     delay           : not used for imsm
+ *     write_behind    : not used for imsm
+ *     size            : not used for imsm
+ *     may_change      : not used for imsm
+ *     amajor          : not used for imsm
+ *
+ * Returns:
+ *      0 : success
+ *     -1 : fail
+ ******************************************************************************/
+static int add_internal_bitmap_imsm(struct supertype *st, int *chunkp,
+                                   int delay, int write_behind,
+                                   unsigned long long size, int may_change,
+                                   int amajor)
+{
+       struct intel_super *super = st->sb;
+       struct imsm_dev *vol;
+
+       if (!chunkp || !super->devlist || super->current_vol == -1)
+               return -1;
+
+       /* switch the volume to the bitmap policy and report its chunk size */
+       vol = get_imsm_dev(super, super->current_vol);
+       vol->rwh_policy = RWH_BITMAP;
+       *chunkp = calculate_bitmap_chunksize(st, vol);
+       return 0;
+}
+
+/*******************************************************************************
+ * Function:   locate_bitmap_imsm
+ * Description:        Seek 'fd' to the start of the bitmap header.
+ * Parameters:
+ *     st              : supertype information
+ *     fd              : file descriptor for the device
+ *     node_num        : not used for imsm
+ *
+ * Returns:
+ *      0 : success
+ *     -1 : fail (no device list, no current volume or the seek failed)
+ ******************************************************************************/
+static int locate_bitmap_imsm(struct supertype *st, int fd, int node_num)
+{
+       struct intel_super *super = st->sb;
+       unsigned long long offset;
+       int vol_idx = super->current_vol;
+
+       if (!super->devlist || vol_idx == -1)
+               return -1;
+
+       offset = get_bitmap_header_sector(super, super->current_vol);
+       dprintf("bitmap header offset is %llu\n", offset);
+       /* a failed seek must not be reported as success to the caller */
+       if (lseek64(fd, offset << 9, SEEK_SET) < 0)
+               return -1;
+       return 0;
+}
+
+/*******************************************************************************
+ * Function:   write_init_bitmap_imsm
+ * Description:        Zeroes the header space, fills the bitmap area with 1s and
+ *             writes the bitmap header.
+ * Parameters:
+ *     st      : supertype information
+ *     fd      : file descriptor for the device
+ *     update  : not used for imsm
+ * Returns:
+ *      0 : success
+ *     -1 : fail
+ ******************************************************************************/
+static int write_init_bitmap_imsm(struct supertype *st, int fd,
+                                 enum bitmap_update update)
+{
+       struct intel_super *super = st->sb;
+       int vol_idx = super->current_vol;
+       int ret = 0;
+       unsigned long long offset;
+       bitmap_super_t bms = { 0 };
+       size_t written = 0;
+       size_t to_write;
+       ssize_t rv_num;
+       void *buf;
+
+       if (!super->devlist || !super->sector_size || vol_idx == -1)
+               return -1;
+
+       struct imsm_dev *dev = get_imsm_dev(super, vol_idx);
+       /* first clear the space for bitmap header */
+       unsigned long long bitmap_area_start =
+               get_bitmap_header_sector(super, vol_idx);
+       dprintf("zeroing area start (%llu) and size (%u)\n", bitmap_area_start,
+               IMSM_BITMAP_HEADER_SIZE / super->sector_size);
+       if (zero_disk_range(fd, bitmap_area_start,
+                           IMSM_BITMAP_HEADER_SIZE / super->sector_size)) {
+               pr_err("imsm: cannot zeroing the space for the bitmap\n");
+               return -1;
+       }
+
+       /* The bitmap area should be filled with "1"s to perform initial
+        * synchronization. Never write past IMSM_BITMAP_AREA_SIZE.
+        */
+       if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE))
+               return -1;
+       memset(buf, 0xFF, MAX_SECTOR_SIZE);
+       offset = get_bitmap_sector(super, vol_idx);
+       if (lseek64(fd, offset << 9, SEEK_SET) < 0) {
+               ret = -1;
+               dprintf("cannot seek to the bitmap area\n");
+               goto abort;
+       }
+       while (written < IMSM_BITMAP_AREA_SIZE) {
+               to_write = IMSM_BITMAP_AREA_SIZE - written;
+               if (to_write > MAX_SECTOR_SIZE)
+                       to_write = MAX_SECTOR_SIZE;
+               rv_num = write(fd, buf, to_write);
+               if (rv_num != (ssize_t)to_write) {
+                       ret = -1;
+                       dprintf("cannot initialize bitmap area\n");
+                       goto abort;
+               }
+               written += rv_num;
+       }
+
+       /* write a bitmap header */
+       init_bitmap_header(st, &bms, dev);
+       memset(buf, 0, MAX_SECTOR_SIZE);
+       memcpy(buf, &bms, sizeof(bitmap_super_t));
+       if (locate_bitmap_imsm(st, fd, 0)) {
+               ret = -1;
+               dprintf("cannot locate the bitmap\n");
+               goto abort;
+       }
+       if (write(fd, buf, MAX_SECTOR_SIZE) != MAX_SECTOR_SIZE) {
+               ret = -1;
+               dprintf("cannot write the bitmap header\n");
+               goto abort;
+       }
+       fsync(fd);
+abort:
+       free(buf);
+       return ret;
+}
+
+/*******************************************************************************
+ * Function:   is_vol_to_setup_bitmap
+ * Description:        Checks if a bitmap should be activated on the dev.
+ * Parameters:
+ *     info    : info about the volume to setup the bitmap
+ *     dev     : the device to check against bitmap creation
+ *
+ * Returns (non-zero is what set_bitmap_imsm() treats as "set up"):
+ *      0 : volume name differs from info->name or policy is not RWH_BITMAP
+ *     -1 : name matches and policy is RWH_BITMAP, or dev/info is NULL
+ ******************************************************************************/
+static int is_vol_to_setup_bitmap(struct mdinfo *info, struct imsm_dev *dev)
+{
+       if (!dev || !info)
+               return -1;
+
+       if ((strcmp((char *)dev->volume, info->name) == 0) &&
+           (dev->rwh_policy == RWH_BITMAP))
+               return -1;
+
+       return 0;
+}
+
+/*******************************************************************************
+ * Function:   set_bitmap_sysfs
+ * Description:        Set the sysfs attributes of a given volume to activate the bitmap.
+ * Parameters:
+ *     info            : info about the volume where the bitmap should be setup
+ *     chunksize       : bitmap chunk size
+ *     location        : location of the bitmap
+ *
+ * Returns:
+ *      0 : success
+ *     -1 : fail
+ ******************************************************************************/
+static int set_bitmap_sysfs(struct mdinfo *info, unsigned long long chunksize,
+                           char *location)
+{
+       /* The bitmap/metadata is set to external to allow changing of value for
+        * bitmap/location. When external is used, the kernel will treat an offset
+        * related to the device's first lba (in opposition to the "internal" case
+        * when this value is related to the beginning of the superblock).
+        */
+       if (sysfs_set_str(info, NULL, "bitmap/metadata", "external")) {
+               dprintf("failed to set bitmap/metadata\n");
+               return -1;
+       }
+
+       /* It can only be changed when no bitmap is active.
+        * Should be bigger than 512 and must be power of 2.
+        * It is expecting the value in bytes.
+        * The number is handed to sysfs as a plain host-order value, so no
+        * endianness conversion is applied to it or to the ones below.
+        */
+       if (sysfs_set_num(info, NULL, "bitmap/chunksize", chunksize)) {
+               dprintf("failed to set bitmap/chunksize\n");
+               return -1;
+       }
+
+       /* It is expecting the value in sectors. */
+       if (sysfs_set_num(info, NULL, "bitmap/space", IMSM_BITMAP_AREA_SIZE)) {
+               dprintf("failed to set bitmap/space\n");
+               return -1;
+       }
+
+       /* Determines the delay between the bitmap updates.
+        * It is expecting the value in seconds.
+        */
+       if (sysfs_set_num(info, NULL, "bitmap/time_base",
+                         IMSM_DEFAULT_BITMAP_DAEMON_SLEEP)) {
+               dprintf("failed to set bitmap/time_base\n");
+               return -1;
+       }
+
+       /* It is expecting the value in sectors with a sign at the beginning. */
+       if (sysfs_set_str(info, NULL, "bitmap/location", location)) {
+               dprintf("failed to set bitmap/location\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*******************************************************************************
+ * Function:   set_bitmap_imsm
+ * Description:        Walks the volume list and activates the bitmap through sysfs
+ *             for each volume selected by is_vol_to_setup_bitmap().
+ * Parameters:
+ *     st      : supertype information
+ *     info    : info about the volume where the bitmap should be setup
+ * Returns:
+ *      0 : success
+ *     -1 : fail
+ ******************************************************************************/
+static int set_bitmap_imsm(struct supertype *st, struct mdinfo *info)
+{
+       struct intel_super *super = st->sb;
+       const int saved_vol = super->current_vol;
+       struct intel_dev *it;
+       int rv = -1;
+
+       for (it = super->devlist; it; it = it->next) {
+               struct imsm_dev *dev;
+               unsigned long long chunksize;
+               char location[16] = "";
+
+               super->current_vol = it->index;
+               dev = get_imsm_dev(super, super->current_vol);
+               if (!is_vol_to_setup_bitmap(info, dev))
+                       continue;
+
+               if (validate_internal_bitmap_imsm(st)) {
+                       dprintf("bitmap header validation failed\n");
+                       goto out;
+               }
+
+               chunksize = calculate_bitmap_chunksize(st, dev);
+               dprintf("chunk size is %llu\n", chunksize);
+               snprintf(location, sizeof(location), "+%llu",
+                        get_bitmap_sector(super, super->current_vol));
+               dprintf("bitmap offset is %s\n", location);
+
+               if (set_bitmap_sysfs(info, chunksize, location)) {
+                       dprintf("cannot setup the bitmap\n");
+                       goto out;
+               }
+       }
+       rv = 0;
+out:
+       super->current_vol = saved_vol;
+       return rv;
+}
+
 struct superswitch super_imsm = {
        .examine_super  = examine_super_imsm,
        .brief_examine_super = brief_examine_super_imsm,
@@ -12266,11 +13184,10 @@ struct superswitch super_imsm = {
        .update_subarray = update_subarray_imsm,
        .load_container = load_container_imsm,
        .default_geometry = default_geometry_imsm,
-       .get_disk_controller_domain = imsm_get_disk_controller_domain,
+       .test_and_add_drive_policies = test_and_add_drive_policies_imsm,
        .reshape_super  = imsm_reshape_super,
        .manage_reshape = imsm_manage_reshape,
        .recover_backup = recover_backup_imsm,
-       .copy_metadata = copy_metadata_imsm,
        .examine_badblocks = examine_badblocks_imsm,
        .match_home     = match_home_imsm,
        .uuid_from_super= uuid_from_super_imsm,
@@ -12291,10 +13208,16 @@ struct superswitch super_imsm = {
        .container_content = container_content_imsm,
        .validate_container = validate_container_imsm,
 
+       .add_internal_bitmap = add_internal_bitmap_imsm,
+       .locate_bitmap = locate_bitmap_imsm,
+       .write_bitmap = write_init_bitmap_imsm,
+       .set_bitmap = set_bitmap_imsm,
+
        .write_init_ppl = write_init_ppl_imsm,
        .validate_ppl   = validate_ppl_imsm,
 
        .external       = 1,
+       .swapuuid       = 0,
        .name = "imsm",
 
 /* for mdmon */
index 6af140bbf72197223e54fd37b9d1fd1ac28167ad..9b8a1bd63bb7a6657abe56b5d1e51f4a27431f14 100644 (file)
--- a/super0.c
+++ b/super0.c
@@ -288,7 +288,7 @@ static void export_examine_super0(struct supertype *st)
 {
        mdp_super_t *sb = st->sb;
 
-       printf("MD_LEVEL=%s\n", map_num(pers, sb->level));
+       printf("MD_LEVEL=%s\n", map_num_s(pers, sb->level));
        printf("MD_DEVICES=%d\n", sb->raid_disks);
        if (sb->minor_version >= 90)
                printf("MD_UUID=%08x:%08x:%08x:%08x\n",
@@ -491,7 +491,7 @@ static struct mdinfo *container_content0(struct supertype *st, char *subarray)
 }
 
 static int update_super0(struct supertype *st, struct mdinfo *info,
-                        char *update,
+                        enum update_opt update,
                         char *devname, int verbose,
                         int uuid_set, char *homehost)
 {
@@ -503,18 +503,37 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
        int uuid[4];
        mdp_super_t *sb = st->sb;
 
-       if (strcmp(update, "homehost") == 0 &&
-           homehost) {
-               /* note that 'homehost' is special as it is really
+       if (update == UOPT_HOMEHOST && homehost) {
+               /*
+                * note that 'homehost' is special as it is really
                 * a "uuid" update.
                 */
                uuid_set = 0;
-               update = "uuid";
+               update = UOPT_UUID;
                info->uuid[0] = sb->set_uuid0;
                info->uuid[1] = sb->set_uuid1;
        }
 
-       if (strcmp(update, "sparc2.2")==0 ) {
+       switch (update) {
+       case UOPT_UUID:
+               if (!uuid_set && homehost) {
+                       char buf[20];
+                       memcpy(info->uuid+2,
+                              sha1_buffer(homehost, strlen(homehost), buf),
+                              8);
+               }
+               sb->set_uuid0 = info->uuid[0];
+               sb->set_uuid1 = info->uuid[1];
+               sb->set_uuid2 = info->uuid[2];
+               sb->set_uuid3 = info->uuid[3];
+               if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
+                       struct bitmap_super_s *bm;
+                       bm = (struct bitmap_super_s *)(sb+1);
+                       uuid_from_super0(st, uuid);
+                       memcpy(bm->uuid, uuid, 16);
+               }
+               break;
+       case UOPT_SPARC22: {
                /* 2.2 sparc put the events in the wrong place
                 * So we copy the tail of the superblock
                 * up 4 bytes before continuing
@@ -527,12 +546,15 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                if (verbose >= 0)
                        pr_err("adjusting superblock of %s for 2.2/sparc compatibility.\n",
                               devname);
-       } else if (strcmp(update, "super-minor") ==0) {
+               break;
+       }
+       case UOPT_SUPER_MINOR:
                sb->md_minor = info->array.md_minor;
                if (verbose > 0)
                        pr_err("updating superblock of %s with minor number %d\n",
                                devname, info->array.md_minor);
-       } else if (strcmp(update, "summaries") == 0) {
+               break;
+       case UOPT_SUMMARIES: {
                unsigned int i;
                /* set nr_disks, active_disks, working_disks,
                 * failed_disks, spare_disks based on disks[]
@@ -559,7 +581,9 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                                        sb->spare_disks++;
                        } else if (i >= sb->raid_disks && sb->disks[i].number == 0)
                                sb->disks[i].state = 0;
-       } else if (strcmp(update, "force-one")==0) {
+               break;
+       }
+       case UOPT_SPEC_FORCE_ONE: {
                /* Not enough devices for a working array, so
                 * bring this one up-to-date.
                 */
@@ -569,7 +593,9 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                if (sb->events_hi != ehi ||
                    sb->events_lo != elo)
                        rv = 1;
-       } else if (strcmp(update, "force-array")==0) {
+               break;
+       }
+       case UOPT_SPEC_FORCE_ARRAY:
                /* degraded array and 'force' requested, so
                 * maybe need to mark it 'clean'
                 */
@@ -579,7 +605,8 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                        sb->state |= (1 << MD_SB_CLEAN);
                        rv = 1;
                }
-       } else if (strcmp(update, "assemble")==0) {
+               break;
+       case UOPT_SPEC_ASSEMBLE: {
                int d = info->disk.number;
                int wonly = sb->disks[d].state & (1<<MD_DISK_WRITEMOSTLY);
                int failfast = sb->disks[d].state & (1<<MD_DISK_FAILFAST);
@@ -609,7 +636,9 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                        sb->reshape_position = info->reshape_progress;
                        rv = 1;
                }
-       } else if (strcmp(update, "linear-grow-new") == 0) {
+               break;
+       }
+       case UOPT_SPEC_LINEAR_GROW_NEW:
                memset(&sb->disks[info->disk.number], 0, sizeof(sb->disks[0]));
                sb->disks[info->disk.number].number = info->disk.number;
                sb->disks[info->disk.number].major = info->disk.major;
@@ -617,7 +646,8 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                sb->disks[info->disk.number].raid_disk = info->disk.raid_disk;
                sb->disks[info->disk.number].state = info->disk.state;
                sb->this_disk = sb->disks[info->disk.number];
-       } else if (strcmp(update, "linear-grow-update") == 0) {
+               break;
+       case UOPT_SPEC_LINEAR_GROW_UPDATE:
                sb->raid_disks = info->array.raid_disks;
                sb->nr_disks = info->array.nr_disks;
                sb->active_disks = info->array.active_disks;
@@ -628,29 +658,15 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                sb->disks[info->disk.number].minor = info->disk.minor;
                sb->disks[info->disk.number].raid_disk = info->disk.raid_disk;
                sb->disks[info->disk.number].state = info->disk.state;
-       } else if (strcmp(update, "resync") == 0) {
-               /* make sure resync happens */
+               break;
+       case UOPT_RESYNC:
+               /*
+                * make sure resync happens
+                */
                sb->state &= ~(1<<MD_SB_CLEAN);
                sb->recovery_cp = 0;
-       } else if (strcmp(update, "uuid") == 0) {
-               if (!uuid_set && homehost) {
-                       char buf[20];
-                       char *hash = sha1_buffer(homehost,
-                                                strlen(homehost),
-                                                buf);
-                       memcpy(info->uuid+2, hash, 8);
-               }
-               sb->set_uuid0 = info->uuid[0];
-               sb->set_uuid1 = info->uuid[1];
-               sb->set_uuid2 = info->uuid[2];
-               sb->set_uuid3 = info->uuid[3];
-               if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
-                       struct bitmap_super_s *bm;
-                       bm = (struct bitmap_super_s*)(sb+1);
-                       uuid_from_super0(st, uuid);
-                       memcpy(bm->uuid, uuid, 16);
-               }
-       } else if (strcmp(update, "metadata") == 0) {
+               break;
+       case UOPT_METADATA:
                /* Create some v1.0 metadata to match ours but make the
                 * ctime bigger.  Also update info->array.*_version.
                 * We need to arrange that store_super writes out
@@ -670,7 +686,8 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                        uuid_from_super0(st, info->uuid);
                        st->other = super1_make_v0(st, info, st->sb);
                }
-       } else if (strcmp(update, "revert-reshape") == 0) {
+               break;
+       case UOPT_REVERT_RESHAPE:
                rv = -2;
                if (sb->minor_version <= 90)
                        pr_err("No active reshape to revert on %s\n",
@@ -683,7 +700,7 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                        int parity = sb->level == 6 ? 2 : 1;
                        rv = 0;
 
-                       if (sb->level >= 4 && sb->level <= 6 &&
+                       if (is_level456(sb->level) &&
                            sb->reshape_position % (
                                    sb->new_chunk/512 *
                                    (sb->raid_disks - sb->delta_disks - parity))) {
@@ -702,16 +719,22 @@ static int update_super0(struct supertype *st, struct mdinfo *info,
                        sb->new_chunk = sb->chunk_size;
                        sb->chunk_size = tmp;
                }
-       } else if (strcmp(update, "no-bitmap") == 0) {
+               break;
+       case UOPT_NO_BITMAP:
                sb->state &= ~(1<<MD_SB_BITMAP_PRESENT);
-       } else if (strcmp(update, "_reshape_progress")==0)
+               break;
+       case UOPT_SPEC__RESHAPE_PROGRESS:
                sb->reshape_position = info->reshape_progress;
-       else if (strcmp(update, "writemostly")==0)
+               break;
+       case UOPT_SPEC_WRITEMOSTLY:
                sb->state |= (1<<MD_DISK_WRITEMOSTLY);
-       else if (strcmp(update, "readwrite")==0)
+               break;
+       case UOPT_SPEC_READWRITE:
                sb->state &= ~(1<<MD_DISK_WRITEMOSTLY);
-       else
+               break;
+       default:
                rv = -1;
+       }
 
        sb->sb_csum = calc_sb0_csum(sb);
        return rv;
@@ -926,7 +949,8 @@ static int write_init_super0(struct supertype *st)
        return rv;
 }
 
-static int compare_super0(struct supertype *st, struct supertype *tst)
+static int compare_super0(struct supertype *st, struct supertype *tst,
+                         int verbose)
 {
        /*
         * return:
@@ -1272,7 +1296,7 @@ static int validate_geometry0(struct supertype *st, int level,
        if (get_linux_version() < 3001000)
                tbmax = 2;
 
-       if (level == LEVEL_CONTAINER) {
+       if (is_container(level)) {
                if (verbose)
                        pr_err("0.90 metadata does not support containers\n");
                return 0;
@@ -1345,5 +1369,7 @@ struct superswitch super0 = {
        .locate_bitmap = locate_bitmap0,
        .write_bitmap = write_bitmap0,
        .free_super = free_super0,
+
+       .swapuuid = 0,
        .name = "0.90",
 };
index e0d80be1e5dfa22e3358b5bd1274e85719f08938..81d29a652f3645d7596c7026716b2e596aafd701 100644 (file)
--- a/super1.c
+++ b/super1.c
@@ -192,7 +192,7 @@ static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
        unsigned int disk_csum, csum;
        unsigned long long newcsum;
        int size = sizeof(*sb) + __le32_to_cpu(sb->max_dev)*2;
-       unsigned int *isuper = (unsigned int*)sb;
+       unsigned int *isuper = (unsigned int *)sb;
 
 /* make sure I can count... */
        if (offsetof(struct mdp_superblock_1,data_offset) != 128 ||
@@ -204,7 +204,7 @@ static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
        disk_csum = sb->sb_csum;
        sb->sb_csum = 0;
        newcsum = 0;
-       for (; size>=4; size -= 4 ) {
+       for (; size >= 4; size -= 4) {
                newcsum += __le32_to_cpu(*isuper);
                isuper++;
        }
@@ -319,7 +319,7 @@ static inline unsigned int choose_ppl_space(int chunk)
 static void examine_super1(struct supertype *st, char *homehost)
 {
        struct mdp_superblock_1 *sb = st->sb;
-       bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
+       bitmap_super_t *bms = (bitmap_super_t *)(((char *)sb) + MAX_SB_SIZE);
        time_t atime;
        unsigned int d;
        int role;
@@ -330,6 +330,7 @@ static void examine_super1(struct supertype *st, char *homehost)
        int layout;
        unsigned long long sb_offset;
        struct mdinfo info;
+       int inconsistent = 0;
 
        printf("          Magic : %08x\n", __le32_to_cpu(sb->magic));
        printf("        Version : 1");
@@ -342,8 +343,9 @@ static void examine_super1(struct supertype *st, char *homehost)
                printf(".0\n");
        printf("    Feature Map : 0x%x\n", __le32_to_cpu(sb->feature_map));
        printf("     Array UUID : ");
-       for (i=0; i<16; i++) {
-               if ((i&3)==0 && i != 0) printf(":");
+       for (i = 0; i < 16; i++) {
+               if ((i & 3) == 0 && i != 0)
+                       printf(":");
                printf("%02x", sb->set_uuid[i]);
        }
        printf("\n");
@@ -405,15 +407,21 @@ static void examine_super1(struct supertype *st, char *homehost)
 
        st->ss->getinfo_super(st, &info, NULL);
        if (info.space_after != 1 &&
-           !(__le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
-               printf("   Unused Space : before=%llu sectors, after=%llu sectors\n",
-                      info.space_before, info.space_after);
-
-       printf("          State : %s\n",
-              (__le64_to_cpu(sb->resync_offset)+1)? "active":"clean");
+           !(__le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET)) {
+               printf("   Unused Space : before=%llu sectors, ",
+                      info.space_before);
+               if (info.space_after < INT64_MAX)
+                       printf("after=%llu sectors\n", info.space_after);
+               else
+                       printf("after=-%llu sectors DEVICE TOO SMALL\n",
+                              UINT64_MAX - info.space_after);
+       }
+       printf("          State : %s%s\n",
+              (__le64_to_cpu(sb->resync_offset) + 1) ? "active":"clean",
+              (info.space_after > INT64_MAX)       ? " TRUNCATED DEVICE" : "");
        printf("    Device UUID : ");
-       for (i=0; i<16; i++) {
-               if ((i&3)==0 && i != 0)
+       for (i = 0; i < 16; i++) {
+               if ((i & 3)==0 && i != 0)
                        printf(":");
                printf("%02x", sb->device_uuid[i]);
        }
@@ -536,26 +544,6 @@ static void examine_super1(struct supertype *st, char *homehost)
                break;
        }
        printf("\n");
-#if 0
-       /* This turns out to just be confusing */
-       printf("    Array Slot : %d (", __le32_to_cpu(sb->dev_number));
-       for (i = __le32_to_cpu(sb->max_dev); i> 0 ; i--)
-               if (__le16_to_cpu(sb->dev_roles[i-1]) != MD_DISK_ROLE_SPARE)
-                       break;
-       for (d = 0; d < i; d++) {
-               int role = __le16_to_cpu(sb->dev_roles[d]);
-               if (d)
-                       printf(", ");
-               if (role == MD_DISK_ROLE_SPARE)
-                       printf("empty");
-               else
-                       if(role == MD_DISK_ROLE_FAULTY)
-                               printf("failed");
-                       else
-                               printf("%d", role);
-       }
-       printf(")\n");
-#endif
        printf("   Device Role : ");
        role = role_from_sb(sb);
        if (role >= MD_DISK_ROLE_FAULTY)
@@ -576,28 +564,36 @@ static void examine_super1(struct supertype *st, char *homehost)
                        if (role == d)
                                cnt++;
                }
-               if (cnt == 2)
+               if (cnt == 2 && __le32_to_cpu(sb->level) > 0)
                        printf("R");
                else if (cnt == 1)
                        printf("A");
                else if (cnt == 0)
                        printf(".");
-               else
+               else {
                        printf("?");
+                       inconsistent = 1;
+               }
        }
-#if 0
-       /* This is confusing too */
-       faulty = 0;
-       for (i = 0; i< __le32_to_cpu(sb->max_dev); i++) {
-               int role = __le16_to_cpu(sb->dev_roles[i]);
-               if (role == MD_DISK_ROLE_FAULTY)
-                       faulty++;
-       }
-       if (faulty)
-               printf(" %d failed", faulty);
-#endif
        printf(" ('A' == active, '.' == missing, 'R' == replacing)");
        printf("\n");
+       for (d = 0; d < __le32_to_cpu(sb->max_dev); d++) {
+               unsigned int r = __le16_to_cpu(sb->dev_roles[d]);
+               if (r <= MD_DISK_ROLE_MAX &&
+                   r > __le32_to_cpu(sb->raid_disks) + delta_extra)
+                       inconsistent = 1;
+       }
+       if (inconsistent) {
+               printf("WARNING Array state is inconsistent - each number should appear only once\n");
+               for (d = 0; d < __le32_to_cpu(sb->max_dev); d++)
+                       if (__le16_to_cpu(sb->dev_roles[d]) >=
+                           MD_DISK_ROLE_FAULTY)
+                               printf(" %d:-", d);
+                       else
+                               printf(" %d:%d", d,
+                                      __le16_to_cpu(sb->dev_roles[d]));
+               printf("\n");
+       }
 }
 
 static void brief_examine_super1(struct supertype *st, int verbose)
@@ -618,8 +614,7 @@ static void brief_examine_super1(struct supertype *st, int verbose)
 
        printf("ARRAY ");
        if (nm) {
-               printf("/dev/md/");
-               print_escape(nm);
+               printf(DEV_MD_DIR "%s", nm);
                putchar(' ');
        }
        if (verbose && c)
@@ -635,14 +630,10 @@ static void brief_examine_super1(struct supertype *st, int verbose)
                printf("num-devices=%d ", __le32_to_cpu(sb->raid_disks));
        printf("UUID=");
        for (i = 0; i < 16; i++) {
-               if ((i&3)==0 && i != 0)
+               if ((i & 3)==0 && i != 0)
                        printf(":");
                printf("%02x", sb->set_uuid[i]);
        }
-       if (sb->set_name[0]) {
-               printf(" name=");
-               print_quoted(sb->set_name);
-       }
        printf("\n");
 }
 
@@ -653,7 +644,7 @@ static void export_examine_super1(struct supertype *st)
        int len = 32;
        int layout;
 
-       printf("MD_LEVEL=%s\n", map_num(pers, __le32_to_cpu(sb->level)));
+       printf("MD_LEVEL=%s\n", map_num_s(pers, __le32_to_cpu(sb->level)));
        printf("MD_DEVICES=%d\n", __le32_to_cpu(sb->raid_disks));
        for (i = 0; i < 32; i++)
                if (sb->set_name[i] == '\n' || sb->set_name[i] == '\0') {
@@ -689,7 +680,7 @@ static void export_examine_super1(struct supertype *st)
        }
        printf("MD_UUID=");
        for (i = 0; i < 16; i++) {
-               if ((i&3) == 0 && i != 0)
+               if ((i & 3) == 0 && i != 0)
                        printf(":");
                printf("%02x", sb->set_uuid[i]);
        }
@@ -698,7 +689,7 @@ static void export_examine_super1(struct supertype *st)
               __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL);
        printf("MD_DEV_UUID=");
        for (i = 0; i < 16; i++) {
-               if ((i&3) == 0 && i != 0)
+               if ((i & 3) == 0 && i != 0)
                        printf(":");
                printf("%02x", sb->device_uuid[i]);
        }
@@ -788,7 +779,7 @@ static int copy_metadata1(struct supertype *st, int from, int to)
                                /* have the header, can calculate
                                 * correct bitmap bytes */
                                bitmap_super_t *bms;
-                               bms = (void*)buf;
+                               bms = (void *)buf;
                                bytes = calc_bitmap_size(bms, 512);
                                if (n > bytes)
                                        n =  bytes;
@@ -843,7 +834,7 @@ err:
 static void detail_super1(struct supertype *st, char *homehost, char *subarray)
 {
        struct mdp_superblock_1 *sb = st->sb;
-       bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
+       bitmap_super_t *bms = (bitmap_super_t *)(((char *)sb) + MAX_SB_SIZE);
        int i;
        int l = homehost ? strlen(homehost) : 0;
 
@@ -856,7 +847,7 @@ static void detail_super1(struct supertype *st, char *homehost, char *subarray)
                printf("\n      Cluster Name : %-64s", bms->cluster_name);
        printf("\n              UUID : ");
        for (i = 0; i < 16; i++) {
-               if ((i&3) == 0 && i != 0)
+               if ((i & 3) == 0 && i != 0)
                        printf(":");
                printf("%02x", sb->set_uuid[i]);
        }
@@ -869,10 +860,6 @@ static void brief_detail_super1(struct supertype *st, char *subarray)
        struct mdp_superblock_1 *sb = st->sb;
        int i;
 
-       if (sb->set_name[0]) {
-               printf(" name=");
-               print_quoted(sb->set_name);
-       }
        printf(" UUID=");
        for (i = 0; i < 16; i++) {
                if ((i & 3) == 0 && i != 0)
@@ -915,7 +902,7 @@ static int examine_badblocks_super1(struct supertype *st, int fd, char *devname)
        }
 
        size = __le16_to_cpu(sb->bblog_size)* 512;
-       if (posix_memalign((void**)&bbl, 4096, size) != 0) {
+       if (posix_memalign((void **)&bbl, 4096, size) != 0) {
                pr_err("could not allocate badblocks list\n");
                return 0;
        }
@@ -963,7 +950,7 @@ static int match_home1(struct supertype *st, char *homehost)
 static void uuid_from_super1(struct supertype *st, int uuid[4])
 {
        struct mdp_superblock_1 *super = st->sb;
-       char *cuuid = (char*)uuid;
+       char *cuuid = (char *)uuid;
        int i;
        for (i = 0; i < 16; i++)
                cuuid[i] = super->set_uuid[i];
@@ -972,9 +959,9 @@ static void uuid_from_super1(struct supertype *st, int uuid[4])
 static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
 {
        struct mdp_superblock_1 *sb = st->sb;
-       struct bitmap_super_s *bsb = (void*)(((char*)sb)+MAX_SB_SIZE);
+       struct bitmap_super_s *bsb = (void *)(((char *)sb) + MAX_SB_SIZE);
        struct misc_dev_info *misc =
-               (void*)(((char*)sb)+MAX_SB_SIZE+BM_SUPER_SIZE);
+               (void *)(((char *)sb) + MAX_SB_SIZE+BM_SUPER_SIZE);
        int working = 0;
        unsigned int i;
        unsigned int role;
@@ -1142,7 +1129,7 @@ static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
        info->recovery_blocked = info->reshape_active;
 
        if (map)
-               for (i=0; i<map_disks; i++)
+               for (i = 0; i < map_disks; i++)
                        map[i] = 0;
        for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) {
                role = __le16_to_cpu(sb->dev_roles[i]);
@@ -1184,7 +1171,7 @@ static struct mdinfo *container_content1(struct supertype *st, char *subarray)
 }
 
 static int update_super1(struct supertype *st, struct mdinfo *info,
-                        char *update, char *devname, int verbose,
+                        enum update_opt update, char *devname, int verbose,
                         int uuid_set, char *homehost)
 {
        /* NOTE: for 'assemble' and 'force' we need to return non-zero
@@ -1193,31 +1180,56 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
         */
        int rv = 0;
        struct mdp_superblock_1 *sb = st->sb;
-       bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
+       bitmap_super_t *bms = (bitmap_super_t *)(((char *)sb) + MAX_SB_SIZE);
 
-       if (strcmp(update, "homehost") == 0 &&
-           homehost) {
-               /* Note that 'homehost' is special as it is really
+       if (update == UOPT_HOMEHOST && homehost) {
+               /*
+                * Note that 'homehost' is special as it is really
                 * a "name" update.
                 */
                char *c;
-               update = "name";
+               update = UOPT_NAME;
                c = strchr(sb->set_name, ':');
                if (c)
-                       strncpy(info->name, c+1, 31 - (c-sb->set_name));
+                       snprintf(info->name, sizeof(info->name), "%s", c + 1);
                else
-                       strncpy(info->name, sb->set_name, 32);
-               info->name[32] = 0;
+                       snprintf(info->name, sizeof(info->name), "%s",
+                                sb->set_name);
        }
 
-       if (strcmp(update, "force-one")==0) {
+       switch (update) {
+       case UOPT_NAME: {
+               int namelen;
+
+               if (!info->name[0])
+                       snprintf(info->name, sizeof(info->name), "%d", info->array.md_minor);
+               memset(sb->set_name, 0, sizeof(sb->set_name));
+
+               namelen = strnlen(homehost, MD_NAME_MAX) + 1 + strnlen(info->name, MD_NAME_MAX);
+               if (homehost &&
+                   strchr(info->name, ':') == NULL &&
+                   namelen < MD_NAME_MAX) {
+                       strcpy(sb->set_name, homehost);
+                       strcat(sb->set_name, ":");
+                       strcat(sb->set_name, info->name);
+               } else {
+                       namelen = min((int)strnlen(info->name, MD_NAME_MAX),
+                                     (int)sizeof(sb->set_name) - 1);
+                       memcpy(sb->set_name, info->name, namelen);
+                       memset(&sb->set_name[namelen], '\0',
+                              sizeof(sb->set_name) - namelen);
+               }
+               break;
+       }
+       case UOPT_SPEC_FORCE_ONE:
                /* Not enough devices for a working array,
                 * so bring this one up-to-date
                 */
                if (sb->events != __cpu_to_le64(info->events))
                        rv = 1;
                sb->events = __cpu_to_le64(info->events);
-       } else if (strcmp(update, "force-array")==0) {
+               break;
+       case UOPT_SPEC_FORCE_ARRAY:
                /* Degraded array and 'force' requests to
                 * maybe need to mark it 'clean'.
                 */
@@ -1230,7 +1242,8 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                                rv = 1;
                        sb->resync_offset = MaxSector;
                }
-       } else if (strcmp(update, "assemble")==0) {
+               break;
+       case UOPT_SPEC_ASSEMBLE: {
                int d = info->disk.number;
                int want;
                if (info->disk.state & (1<<MD_DISK_ACTIVE))
@@ -1263,20 +1276,28 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                                __cpu_to_le64(info->reshape_progress);
                        rv = 1;
                }
-       } else if (strcmp(update, "linear-grow-new") == 0) {
-               unsigned int i;
+               break;
+       }
+       case UOPT_SPEC_LINEAR_GROW_NEW: {
+               int i;
                int fd;
-               unsigned int max = __le32_to_cpu(sb->max_dev);
+               int max = __le32_to_cpu(sb->max_dev);
+
+               if (max > MAX_DEVS)
+                       return -2;
 
                for (i = 0; i < max; i++)
                        if (__le16_to_cpu(sb->dev_roles[i]) >=
                            MD_DISK_ROLE_FAULTY)
                                break;
+               if (i != info->disk.number)
+                       return -2;
                sb->dev_number = __cpu_to_le32(i);
-               info->disk.number = i;
-               if (i >= max) {
-                       sb->max_dev = __cpu_to_le32(max+1);
-               }
+
+               if (i == max)
+                       sb->max_dev = __cpu_to_le32(max + 1);
+               if (i > max)
+                       return -2;
 
                random_uuid(sb->device_uuid);
 
@@ -1293,32 +1314,45 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                                sb->data_size = __cpu_to_le64(
                                        ds - __le64_to_cpu(sb->data_offset));
                        } else {
-                               ds -= 8*2;
-                               ds &= ~(unsigned long long)(4*2-1);
+                               ds -= 8 * 2;
+                               ds &= ~(unsigned long long)(4 * 2 - 1);
                                sb->super_offset = __cpu_to_le64(ds);
                                sb->data_size = __cpu_to_le64(
                                        ds - __le64_to_cpu(sb->data_offset));
                        }
                }
-       } else if (strcmp(update, "linear-grow-update") == 0) {
+               break;
+       }
+       case UOPT_SPEC_LINEAR_GROW_UPDATE: {
                int max = __le32_to_cpu(sb->max_dev);
+               int i = info->disk.number;
+               if (max > MAX_DEVS || i > MAX_DEVS)
+                       return -2;
+               if (i > max)
+                       return -2;
+               if (i == max)
+                       sb->max_dev = __cpu_to_le32(max + 1);
                sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
-               if (info->array.raid_disks > max) {
-                       sb->max_dev = __cpu_to_le32(max+1);
-               }
                sb->dev_roles[info->disk.number] =
                        __cpu_to_le16(info->disk.raid_disk);
-       } else if (strcmp(update, "resync") == 0) {
+               break;
+       }
+       case UOPT_RESYNC:
                /* make sure resync happens */
-               sb->resync_offset = 0ULL;
-       } else if (strcmp(update, "uuid") == 0) {
+               sb->resync_offset = 0;
+               break;
+       case UOPT_UUID:
                copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
 
                if (__le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)
                        memcpy(bms->uuid, sb->set_uuid, 16);
-       } else if (strcmp(update, "no-bitmap") == 0) {
+               break;
+       case UOPT_NO_BITMAP:
                sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
-       } else if (strcmp(update, "bbl") == 0) {
+               if (bms->version == BITMAP_MAJOR_CLUSTERED && !IsBitmapDirty(devname))
+                       sb->resync_offset = MaxSector;
+               break;
+       case UOPT_BBL: {
                /* only possible if there is room after the bitmap, or if
                 * there is no bitmap
                 */
@@ -1347,14 +1381,12 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                                bb_offset = bitmap_offset + bm_sectors;
                        while (bb_offset < (long)sb_offset + 8 + 32*2 &&
                               bb_offset + 8+8 <= (long)data_offset)
-                               /* too close to bitmap, and room to grow */
                                bb_offset += 8;
                        if (bb_offset + 8 <= (long)data_offset) {
                                sb->bblog_size = __cpu_to_le16(8);
                                sb->bblog_offset = __cpu_to_le32(bb_offset);
                        }
                } else {
-                       /* 1.0 - Put bbl just before super block */
                        if (bm_sectors && bitmap_offset < 0)
                                space = -bitmap_offset - bm_sectors;
                        else
@@ -1365,7 +1397,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                                sb->bblog_offset = __cpu_to_le32((unsigned)-8);
                        }
                }
-       } else if (strcmp(update, "no-bbl") == 0) {
+               break;
+       }
+       case UOPT_NO_BBL:
                if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
                        pr_err("Cannot remove active bbl from %s\n",devname);
                else {
@@ -1373,12 +1407,14 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                        sb->bblog_shift = 0;
                        sb->bblog_offset = 0;
                }
-       } else if (strcmp(update, "force-no-bbl") == 0) {
+               break;
+       case UOPT_FORCE_NO_BBL:
                sb->feature_map &= ~ __cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
                sb->bblog_size = 0;
                sb->bblog_shift = 0;
                sb->bblog_offset = 0;
-       } else if (strcmp(update, "ppl") == 0) {
+               break;
+       case UOPT_PPL: {
                unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
                unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
                unsigned long long data_size = __le64_to_cpu(sb->data_size);
@@ -1428,37 +1464,26 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                sb->ppl.offset = __cpu_to_le16(offset);
                sb->ppl.size = __cpu_to_le16(space);
                sb->feature_map |= __cpu_to_le32(MD_FEATURE_PPL);
-       } else if (strcmp(update, "no-ppl") == 0) {
+               break;
+       }
+       case UOPT_NO_PPL:
                sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_PPL |
                                                   MD_FEATURE_MUTLIPLE_PPLS);
-       } else if (strcmp(update, "name") == 0) {
-               if (info->name[0] == 0)
-                       sprintf(info->name, "%d", info->array.md_minor);
-               memset(sb->set_name, 0, sizeof(sb->set_name));
-               if (homehost &&
-                   strchr(info->name, ':') == NULL &&
-                   strlen(homehost)+1+strlen(info->name) < 32) {
-                       strcpy(sb->set_name, homehost);
-                       strcat(sb->set_name, ":");
-                       strcat(sb->set_name, info->name);
-               } else {
-                       int namelen;
-
-                       namelen = min((int)strlen(info->name),
-                                     (int)sizeof(sb->set_name) - 1);
-                       memcpy(sb->set_name, info->name, namelen);
-                       memset(&sb->set_name[namelen], '\0',
-                              sizeof(sb->set_name) - namelen);
-               }
-       } else if (strcmp(update, "devicesize") == 0 &&
-                  __le64_to_cpu(sb->super_offset) <
-                  __le64_to_cpu(sb->data_offset)) {
-               /* set data_size to device size less data_offset */
+               break;
+       case UOPT_DEVICESIZE:
+               if (__le64_to_cpu(sb->super_offset) >=
+                   __le64_to_cpu(sb->data_offset))
+                       break;
+               /*
+                * set data_size to device size less data_offset
+                */
                struct misc_dev_info *misc = (struct misc_dev_info*)
                        (st->sb + MAX_SB_SIZE + BM_SUPER_SIZE);
                sb->data_size = __cpu_to_le64(
                        misc->device_size - __le64_to_cpu(sb->data_offset));
-       } else if (strncmp(update, "revert-reshape", 14) == 0) {
+               break;
+       case UOPT_SPEC_REVERT_RESHAPE_NOBACKUP:
+       case UOPT_REVERT_RESHAPE:
                rv = -2;
                if (!(sb->feature_map &
                      __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)))
@@ -1476,7 +1501,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                         * If that couldn't happen, the "-nobackup" version
                         * will be used.
                         */
-                       if (strcmp(update, "revert-reshape-nobackup") == 0 &&
+                       if (update == UOPT_SPEC_REVERT_RESHAPE_NOBACKUP &&
                            sb->reshape_position == 0 &&
                            (__le32_to_cpu(sb->delta_disks) > 0 ||
                             (__le32_to_cpu(sb->delta_disks) == 0 &&
@@ -1500,8 +1525,7 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                         * So we reject a revert-reshape unless the
                         * alignment is good.
                         */
-                       if (__le32_to_cpu(sb->level) >= 4 &&
-                           __le32_to_cpu(sb->level) <= 6) {
+                       if (is_level456(__le32_to_cpu(sb->level))) {
                                reshape_sectors =
                                        __le64_to_cpu(sb->reshape_position);
                                reshape_chunk = __le32_to_cpu(sb->new_chunk);
@@ -1540,28 +1564,40 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
                        }
                done:;
                }
-       } else if (strcmp(update, "_reshape_progress") == 0)
+               break;
+       case UOPT_SPEC__RESHAPE_PROGRESS:
                sb->reshape_position = __cpu_to_le64(info->reshape_progress);
-       else if (strcmp(update, "writemostly") == 0)
+               break;
+       case UOPT_SPEC_WRITEMOSTLY:
                sb->devflags |= WriteMostly1;
-       else if (strcmp(update, "readwrite") == 0)
+               break;
+       case UOPT_SPEC_READWRITE:
                sb->devflags &= ~WriteMostly1;
-       else if (strcmp(update, "failfast") == 0)
+               break;
+       case UOPT_SPEC_FAILFAST:
                sb->devflags |= FailFast1;
-       else if (strcmp(update, "nofailfast") == 0)
+               break;
+       case UOPT_SPEC_NOFAILFAST:
                sb->devflags &= ~FailFast1;
-       else if (strcmp(update, "layout-original") == 0 ||
-                strcmp(update, "layout-alternate") == 0) {
+               break;
+       case UOPT_LAYOUT_ORIGINAL:
+       case UOPT_LAYOUT_ALTERNATE:
+       case UOPT_LAYOUT_UNSPECIFIED:
                if (__le32_to_cpu(sb->level) != 0) {
                        pr_err("%s: %s only supported for RAID0\n",
-                              devname?:"", update);
+                              devname ?: "", map_num(update_options, update));
                        rv = -1;
+               } else if (update == UOPT_LAYOUT_UNSPECIFIED) {
+                       sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
+                       sb->layout = 0;
                } else {
                        sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
-                       sb->layout = __cpu_to_le32(update[7] == 'o' ? 1 : 2);
+                       sb->layout = __cpu_to_le32(update == UOPT_LAYOUT_ORIGINAL ? 1 : 2);
                }
-       } else
+               break;
+       default:
                rv = -1;
+       }
 
        sb->sb_csum = calc_sb_1_csum(sb);
 
@@ -1577,7 +1613,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
        char defname[10];
        int sbsize;
 
-       if (posix_memalign((void**)&sb, 4096, SUPER1_SIZE) != 0) {
+       if (posix_memalign((void **)&sb, 4096, SUPER1_SIZE) != 0) {
                pr_err("could not allocate superblock\n");
                return 0;
        }
@@ -1611,8 +1647,8 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
                name = defname;
        }
        if (homehost &&
-           strchr(name, ':')== NULL &&
-           strlen(homehost)+1+strlen(name) < 32) {
+           strchr(name, ':') == NULL &&
+           strlen(homehost) + 1 + strlen(name) < 32) {
                strcpy(sb->set_name, homehost);
                strcat(sb->set_name, ":");
                strcat(sb->set_name, name);
@@ -1691,7 +1727,7 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
 
        if (dk->number >= (int)__le32_to_cpu(sb->max_dev) &&
            __le32_to_cpu(sb->max_dev) < MAX_DEVS)
-               sb->max_dev = __cpu_to_le32(dk->number+1);
+               sb->max_dev = __cpu_to_le32(dk->number + 1);
 
        sb->dev_number = __cpu_to_le32(dk->number);
        sb->devflags = 0; /* don't copy another disks flags */
@@ -1705,7 +1741,10 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
        di->devname = devname;
        di->disk = *dk;
        di->data_offset = data_offset;
-       get_dev_size(fd, NULL, &di->dev_size);
+
+       if (is_fd_valid(fd))
+               get_dev_size(fd, NULL, &di->dev_size);
+
        di->next = NULL;
        *dip = di;
 
@@ -1772,8 +1811,8 @@ static int store_super1(struct supertype *st, int fd)
                return 4;
 
        if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
-               struct bitmap_super_s *bm = (struct bitmap_super_s*)
-                       (((char*)sb)+MAX_SB_SIZE);
+               struct bitmap_super_s *bm;
+               bm = (struct bitmap_super_s *)(((char *)sb) + MAX_SB_SIZE);
                if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) {
                        locate_bitmap1(st, fd, 0);
                        if (awrite(&afd, bm, sizeof(*bm)) != sizeof(*bm))
@@ -1860,7 +1899,7 @@ static int write_empty_r5l_meta_block(struct supertype *st, int fd)
 
        init_afd(&afd, fd);
 
-       if (posix_memalign((void**)&mb, 4096, META_BLOCK_SIZE) != 0) {
+       if (posix_memalign((void **)&mb, 4096, META_BLOCK_SIZE) != 0) {
                pr_err("Could not allocate memory for the meta block.\n");
                return 1;
        }
@@ -1896,6 +1935,14 @@ fail_to_write:
        return 1;
 }
 
+/* True when sb->level is 0 (RAID0) and sb->layout is non-zero. */
+static bool has_raid0_layout(struct mdp_superblock_1 *sb)
+{
+       bool is_raid0 = sb->level == 0;
+
+       return is_raid0 && sb->layout != 0;
+}
+
 static int write_init_super1(struct supertype *st)
 {
        struct mdp_superblock_1 *sb = st->sb;
@@ -1907,12 +1954,17 @@ static int write_init_super1(struct supertype *st)
        unsigned long long sb_offset;
        unsigned long long data_offset;
        long bm_offset;
-       int raid0_need_layout = 0;
+       bool raid0_need_layout = false;
+
+       /* Since linux kernel v5.4, raid0 always has a layout */
+       if (has_raid0_layout(sb) && get_linux_version() >= 5004000)
+               raid0_need_layout = true;
 
        for (di = st->info; di; di = di->next) {
                if (di->disk.state & (1 << MD_DISK_JOURNAL))
                        sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
-               if (sb->level == 0 && sb->layout != 0) {
+               if (has_raid0_layout(sb) && !raid0_need_layout) {
+
                        struct devinfo *di2 = st->info;
                        unsigned long long s1, s2;
                        s1 = di->dev_size;
@@ -1924,7 +1976,7 @@ static int write_init_super1(struct supertype *st)
                                s2 -= di2->data_offset;
                        s2 /= __le32_to_cpu(sb->chunksize);
                        if (s1 != s2)
-                               raid0_need_layout = 1;
+                               raid0_need_layout = true;
                }
        }
 
@@ -1961,11 +2013,6 @@ static int write_init_super1(struct supertype *st)
                                /* same array, so preserve events and
                                 * dev_number */
                                sb->events = refsb->events;
-                               /* bugs in 2.6.17 and earlier mean the
-                                * dev_number chosen in Manage must be preserved
-                                */
-                               if (get_linux_version() >= 2006018)
-                                       sb->dev_number = refsb->dev_number;
                        }
                        free_super1(refst);
                }
@@ -2110,7 +2157,8 @@ out:
        return rv;
 }
 
-static int compare_super1(struct supertype *st, struct supertype *tst)
+static int compare_super1(struct supertype *st, struct supertype *tst,
+                         int verbose)
 {
        /*
         * return:
@@ -2128,7 +2176,7 @@ static int compare_super1(struct supertype *st, struct supertype *tst)
                return 1;
 
        if (!first) {
-               if (posix_memalign((void**)&first, 4096, SUPER1_SIZE) != 0) {
+               if (posix_memalign((void **)&first, 4096, SUPER1_SIZE) != 0) {
                        pr_err("could not allocate superblock\n");
                        return 1;
                }
@@ -2172,6 +2220,7 @@ static int load_super1(struct supertype *st, int fd, char *devname)
                tst.ss = &super1;
                for (tst.minor_version = 0; tst.minor_version <= 2;
                     tst.minor_version++) {
+                       tst.ignore_hw_compat = st->ignore_hw_compat;
                        switch(load_super1(&tst, fd, devname)) {
                        case 0: super = tst.sb;
                                if (bestvers == -1 ||
@@ -2240,7 +2289,7 @@ static int load_super1(struct supertype *st, int fd, char *devname)
                return 1;
        }
 
-       if (posix_memalign((void**)&super, 4096, SUPER1_SIZE) != 0) {
+       if (posix_memalign((void **)&super, 4096, SUPER1_SIZE) != 0) {
                pr_err("could not allocate superblock\n");
                return 1;
        }
@@ -2278,16 +2327,30 @@ static int load_super1(struct supertype *st, int fd, char *devname)
                free(super);
                return 2;
        }
-       st->sb = super;
 
-       bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
+       bsb = (struct bitmap_super_s *)(((char *)super) + MAX_SB_SIZE);
 
        misc = (struct misc_dev_info*)
-         (((char*)super)+MAX_SB_SIZE+BM_SUPER_SIZE);
+               (((char *)super) + MAX_SB_SIZE+BM_SUPER_SIZE);
        misc->device_size = dsize;
        if (st->data_offset == INVALID_SECTORS)
                st->data_offset = __le64_to_cpu(super->data_offset);
 
+       if (st->minor_version >= 1 &&
+           st->ignore_hw_compat == 0 &&
+           ((role_from_sb(super) != MD_DISK_ROLE_JOURNAL &&
+             dsize < (__le64_to_cpu(super->data_offset) +
+                      __le64_to_cpu(super->size))) ||
+            dsize < (__le64_to_cpu(super->data_offset) +
+                     __le64_to_cpu(super->data_size)))) {
+               if (devname)
+                       pr_err("Device %s is not large enough for data described in superblock\n",
+                              devname);
+               free(super);
+               return 2;
+       }
+       st->sb = super;
+
        /* Now check on the bitmap superblock */
        if ((__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) == 0)
                return 0;
@@ -2306,8 +2369,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
        return 0;
 
  no_bitmap:
-       super->feature_map = __cpu_to_le32(__le32_to_cpu(super->feature_map)
-                                          ~MD_FEATURE_BITMAP_OFFSET);
+       super->feature_map = __cpu_to_le32(__le32_to_cpu(super->feature_map) &
+                                          ~MD_FEATURE_BITMAP_OFFSET);
        return 0;
 }
 
@@ -2365,7 +2428,7 @@ static __u64 avail_size1(struct supertype *st, __u64 devsize,
        if (__le32_to_cpu(super->feature_map) & MD_FEATURE_BITMAP_OFFSET) {
                /* hot-add. allow for actual size of bitmap */
                struct bitmap_super_s *bsb;
-               bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
+               bsb = (struct bitmap_super_s *)(((char *)super) + MAX_SB_SIZE);
                bmspace = calc_bitmap_size(bsb, 4096) >> 9;
        } else if (md_feature_any_ppl_on(super->feature_map)) {
                bmspace = __le16_to_cpu(super->ppl.size);
@@ -2434,7 +2497,7 @@ add_internal_bitmap1(struct supertype *st,
        int creating = 0;
        int len;
        struct mdp_superblock_1 *sb = st->sb;
-       bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
+       bitmap_super_t *bms = (bitmap_super_t *)(((char *)sb) + MAX_SB_SIZE);
        int uuid[4];
 
        if (__le64_to_cpu(sb->data_size) == 0)
@@ -2522,10 +2585,10 @@ add_internal_bitmap1(struct supertype *st,
        max_bits = (room * 512 - sizeof(bitmap_super_t)) * 8;
 
        min_chunk = 4096; /* sub-page chunks don't work yet.. */
-       bits = (size*512)/min_chunk +1;
+       bits = (size * 512) / min_chunk + 1;
        while (bits > max_bits) {
                min_chunk *= 2;
-               bits = (bits+1)/2;
+               bits = (bits + 1) / 2;
        }
        if (chunk == UnSet) {
                /* For practical purpose, 64Meg is a good
@@ -2543,8 +2606,8 @@ add_internal_bitmap1(struct supertype *st,
                /* start bitmap on a 4K boundary with enough space for
                 * the bitmap
                 */
-               bits = (size*512) / chunk + 1;
-               room = ((bits+7)/8 + sizeof(bitmap_super_t) +4095)/4096;
+               bits = (size * 512) / chunk + 1;
+               room = ((bits + 7) / 8 + sizeof(bitmap_super_t) + 4095) / 4096;
                room *= 8; /* convert 4K blocks to sectors */
                offset = -room - bbl_size;
        }
@@ -2578,8 +2641,9 @@ add_internal_bitmap1(struct supertype *st,
 
 static int locate_bitmap1(struct supertype *st, int fd, int node_num)
 {
-       unsigned long long offset;
+       unsigned long long offset, bm_sectors_per_node;
        struct mdp_superblock_1 *sb;
+       bitmap_super_t *bms;
        int mustfree = 0;
        int ret;
 
@@ -2594,8 +2658,13 @@ static int locate_bitmap1(struct supertype *st, int fd, int node_num)
                ret = 0;
        else
                ret = -1;
-       offset = __le64_to_cpu(sb->super_offset);
-       offset += (int32_t) __le32_to_cpu(sb->bitmap_offset) * (node_num + 1);
+
+       offset = __le64_to_cpu(sb->super_offset) + (int32_t)__le32_to_cpu(sb->bitmap_offset);
+       if (node_num) {
+               bms = (bitmap_super_t *)(((char *)sb) + MAX_SB_SIZE);
+               bm_sectors_per_node = calc_bitmap_size(bms, 4096) >> 9;
+               offset += bm_sectors_per_node * node_num;
+       }
        if (mustfree)
                free(sb);
        lseek64(fd, offset<<9, 0);
@@ -2605,7 +2674,7 @@ static int locate_bitmap1(struct supertype *st, int fd, int node_num)
 static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update)
 {
        struct mdp_superblock_1 *sb = st->sb;
-       bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
+       bitmap_super_t *bms = (bitmap_super_t *)(((char *)sb) + MAX_SB_SIZE);
        int rv = 0;
        void *buf;
        int towrite, n, len;
@@ -2633,7 +2702,17 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update
                }
 
                if (bms->version == BITMAP_MAJOR_CLUSTERED) {
-                       if (__cpu_to_le32(st->nodes) < bms->nodes) {
+                       if (st->nodes == 1) {
+                               /* the parameter for nodes is not valid */
+                               pr_err("Warning: cluster-md at least needs two nodes\n");
+                               return -EINVAL;
+                       } else if (st->nodes == 0) {
+                               /*
+                                * parameter "--nodes" is not specified, (eg, add a disk to
+                                * clustered raid)
+                                */
+                               break;
+                       } else if (__cpu_to_le32(st->nodes) < bms->nodes) {
                                /*
                                 * Since the nodes num is not increased, no
                                 * need to check the space enough or not,
@@ -2679,7 +2758,10 @@ static int write_bitmap1(struct supertype *st, int fd, enum bitmap_update update
 
        init_afd(&afd, fd);
 
-       locate_bitmap1(st, fd, 0);
+       if (locate_bitmap1(st, fd, 0) < 0) {
+               pr_err("Error: Invalid bitmap\n");
+               return -EINVAL;
+       }
 
        if (posix_memalign(&buf, 4096, 4096))
                return -ENOMEM;
@@ -2753,9 +2835,10 @@ static int validate_geometry1(struct supertype *st, int level,
        unsigned long long ldsize, devsize;
        int bmspace;
        unsigned long long headroom;
+       unsigned long long overhead;
        int fd;
 
-       if (level == LEVEL_CONTAINER) {
+       if (is_container(level)) {
                if (verbose)
                        pr_err("1.x metadata does not support containers\n");
                return 0;
@@ -2785,10 +2868,6 @@ static int validate_geometry1(struct supertype *st, int level,
        close(fd);
 
        devsize = ldsize >> 9;
-       if (devsize < 24) {
-               *freesize = 0;
-               return 0;
-       }
 
        /* creating:  allow suitable space for bitmap or PPL */
        if (consistency_policy == CONSISTENCY_POLICY_PPL)
@@ -2829,15 +2908,27 @@ static int validate_geometry1(struct supertype *st, int level,
        case 0: /* metadata at end.  Round down and subtract space to reserve */
                devsize = (devsize & ~(4ULL*2-1));
                /* space for metadata, bblog, bitmap/ppl */
-               devsize -= 8*2 + 8 + bmspace;
+               overhead = 8*2 + 8 + bmspace;
+               if (devsize < overhead) /* detect underflow */
+                       goto dev_too_small_err;
+               devsize -= overhead;
                break;
        case 1:
        case 2:
+               if (devsize < data_offset) /* detect underflow */
+                       goto dev_too_small_err;
                devsize -= data_offset;
                break;
        }
        *freesize = devsize;
        return 1;
+
+/* Error condition, device cannot even hold the overhead. */
+dev_too_small_err:
+       fprintf(stderr, "device %s is too small (%lluK) for "
+                       "required metadata!\n", subdev, devsize>>1);
+       *freesize = 0;
+       return 0;
 }
 
 void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0)
@@ -2857,16 +2948,16 @@ void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0
 
        copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
        sprintf(sb->set_name, "%d", sb0->md_minor);
-       sb->ctime = __cpu_to_le32(info->array.ctime+1);
+       sb->ctime = __cpu_to_le32(info->array.ctime + 1);
        sb->level = __cpu_to_le32(info->array.level);
        sb->layout = __cpu_to_le32(info->array.layout);
        sb->size = __cpu_to_le64(info->component_size);
-       sb->chunksize = __cpu_to_le32(info->array.chunk_size/512);
+       sb->chunksize = __cpu_to_le32(info->array.chunk_size / 512);
        sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
        if (info->array.level > 0)
                sb->data_size = sb->size;
        else
-               sb->data_size = st->ss->avail_size(st, st->devsize/512, 0);
+               sb->data_size = st->ss->avail_size(st, st->devsize / 512, 0);
        sb->resync_offset = MaxSector;
        sb->max_dev = __cpu_to_le32(MD_SB_DISKS);
        sb->dev_number = __cpu_to_le32(info->disk.number);
diff --git a/sysfs.c b/sysfs.c
index 2995713d644d572a447cacdddb1b5b2e043f98a5..20fe1e9efaeda3c9de931a0a21d5c78751aee7ad 100644 (file)
--- a/sysfs.c
+++ b/sysfs.c
@@ -74,6 +74,29 @@ void sysfs_free(struct mdinfo *sra)
        }
 }
 
+/**
+ * sysfs_get_container_devnm() - extract container device name.
+ * @mdi: md_info describes member array, with GET_VERSION option.
+ * @buf: buffer to fill, must hold at least MD_NAME_MAX bytes.
+ *
+ * External array version is in format {/,-}<container_devnm>/<array_index>
+ * Extract container_devnm from it and save it in @buf.
+ */
+void sysfs_get_container_devnm(struct mdinfo *mdi, char *buf)
+{
+       char *p;
+
+       assert(is_subarray(mdi->text_version));
+
+       /* Skip the leading special sign ('/' or '-') */
+       snprintf(buf, MD_NAME_MAX, "%s", mdi->text_version + 1);
+
+       /* Cut the copy at the first '/', dropping the array index */
+       p = strchr(buf, '/');
+       if (p)
+               *p = 0;
+}
+
 int sysfs_open(char *devnm, char *devname, char *attr)
 {
        char fname[MAX_SYSFS_PATH_LEN];
@@ -148,7 +171,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
                strcpy(base, "metadata_version");
                if (load_sys(fname, buf, sizeof(buf)))
                        goto abort;
-               if (strncmp(buf, "none", 4) == 0) {
+               if (str_is_none(buf) == true) {
                        sra->array.major_version =
                                sra->array.minor_version = -1;
                        strcpy(sra->text_version, "");
@@ -244,7 +267,7 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
                        goto abort;
                if (strncmp(buf, "file", 4) == 0)
                        sra->bitmap_offset = 1;
-               else if (strncmp(buf, "none", 4) == 0)
+               else if (str_is_none(buf) == true)
                        sra->bitmap_offset = 0;
                else if (buf[0] == '+')
                        sra->bitmap_offset = strtol(buf+1, NULL, 10);
@@ -655,7 +678,7 @@ int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms)
        return sysfs_set_str(sra, NULL, "safe_mode_delay", delay);
 }
 
-int sysfs_set_array(struct mdinfo *info, int vers)
+int sysfs_set_array(struct mdinfo *info)
 {
        int rv = 0;
        char ver[100];
@@ -664,7 +687,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
        ver[0] = 0;
        if (info->array.major_version == -1 &&
            info->array.minor_version == -2) {
-               char buf[1024];
+               char buf[SYSFS_MAX_BUF_SIZE];
 
                strcat(strcpy(ver, "external:"), info->text_version);
 
@@ -675,13 +698,11 @@ int sysfs_set_array(struct mdinfo *info, int vers)
                 * version first, and preserve the flag
                 */
                if (sysfs_get_str(info, NULL, "metadata_version",
-                                 buf, 1024) > 0)
+                                 buf, sizeof(buf)) > 0)
                        if (strlen(buf) >= 9 && buf[9] == '-')
                                ver[9] = '-';
 
-               if ((vers % 100) < 2 ||
-                   sysfs_set_str(info, NULL, "metadata_version",
-                                 ver) < 0) {
+               if (sysfs_set_str(info, NULL, "metadata_version", ver) < 0) {
                        pr_err("This kernel does not support external metadata.\n");
                        return 1;
                }
@@ -689,7 +710,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
        if (info->array.level < 0)
                return 0; /* FIXME */
        rv |= sysfs_set_str(info, NULL, "level",
-                           map_num(pers, info->array.level));
+                           map_num_s(pers, info->array.level));
        if (info->reshape_active && info->delta_disks != UnSet)
                raid_disks -= info->delta_disks;
        rv |= sysfs_set_num(info, NULL, "raid_disks", raid_disks);
@@ -724,9 +745,10 @@ int sysfs_set_array(struct mdinfo *info, int vers)
        }
 
        if (info->consistency_policy == CONSISTENCY_POLICY_PPL) {
-               if (sysfs_set_str(info, NULL, "consistency_policy",
-                                 map_num(consistency_policies,
-                                         info->consistency_policy))) {
+               char *policy = map_num_s(consistency_policies,
+                                           info->consistency_policy);
+
+               if (sysfs_set_str(info, NULL, "consistency_policy", policy)) {
                        pr_err("This kernel does not support PPL. Falling back to consistency-policy=resync.\n");
                        info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
                }
@@ -762,7 +784,7 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
 
        rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
        rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
-       if (sra->array.level != LEVEL_CONTAINER) {
+       if (!is_container(sra->array.level)) {
                if (sra->consistency_policy == CONSISTENCY_POLICY_PPL) {
                        rv |= sysfs_set_num(sra, sd, "ppl_sector", sd->ppl_sector);
                        rv |= sysfs_set_num(sra, sd, "ppl_size", sd->ppl_size);
@@ -802,72 +824,6 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
        return rv;
 }
 
-#if 0
-int sysfs_disk_to_sg(int fd)
-{
-       /* from an open block device, try find and open its corresponding
-        * scsi_generic interface
-        */
-       struct stat st;
-       char path[256];
-       char sg_path[256];
-       char sg_major_minor[10];
-       char *c;
-       DIR *dir;
-       struct dirent *de;
-       int major, minor, rv;
-
-       if (fstat(fd, &st))
-               return -1;
-
-       snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/device",
-                major(st.st_rdev), minor(st.st_rdev));
-
-       dir = opendir(path);
-       if (!dir)
-               return -1;
-
-       de = readdir(dir);
-       while (de) {
-               if (strncmp("scsi_generic:", de->d_name,
-                           strlen("scsi_generic:")) == 0)
-                       break;
-               de = readdir(dir);
-       }
-       closedir(dir);
-
-       if (!de)
-               return -1;
-
-       snprintf(sg_path, sizeof(sg_path), "%s/%s/dev", path, de->d_name);
-       fd = open(sg_path, O_RDONLY);
-       if (fd < 0)
-               return fd;
-
-       rv = read(fd, sg_major_minor, sizeof(sg_major_minor));
-       close(fd);
-       if (rv < 0 || rv == sizeof(sg_major_minor))
-               return -1;
-       else
-               sg_major_minor[rv - 1] = '\0';
-
-       c = strchr(sg_major_minor, ':');
-       *c = '\0';
-       c++;
-       major = strtol(sg_major_minor, NULL, 10);
-       minor = strtol(c, NULL, 10);
-       snprintf(path, sizeof(path), "/dev/.tmp.md.%d:%d:%d",
-                (int) getpid(), major, minor);
-       if (mknod(path, S_IFCHR|0600, makedev(major, minor))==0) {
-                       fd = open(path, O_RDONLY);
-                       unlink(path);
-                       return fd;
-       }
-
-       return -1;
-}
-#endif
-
 int sysfs_disk_to_scsi_id(int fd, __u32 *id)
 {
        /* from an open block device, try to retrieve it scsi_id */
@@ -965,11 +921,11 @@ int sysfs_freeze_array(struct mdinfo *sra)
         * return 0 if this kernel doesn't support 'frozen'
         * return 1 if it worked.
         */
-       char buf[20];
+       char buf[SYSFS_MAX_BUF_SIZE];
 
        if (!sysfs_attribute_available(sra, NULL, "sync_action"))
                return 1; /* no sync_action == frozen */
-       if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0)
+       if (sysfs_get_str(sra, NULL, "sync_action", buf, sizeof(buf)) <= 0)
                return 0;
        if (strcmp(buf, "frozen\n") == 0)
                /* Already frozen */
@@ -1113,7 +1069,7 @@ void sysfsline(char *line)
                if (strncasecmp(w, "name=", 5) == 0) {
                        char *devname = w + 5;
 
-                       if (strncmp(devname, "/dev/md/", 8) == 0) {
+                       if (strncmp(devname, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) {
                                if (sr->devname)
                                        pr_err("Only give one device per SYSFS line: %s\n",
                                                devname);
@@ -1165,3 +1121,32 @@ void sysfsline(char *line)
        sr->next = sysfs_rules;
        sysfs_rules = sr;
 }
+
+/**
+ * sysfs_is_libata_allow_tpm_enabled() - check if libata allow_tpm is enabled.
+ * @verbose: verbose flag.
+ *
+ * Check if libata allow_tpm flag is set, this is required for SATA Opal Security commands to work.
+ *
+ * Return: true if allow_tpm is enabled, false otherwise (also when it cannot be opened or read).
+ */
+bool sysfs_is_libata_allow_tpm_enabled(const int verbose)
+{
+       const char *path = "/sys/module/libata/parameters/allow_tpm";
+       const char *expected_value = "1";
+       int fd = open(path, O_RDONLY);
+       char buf[3] = {0}; /* zeroed: a failed read must not leave garbage to compare */
+
+       if (!is_fd_valid(fd)) {
+               pr_vrb("Failed open file descriptor to %s. Cannot check libata allow_tpm param.\n",
+                      path);
+               return false;
+       }
+
+       sysfs_fd_get_str(fd, buf, sizeof(buf));
+       close(fd);
+
+       if (strncmp(buf, expected_value, 1) == 0)
+               return true;
+       return false;
+}
index 5c667d2a71f64bcfb8cac57cb43f7c7e15a91f0d..64b8254ad95316fc0e5b7822aaf16cac5ad6a602 100644 (file)
@@ -8,10 +8,10 @@
 [Unit]
 Description=Manage MD Reshape on /dev/%I
 DefaultDependencies=no
+Documentation=man:mdadm(8)
 
 [Service]
 ExecStart=BINDIR/mdadm --grow --continue /dev/%I
 StandardInput=null
 StandardOutput=null
 StandardError=null
-KillMode=none
index efeb3f639193b90358a7c2bcc9f23c87d9157ca9..e9381125d84822793bd6b85601720117d331a8cd 100644 (file)
@@ -2,6 +2,7 @@
 Description=Activate md array %I even though degraded
 DefaultDependencies=no
 ConditionPathExists=!/sys/devices/virtual/block/%i/md/sync_action
+Documentation=man:mdadm(8)
 
 [Service]
 Type=oneshot
index 854317f117000d9622d432d850b8d9ae08d0d3a0..70892a1f60182bad226d1092ece1ee0e2e6e4722 100644 (file)
@@ -7,11 +7,10 @@
 
 [Unit]
 Description=MD array scrubbing - continuation
-ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
+ConditionPathExistsGlob=/var/lib/mdcheck/MD_UUID_*
+Documentation=man:mdadm(8)
 
 [Service]
 Type=oneshot
 Environment="MDADM_CHECK_DURATION=6 hours"
-EnvironmentFile=-/run/sysconfig/mdadm
-ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
 ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION}
index 3bb3d130801f9c04acad46b70b6b26b819b40b7c..fc4fc4388c6c7377395818837e99102b51f025f4 100644 (file)
@@ -8,10 +8,9 @@
 [Unit]
 Description=MD array scrubbing
 Wants=mdcheck_continue.timer
+Documentation=man:mdadm(8)
 
 [Service]
 Type=oneshot
 Environment="MDADM_CHECK_DURATION=6 hours"
-EnvironmentFile=-/run/sysconfig/mdadm
-ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
 ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION}
index 85a3a7c58b842f2268c5e0da6802209de46a6e1d..020cc7e15e1fd65010621b4383ce7125baaba1b6 100644 (file)
@@ -6,23 +6,25 @@
 #  (at your option) any later version.
 
 [Unit]
-Description=MD Metadata Monitor on /dev/%I
+Description=MD Metadata Monitor on %I
 DefaultDependencies=no
 Before=initrd-switch-root.target
+Documentation=man:mdmon(8)
+# Allow mdmon to keep running after switchroot, until a new
+# instance is started.
+IgnoreOnIsolate=true
 
 [Service]
-# mdmon should never complain due to lack of a platform,
-# that is mdadm's job if at all.
-Environment=IMSM_NO_PLATFORM=1
 # The mdmon starting in the initramfs (with dracut at least)
 # cannot see sysfs after root is mounted, so we will have to
 # 'takeover'.  As the '--offroot --takeover' don't hurt when
 # not necessary, and are useful with root-on-md in dracut,
 # have them always present.
-ExecStart=BINDIR/mdmon --offroot --takeover %I
-Type=forking
+ExecStart=BINDIR/mdmon --foreground --offroot --takeover %I
 # Don't set the PIDFile.  It isn't necessary (systemd can work
 # it out) and systemd will remove it when transitioning from
 # initramfs to rootfs.
 #PIDFile=/run/mdadm/%I.pid
-KillMode=none
+# The default slice is system-mdmon.slice which Conflicts
+# with shutdown, causing mdmon to exit early.  So use system.slice.
+Slice=system.slice
index 373955a2e176785006087094035a61e2c2f30c35..ba86b44ed52fdeb409045816e5dfcccd8e7c8b56 100644 (file)
@@ -7,6 +7,7 @@
 
 [Unit]
 Description=Reminder for degraded MD arrays
+Documentation=man:mdadm(8)
 
 [Service]
 Environment=MDADM_MONITOR_ARGS=--scan
index 46f7b8801354cde02fc79c970da43a6432aaac2f..9c364785445b00097539e27ec31366b4f3343e78 100644 (file)
@@ -8,6 +8,7 @@
 [Unit]
 Description=MD array monitor
 DefaultDependencies=no
+Documentation=man:mdadm(8)
 
 [Service]
 Environment=  MDADM_MONITOR_ARGS=--scan
diff --git a/test b/test
index 711a3c7a2076d100ad908543a8b4935fa6882f56..338c2db44fa7c82576b4794c397369c8b909547e 100755 (executable)
--- a/test
+++ b/test
@@ -1,15 +1,19 @@
 #!/bin/bash
 #
 # run test suite for mdadm
-mdadm=$PWD/mdadm
+mdadm=`which mdadm`
 targetdir="/var/tmp"
 logdir="$targetdir"
 config=/tmp/mdadm.conf
 testdir=$PWD/tests
+system_speed_limit=`cat /proc/sys/dev/raid/speed_limit_max`
 devlist=
 
 savelogs=0
 exitonerror=1
+ctrl_c_error=0
+skipbroken=0
+loop=1
 prefix='[0-9][0-9]'
 
 # use loop devices by default if doesn't specify --dev
@@ -17,9 +21,6 @@ DEVTYPE=loop
 INTEGRITY=yes
 LVM_VOLGROUP=mdtest
 
-# make sure to test local mdmon, not system one
-export MDADM_NO_SYSTEMCTL=1
-
 # assume md0, md1, md2 exist in /dev
 md0=/dev/md0
 md1=/dev/md1
@@ -35,9 +36,13 @@ die() {
 
 ctrl_c() {
        exitonerror=1
+       ctrl_c_error=1
+}
+
+restore_system_speed_limit() {
+       echo $system_speed_limit > /proc/sys/dev/raid/speed_limit_max
 }
 
-# mdadm always adds --quiet, and we want to see any unexpected messages
 mdadm() {
        rm -f $targetdir/stderr
        case $* in
@@ -59,10 +64,10 @@ mdadm() {
                                        $mdadm --zero $args > /dev/null
                        }
                done
-               $mdadm 2> $targetdir/stderr --quiet "$@" --auto=yes
+               $mdadm 2> $targetdir/stderr "$@" --auto=yes
                ;;
        * )
-               $mdadm 2> $targetdir/stderr --quiet "$@"
+               $mdadm 2> $targetdir/stderr "$@"
                ;;
        esac
        rv=$?
@@ -79,28 +84,49 @@ mdadm() {
 do_test() {
        _script=$1
        _basename=`basename $_script`
+       _broken=0
+
        if [ -f "$_script" ]
        then
+               if [ -f "${_script}.broken" ]; then
+                       _broken=1
+                       _broken_msg=$(head -n1 "${_script}.broken" | tr -d '\n')
+                       if [ "$skipbroken" == "all" ]; then
+                               return
+                       elif [ "$skipbroken" == "always" ] &&
+                            [[ "$_broken_msg" == *always* ]]; then
+                               return
+                       fi
+               fi
+
                rm -f $targetdir/stderr
-               # this might have been reset: restore the default.
-               echo 2000 > /proc/sys/dev/raid/speed_limit_max
                do_clean
                # source script in a subshell, so it has access to our
                # namespace, but cannot change it.
                echo -ne "$_script... "
                if ( set -ex ; . $_script ) &> $targetdir/log
                then
-                       dmesg | grep -iq "error\|call trace\|segfault" &&
-                               die "dmesg prints errors when testing $_basename!"
+                       if [ -f "${_script}.inject_error" ]; then
+                               echo "dmesg checking is skipped because test inject error"
+                       else
+                               dmesg | grep -iq "error\|call trace\|segfault" &&
+                                       die "dmesg prints errors when testing $_basename!"
+                       fi
                        echo "succeeded"
                        _fail=0
                else
                        save_log fail
                        _fail=1
+                       if [ "$_broken" == "1" ]; then
+                               echo "  (KNOWN BROKEN TEST: $_broken_msg)"
+                       fi
                fi
+               restore_system_speed_limit
                [ "$savelogs" == "1" ] &&
                        mv -f $targetdir/log $logdir/$_basename.log
-               [ "$_fail" == "1" -a "$exitonerror" == "1" ] && exit 1
+               [ "$ctrl_c_error" == "1" ] && exit 1
+               [ "$_fail" == "1" -a "$exitonerror" == "1" \
+                 -a "$_broken" == "0" ] && exit 1
        fi
 }
 
@@ -114,9 +140,13 @@ do_help() {
                --raidtype=                 raid0|linear|raid1|raid456|raid10|ddf|imsm
                --disable-multipath         Disable any tests involving multipath
                --disable-integrity         Disable slow tests of RAID[56] consistency
+               --disable-linear            Disable any tests involving linear
                --logdir=directory          Directory to save all logfiles in
                --save-logs                 Usually use with --logdir together
                --keep-going | --no-error   Don't stop on error, ie. run all tests
+               --loop=N                    Run tests N times (0 to run forever)
+               --skip-broken               Skip tests that are known to be broken
+               --skip-always-broken        Skip tests that are known to always fail
                --dev=loop|lvm|ram|disk     Use loop devices (default), LVM, RAM or disk
                --disks=                    Provide a bunch of physical devices for test
                --volgroup=name             LVM volume group for LVM test
@@ -211,12 +241,24 @@ parse_args() {
                --keep-going | --no-error )
                        exitonerror=0
                        ;;
+               --loop=* )
+                       loop="${i##*=}"
+                       ;;
+               --skip-broken )
+                       skipbroken=all
+                       ;;
+               --skip-always-broken )
+                       skipbroken=always
+                       ;;
                --disable-multipath )
                        unset MULTIPATH
                        ;;
                --disable-integrity )
                        unset INTEGRITY
                        ;;
+               --disable-linear )
+                       unset LINEAR
+                       ;;
                --dev=* )
                        case ${i##*=} in
                        loop )
@@ -257,24 +299,44 @@ parse_args() {
        done
 }
 
+print_warning() {
+       cat <<-EOF
+       Warning! Tests are performed on system level mdadm!
+       If you want to test local build, you need to install it first!
+       EOF
+}
+
 main() {
+       print_warning
        do_setup
 
        echo "Testing on linux-$(uname -r) kernel"
        [ "$savelogs" == "1" ] &&
                echo "Saving logs to $logdir"
-       if [ "x$TESTLIST" != "x" ]
-       then
-               for script in ${TESTLIST[@]}
-               do
-                       do_test $testdir/$script
-               done
-       else
-               for script in $testdir/$prefix $testdir/$prefix*[^~]
-               do
-                       do_test $script
-               done
-       fi
+
+       while true; do
+               if [ "x$TESTLIST" != "x" ]
+               then
+                       for script in ${TESTLIST[@]}
+                       do
+                               do_test $testdir/$script
+                       done
+               else
+                       for script in $testdir/$prefix $testdir/$prefix*[^~]
+                       do
+                               case $script in
+                                *.broken) ;;
+                                *)
+                                    do_test $script
+                                esac
+                       done
+               fi
+
+               let loop=$loop-1
+               if [ "$loop" == "0" ]; then
+                       break
+               fi
+       done
 
        exit 0
 }
diff --git a/tests/00confnames b/tests/00confnames
new file mode 100644 (file)
index 0000000..191a905
--- /dev/null
@@ -0,0 +1,56 @@
+set -x -e
+. tests/templates/names_template
+
+# Test how <devname> is handled during Incremental assembly with
+# config file and ARRAYLINE specified.
+
+names_create "/dev/md/name"
+local _UUID="$(mdadm -D --export /dev/md127 | grep MD_UUID | cut -d'=' -f2)"
+[[ "$_UUID" == "" ]] && echo "Cannot obtain UUID for $DEVNODE_NAME" && exit 1
+
+
+# 1. <devname> definition consistent with metadata name.
+names_make_conf $_UUID "/dev/md/name" $config
+mdadm -S "/dev/md127"
+mdadm -I $dev0 --config=$config
+names_verify "/dev/md127" "name" "name"
+mdadm -S "/dev/md127"
+
+# 2. Same as 1, but use short name form of <devname>.
+names_make_conf $_UUID "name" $config
+mdadm -I $dev0 --config=$config
+names_verify "/dev/md127" "name" "name"
+mdadm -S "/dev/md127"
+
+# 3. Same as 1, but use different <devname> than metadata provides.
+names_make_conf $_UUID "/dev/md/other" $config
+mdadm -I $dev0 --config=$config
+names_verify "/dev/md127" "other" "name"
+mdadm -S "/dev/md127"
+
+# 4. Same as 3, but use short name form of <devname>.
+names_make_conf $_UUID "other" $config
+mdadm -I $dev0 --config=$config
+names_verify "/dev/md127" "other" "name"
+mdadm -S "/dev/md127"
+
+# 5. Force particular node creation by setting <devname> to /dev/mdX.
+# Link is not created in this case.
+names_make_conf $_UUID "/dev/md4" $config
+mdadm -I $dev0 --config=$config
+names_verify "/dev/md4" "empty" "name"
+mdadm -S "/dev/md4"
+
+# 6. <devname> with some special symbols and locales.
+# <devname> should be ignored.
+names_make_conf $_UUID "tźż-\.,<>st+-" $config
+mdadm -I $dev0 --config=$config
+names_verify "/dev/md127" "name" "name"
+mdadm -S "/dev/md127"
+
+# 7. No <devname> set.
+# Metadata name and default node used.
+names_make_conf $_UUID "empty" $config
+mdadm -I $dev0 --config=$config
+names_verify "/dev/md127" "name" "name"
+mdadm -S "/dev/md127"
diff --git a/tests/00createnames b/tests/00createnames
new file mode 100644 (file)
index 0000000..a95e7d2
--- /dev/null
@@ -0,0 +1,44 @@
+set -x -e
+. tests/templates/names_template
+
+# Test how <devname> and --name= are handled for create mode.
+
+# The most trivial case.
+names_create "/dev/md/name"
+names_verify "/dev/md127" "name" "name"
+mdadm -S "/dev/md127"
+
+names_create "name"
+names_verify "/dev/md127" "name" "name"
+mdadm -S "/dev/md127"
+
+# Use 'mdX' as name.
+names_create "/dev/md/md0"
+names_verify "/dev/md127" "md0" "md0"
+mdadm -S "/dev/md127"
+
+names_create "md0"
+names_verify "/dev/md127" "md0" "md0"
+mdadm -S "/dev/md127"
+
+# <devnode> is used to create MD_DEVNAME, but name is used to create MD_NAME.
+names_create "/dev/md/devnode" "name"
+names_verify "/dev/md127" "devnode" "name"
+mdadm -S "/dev/md127"
+
+names_create "devnode" "name"
+names_verify "/dev/md127" "devnode" "name"
+mdadm -S "/dev/md127"
+
+# Devnode points to /dev/ directory. MD_DEVNAME doesn't exist.
+names_create "/dev/md0"
+names_verify "/dev/md0" "empty" "0"
+mdadm -S "/dev/md0"
+
+# Devnode points to /dev/ directory and name is set.
+names_create "/dev/md0" "name"
+names_verify "/dev/md0" "empty" "name"
+mdadm -S "/dev/md0"
+
+# Devnode is the special <ignore> keyword. Creation should be rejected.
+names_create "<ignore>" "name" "true"
index e3ac6555c9dd4d6365f72db5559142f08cb69a4c..5a1160851af2cfb2a0f09423a2b159793c43f00d 100644 (file)
@@ -1,6 +1,11 @@
 
 # create a simple linear
 
+if [ "$LINEAR" != "yes" ]; then
+  echo -ne 'skipping... '
+  exit 0
+fi
+
 mdadm -CR $md0 -l linear -n3 $dev0 $dev1 $dev2
 check linear
 testdev $md0 3 $mdsize2_l 1
index 7a066d8fb2b76bf92c4c32e7007847b144d7cd1b..d996befc5e8b486e7485c0f2e7b6391aba351f29 100644 (file)
@@ -4,7 +4,13 @@ set -x -e
 conf=$targetdir/mdadm.conf
 echo "CREATE names=yes" > $conf
 
-for i in linear raid0 raid1 raid4 raid5 raid6
+levels=(raid0 raid1 raid4 raid5 raid6)
+
+if [ "$LINEAR" == "yes" ]; then
+  levels+=( linear )
+fi
+
+for i in ${levels[@]}
 do
   mdadm -CR --config $conf /dev/md/$i -l $i -n 4 $dev4 $dev3 $dev2 $dev1
   check $i
index 8bc18985f91a19d481129614aa4a24dca633b3ce..6407c320fd65ccd743adbd0fb42b3df90a4836be 100644 (file)
@@ -6,11 +6,9 @@ check raid0
 testdev $md0 3 $mdsize2_l 512
 mdadm -S $md0
 
-# now with version-0.90 superblock
+# verify that raid0 with layouts fails for 0.90
 mdadm -CR $md0 -e0.90 -l0 -n4 $dev0 $dev1 $dev2 $dev3
-check raid0
-testdev $md0 4 $mdsize0 512
-mdadm -S $md0
+check opposite_result
 
 # now with no superblock
 mdadm -B $md0 -l0 -n5 $dev0 $dev1 $dev2 $dev3 $dev4
@@ -18,12 +16,16 @@ check raid0
 testdev $md0 5 $size 512
 mdadm -S $md0
 
+if [ "$LINEAR" != "yes" ]; then
+  echo -ne 'skipping... '
+  exit 0
+fi
 
 # now same again with different chunk size
 for chunk in 4 32 256
 do
-  mdadm -CR $md0 -e0.90 -l raid0 --chunk $chunk -n3 $dev0 $dev1 $dev2
-  check raid0
+  mdadm -CR $md0 -e0.90 -l linear --chunk $chunk -n3 $dev0 $dev1 $dev2
+  check linear
   testdev $md0 3 $mdsize0 $chunk
   mdadm -S $md0
 
diff --git a/tests/00raid5-zero b/tests/00raid5-zero
new file mode 100644 (file)
index 0000000..7d0f05a
--- /dev/null
@@ -0,0 +1,12 @@
+
+if mdadm -CfR $md0 -l 5 -n3 $dev0 $dev1 $dev2 --write-zeroes ; then
+  check nosync
+  echo check > /sys/block/md0/md/sync_action;
+  check wait
+elif grep "zeroing [^ ]* failed: Operation not supported" \
+     $targetdir/stderr; then
+  echo "write-zeros not supported, skipping"
+else
+  echo >&2 "ERROR: mdadm return failure without not supported message"
+  exit 1
+fi
index 28b0fa13f8155acbe8cc83fc67e998b28fba52e9..80b63629e4f9a959afaf9e570e073000c287bcf8 100644 (file)
@@ -1,13 +1,24 @@
 #!/bin/bash
 
+levels=(raid0 raid1 raid4 raid5 raid6 raid10)
+
+if [ "$LINEAR" == "yes" ]; then
+  levels+=( linear )
+fi
+
 for metadata in 0.9 1.0 1.1 1.2
 do
-       for level in linear raid0 raid1 raid4 raid5 raid6 raid10
+       for level in ${levels[@]}
        do
+               if [[ $metadata == "0.9" && $level == "raid0" ]];
+               then
+                       continue
+               fi
                mdadm -CR $md0 -l $level -n 4 --metadata=$metadata \
                        $dev1 $dev2 $dev3 $dev4 --assume-clean
                check nosync
                check $level
+               udevadm settle
                mdadm -ro $md0
                check readonly
                state=$(cat /sys/block/md0/md/array_state)
index 873dba585e58a3d6c222a0f4177ee071032f14bd..c210d6e747f200b6b0960bbf59f4eda6bfeea8bd 100644 (file)
@@ -17,11 +17,7 @@ check wait
 mdadm $md0 --fail $dev0
 mdadm $md0 --remove $dev3 $dev0
 check recovery
-check state _UUU
-
-mdadm $md0 -a $dev3
-check recovery
 check wait
 check state UUUU
 
-mdadm -S $md0
\ No newline at end of file
+mdadm -S $md0
index e05c219d113a5715b046921687ae1e9bc8b7eef6..d17e2326d13f52daac75698015405cdd81f034d7 100644 (file)
@@ -1,6 +1,11 @@
 
 # create a liner array, and add more drives to to.
 
+if [ "$LINEAR" != "yes" ]; then
+  echo -ne 'skipping... '
+  exit 0
+fi
+
 for e in 0.90 1 1.1 1.2
 do
   case $e in
@@ -20,4 +25,6 @@ do
   testdev $md0 3 $sz 1
 
   mdadm -S $md0
+  mdadm --zero /dev/loop2
+  mdadm --zero /dev/loop3
 done
index f10a1a48ee5cdb2b1d23456c43deffc009dce9bd..38880a7fed101dcc072a7765d7dc54159a1d547c 100644 (file)
@@ -6,7 +6,13 @@ set -x -e
 # Here just test that a partly "-I" assembled array can
 # be completed with "-A"
 
-for l in 0 1 5 linear
+levels=(raid0 raid1 raid5)
+
+if [ "$LINEAR" == "yes" ]; then
+  levels+=( linear )
+fi
+
+for l in ${levels[@]}
 do
   mdadm -CR $md0 -l $l -n5 $dev0 $dev1 $dev2 $dev3 $dev4 --assume-clean
   mdadm -S md0
index 6744e3221062943989a2a74817c88e8de90d6ca0..f7c29e8c1ab6a4c23e9051f69300f0e3bcf72cd5 100644 (file)
@@ -64,13 +64,17 @@ mdadm --assemble --scan --config=$conf $md2
 $tst
 mdadm -S $md2
 
+if [ "$LINEAR" != "yes" ]; then
+  echo -ne 'skipping... '
+  exit 0
+fi
 
 ### Now for version 0...
 
 mdadm --zero-superblock $dev0 $dev1 $dev2
-mdadm -CR $md2 -l0 --metadata=0.90 -n3 $dev0 $dev1 $dev2
-check raid0
-tst="testdev $md2 3 $mdsize0 512"
+mdadm -CR $md2 -llinear --metadata=0.90 -n3 $dev0 $dev1 $dev2
+check linear
+tst="testdev $md2 3 $mdsize0 1"
 $tst
 
 uuid=`mdadm -Db $md2 | sed 's/.*UUID=//'`
index 73ee3b9fed918de030149f9a836ac469a696eefe..c495f34a0a79d62d763319d18482eabd342311b7 100644 (file)
@@ -1,7 +1,13 @@
 
 # create a raid0, re-assemble with a different super-minor
-mdadm -CR -e 0.90 $md0 -l0 -n3 $dev0 $dev1 $dev2
-testdev $md0 3 $mdsize0 512
+
+if [ "$LINEAR" != "yes" ]; then
+  echo -ne 'skipping... '
+  exit 0
+fi
+
+mdadm -CR -e 0.90 $md0 -llinear -n3 $dev0 $dev1 $dev2
+testdev $md0 3 $mdsize0 1
 minor1=`mdadm -E $dev0 | sed -n -e 's/.*Preferred Minor : //p'`
 mdadm -S /dev/md0
 
diff --git a/tests/04r5swap.broken b/tests/04r5swap.broken
new file mode 100644 (file)
index 0000000..e38987d
--- /dev/null
@@ -0,0 +1,7 @@
+always fails
+
+Fails with errors:
+
+  mdadm: /dev/loop0 has no superblock - assembly aborted
+
+   ERROR: no recovery happening
index 232fc1ffff4be7babad8c54bb76165b4a5099fba..2b72a303b6a05b3521d4ab6d8308209716f2025c 100644 (file)
@@ -8,10 +8,14 @@ set -xe
 
 dlist="$dev0 $dev1 $dev2 $dev3"
 
-for ls in raid0/4 linear/4 raid1/1 raid5/3 raid6/2
+for ls in linear/4 raid1/1 raid5/3 raid6/2
 do
   s=${ls#*/} l=${ls%/*}
-  mdadm -CR --assume-clean -e 0.90 $md0 --level $l -n 4 -c 64 $dlist
+  if [[ $l == 'raid1' ]]; then
+       mdadm -CR --assume-clean -e 0.90 $md0 --level $l -n 4 $dlist
+  else
+       mdadm -CR --assume-clean -e 0.90 $md0 --level $l -n 4 -c 64 $dlist
+  fi
   testdev $md0 $s 19904 64
   mdadm -S $md0
   mdadm -A $md0 --update=metadata $dlist
index 4d5e824d3e0e9c6ca97c7c3d9325f41a427d3a66..86eaab69e3a1c850bcc01cec353f85d18b975e00 100644 (file)
@@ -3,8 +3,8 @@ set -x
 # create an array with a name
 
 mdadm -CR $md0 -l0 -n2 --metadata=1 --name="Fred" $dev0 $dev1
-mdadm -E $dev0 | grep 'Name : [^:]*:Fred ' > /dev/null || exit 1
-mdadm -D $md0 | grep 'Name : [^:]*:Fred ' > /dev/null || exit 1
+mdadm -E $dev0 | grep 'Name : Fred' > /dev/null || exit 1
+mdadm -D $md0 | grep 'Name : Fred' > /dev/null || exit 1
 mdadm -S $md0
 
 mdadm -A $md0 --name="Fred" $devlist
diff --git a/tests/07autoassemble.broken b/tests/07autoassemble.broken
new file mode 100644 (file)
index 0000000..8be0940
--- /dev/null
@@ -0,0 +1,8 @@
+always fails
+
+Prints lots of messages, but the array doesn't assemble. Error
+possibly related to:
+
+  mdadm: /dev/md/1 is busy - skipping
+  mdadm: no recogniseable superblock on /dev/md/testing:0
+  mdadm: /dev/md/2 is busy - skipping
diff --git a/tests/07autodetect.broken b/tests/07autodetect.broken
new file mode 100644 (file)
index 0000000..294954a
--- /dev/null
@@ -0,0 +1,5 @@
+always fails
+
+Fails with error:
+
+    ERROR: no resync happening
diff --git a/tests/07changelevelintr.broken b/tests/07changelevelintr.broken
new file mode 100644 (file)
index 0000000..284b490
--- /dev/null
@@ -0,0 +1,9 @@
+always fails
+
+Fails with errors:
+
+  mdadm: this change will reduce the size of the array.
+         use --grow --array-size first to truncate array.
+         e.g. mdadm --grow /dev/md0 --array-size 56832
+
+  ERROR: no reshape happening
diff --git a/tests/07changelevels.broken b/tests/07changelevels.broken
new file mode 100644 (file)
index 0000000..9b930d9
--- /dev/null
@@ -0,0 +1,9 @@
+always fails
+
+Fails with errors:
+
+    mdadm: /dev/loop0 is smaller than given size. 18976K < 19968K + metadata
+    mdadm: /dev/loop1 is smaller than given size. 18976K < 19968K + metadata
+    mdadm: /dev/loop2 is smaller than given size. 18976K < 19968K + metadata
+
+    ERROR: /dev/md0 isn't a block device.
diff --git a/tests/07reshape5intr.broken b/tests/07reshape5intr.broken
new file mode 100644 (file)
index 0000000..efe52a6
--- /dev/null
@@ -0,0 +1,45 @@
+always fails
+
+This patch, recently added to md-next, causes the test to always fail:
+
+7e6ba434cc60 ("md: don't unregister sync_thread with reconfig_mutex
+held")
+
+The new error is simply:
+
+   ERROR: no reshape happening
+
+Before the patch, the error seen is below.
+
+--
+
+fails infrequently
+
+Fails roughly 1 in 4 runs with errors:
+
+    mdadm: Merging with already-assembled /dev/md/0
+    mdadm: cannot re-read metadata from /dev/loop6 - aborting
+
+    ERROR: no reshape happening
+
+A random deadlock has also been seen:
+
+     INFO: task mdadm:109702 blocked for more than 30 seconds.
+           Not tainted 5.18.0-rc3-eid-vmlocalyes-dbg-00095-g3c2b5427979d #2040
+     "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+     task:mdadm           state:D stack:    0 pid:109702 ppid:     1 flags:0x00004000
+     Call Trace:
+      <TASK>
+      __schedule+0x67e/0x13b0
+      schedule+0x82/0x110
+      mddev_suspend+0x2e1/0x330
+      suspend_lo_store+0xbd/0x140
+      md_attr_store+0xcb/0x130
+      sysfs_kf_write+0x89/0xb0
+      kernfs_fop_write_iter+0x202/0x2c0
+      new_sync_write+0x222/0x330
+      vfs_write+0x3bc/0x4d0
+      ksys_write+0xd9/0x180
+      __x64_sys_write+0x43/0x50
+      do_syscall_64+0x3b/0x90
+      entry_SYSCALL_64_after_hwframe+0x44/0xae
diff --git a/tests/07revert-grow.broken b/tests/07revert-grow.broken
new file mode 100644 (file)
index 0000000..9b6db86
--- /dev/null
@@ -0,0 +1,31 @@
+always fails
+
+This patch, recently added to md-next, causes the test to always fail:
+
+7e6ba434cc60 ("md: don't unregister sync_thread with reconfig_mutex held")
+
+The errors are:
+
+    mdadm: No active reshape to revert on /dev/loop0
+    ERROR: active raid5 not found
+
+Before the patch, the error seen is below.
+
+--
+
+fails rarely
+
+Fails about 1 in every 30 runs with errors:
+
+    mdadm: Merging with already-assembled /dev/md/0
+    mdadm: backup file /tmp/md-backup inaccessible: No such file or directory
+    mdadm: failed to add /dev/loop1 to /dev/md/0: Invalid argument
+    mdadm: failed to add /dev/loop2 to /dev/md/0: Invalid argument
+    mdadm: failed to add /dev/loop3 to /dev/md/0: Invalid argument
+    mdadm: failed to add /dev/loop0 to /dev/md/0: Invalid argument
+    mdadm: /dev/md/0 assembled from 1 drive - need all 5 to start it
+            (use --run to insist).
+
+    grep: /sys/block/md*/md/sync_action: No such file or directory
+
+    ERROR: active raid5 not found
diff --git a/tests/07revert-shrink.broken b/tests/07revert-shrink.broken
new file mode 100644 (file)
index 0000000..c33c39e
--- /dev/null
@@ -0,0 +1,9 @@
+always fails
+
+Fails with errors:
+
+    mdadm: this change will reduce the size of the array.
+           use --grow --array-size first to truncate array.
+           e.g. mdadm --grow /dev/md0 --array-size 53760
+
+    ERROR: active raid5 not found
diff --git a/tests/07testreshape5.broken b/tests/07testreshape5.broken
new file mode 100644 (file)
index 0000000..a8ce03e
--- /dev/null
@@ -0,0 +1,12 @@
+always fails
+
+The test seems to run 'test_stripe' from the $dir directory, but $dir is never
+set. If $dir is adjusted to $PWD, the test still fails with:
+
+    mdadm: /dev/loop2 is not suitable for this array.
+    mdadm: create aborted
+    ++ return 1
+    ++ cmp -s -n 8192 /dev/md0 /tmp/RandFile
+    ++ echo cmp failed
+    cmp failed
+    ++ exit 2
diff --git a/tests/09imsm-assemble.broken b/tests/09imsm-assemble.broken
new file mode 100644 (file)
index 0000000..a6d4d5c
--- /dev/null
@@ -0,0 +1,6 @@
+fails infrequently
+
+Fails roughly 1 in 10 runs with errors:
+
+    mdadm: /dev/loop2 is still in use, cannot remove.
+    /dev/loop2 removal from /dev/md/container should have succeeded
diff --git a/tests/09imsm-create-fail-rebuild.broken b/tests/09imsm-create-fail-rebuild.broken
new file mode 100644 (file)
index 0000000..40c4b29
--- /dev/null
@@ -0,0 +1,5 @@
+always fails
+
+Fails with error:
+
+    **Error**: Array size mismatch - expected 3072, actual 16384
diff --git a/tests/09imsm-overlap b/tests/09imsm-overlap
deleted file mode 100644 (file)
index ff5d209..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-
-. tests/env-imsm-template
-
-# create raid arrays with varying degress of overlap
-mdadm -CR $container -e imsm -n 6 $dev0 $dev1 $dev2 $dev3 $dev4 $dev5
-imsm_check container 6
-
-size=1024
-level=1
-num_disks=2
-mdadm -CR $member0 $dev0 $dev1 -n $num_disks -l $level -z $size
-mdadm -CR $member1 $dev1 $dev2 -n $num_disks -l $level -z $size
-mdadm -CR $member2 $dev2 $dev3 -n $num_disks -l $level -z $size
-mdadm -CR $member3 $dev3 $dev4 -n $num_disks -l $level -z $size
-mdadm -CR $member4 $dev4 $dev5 -n $num_disks -l $level -z $size
-
-udevadm settle
-
-offset=0
-imsm_check member $member0 $num_disks $level $size 1024 $offset
-offset=$((offset+size+4096))
-imsm_check member $member1 $num_disks $level $size 1024 $offset
-offset=$((offset+size+4096))
-imsm_check member $member2 $num_disks $level $size 1024 $offset
-offset=$((offset+size+4096))
-imsm_check member $member3 $num_disks $level $size 1024 $offset
-offset=$((offset+size+4096))
-imsm_check member $member4 $num_disks $level $size 1024 $offset
diff --git a/tests/09imsm-overlap.broken b/tests/09imsm-overlap.broken
new file mode 100644 (file)
index 0000000..e7ccab7
--- /dev/null
@@ -0,0 +1,7 @@
+always fails
+
+Fails with errors:
+
+    **Error**: Offset mismatch - expected 15360, actual 0
+    **Error**: Offset mismatch - expected 15360, actual 0
+    /dev/md/vol3 failed check
diff --git a/tests/10ddf-assemble-missing.broken b/tests/10ddf-assemble-missing.broken
new file mode 100644 (file)
index 0000000..bfd8d10
--- /dev/null
@@ -0,0 +1,6 @@
+always fails
+
+Fails with errors:
+
+    ERROR: /dev/md/vol0 has unexpected state on /dev/loop10
+    ERROR: unexpected number of online disks on /dev/loop10
diff --git a/tests/10ddf-fail-create-race.broken b/tests/10ddf-fail-create-race.broken
new file mode 100644 (file)
index 0000000..6c0df02
--- /dev/null
@@ -0,0 +1,7 @@
+usually fails
+
+Fails about 9 out of 10 times with many errors:
+
+    mdadm: cannot open MISSING: No such file or directory
+    ERROR: non-degraded array found
+    ERROR: disk 0 not marked as failed in meta data
diff --git a/tests/10ddf-fail-two-spares.broken b/tests/10ddf-fail-two-spares.broken
new file mode 100644 (file)
index 0000000..eeea56d
--- /dev/null
@@ -0,0 +1,5 @@
+fails infrequently
+
+Fails roughly 1 in 3 runs with error:
+
+   ERROR: /dev/md/vol1 should be optimal in meta data
diff --git a/tests/10ddf-incremental-wrong-order.broken b/tests/10ddf-incremental-wrong-order.broken
new file mode 100644 (file)
index 0000000..a5af3ba
--- /dev/null
@@ -0,0 +1,9 @@
+always fails
+
+Fails with errors:
+    ERROR: sha1sum of /dev/md/vol0 has changed
+    ERROR: /dev/md/vol0 has unexpected state on /dev/loop10
+    ERROR: unexpected number of online disks on /dev/loop10
+    ERROR: /dev/md/vol0 has unexpected state on /dev/loop8
+    ERROR: unexpected number of online disks on /dev/loop8
+    ERROR: sha1sum of /dev/md/vol0 has changed
index 1edd50e4aba557a1886647cb3d0844c3a72ab69f..be20ab819d1b655da89a0df213e11f2d00f86a9b 100644 (file)
@@ -10,7 +10,6 @@ spare_list="$dev4"
 # Before: RAID 1 volume, 2 disks, 64k chunk size
 vol0_level=1
 vol0_comp_size=$((5 * 1024))
-vol0_chunk=64
 vol0_num_comps=$((num_disks - 1))
 vol0_offset=0
 
diff --git a/tests/14imsm-r1_2d-grow-r1_3d.broken b/tests/14imsm-r1_2d-grow-r1_3d.broken
new file mode 100644 (file)
index 0000000..4ef1d40
--- /dev/null
@@ -0,0 +1,5 @@
+always fails
+
+Fails with error:
+
+    mdadm/tests/func.sh: line 325: dvsize/chunk: division by 0 (error token is "chunk")
index d8296815d35068e8d836e237c1f50c10d35a5fee..27002e1c8363ee255ee8704cab1d0567dfd67e24 100644 (file)
@@ -10,7 +10,6 @@ device_list="$dev0 $dev1"
 # Before: RAID 1 volume, 2 disks, 64k chunk size
 vol0_level=1
 vol0_comp_size=$((5 * 1024))
-vol0_chunk=64
 vol0_num_comps=$((num_disks - 1))
 vol0_offset=0
 
diff --git a/tests/14imsm-r1_2d-takeover-r0_2d.broken b/tests/14imsm-r1_2d-takeover-r0_2d.broken
new file mode 100644 (file)
index 0000000..89cd4e5
--- /dev/null
@@ -0,0 +1,6 @@
+always fails
+
+Fails with error:
+
+    tests/func.sh: line 325: dvsize/chunk: division by 0 (error token
+               is "chunk")
index 72e4173eb209a0f83586cdf0401f948ebfc8518b..e38ed89ba120a2d3e0ba9770a70e303715d17739 100644 (file)
@@ -12,7 +12,7 @@ check wait
 imsm_check container $vol0_num_comps
 
 # Create RAID 1 volume
-mdadm --create --run $member0 --auto=md --level=1 --size=$vol0_comp_size --chunk=64 --raid-disks=$((vol0_num_comps + 1)) $dev0 missing
+mdadm --create --run $member0 --auto=md --level=1 --size=$vol0_comp_size --raid-disks=$((vol0_num_comps + 1)) $dev0 missing
 check wait
 
 # Test the member0
diff --git a/tests/18imsm-r10_4d-takeover-r0_2d.broken b/tests/18imsm-r10_4d-takeover-r0_2d.broken
new file mode 100644 (file)
index 0000000..a27399f
--- /dev/null
@@ -0,0 +1,5 @@
+fails rarely
+
+Fails about 1 run in 100 with message:
+
+   ERROR:  size is wrong for /dev/md/vol0: 2 * 5120 (chunk=128) = 20480, not 0
index fd5852ede9bc3cb340611308cac78b41b63b7e9c..049f19c95ecfa8a707beffe97243686638e9d56e 100644 (file)
@@ -9,7 +9,6 @@ device_list="$dev0 $dev1"
 # Before: RAID 1 volume, 2 disks
 vol0_level=1
 vol0_comp_size=$((5 * 1024))
-vol0_chunk=64
 vol0_num_comps=$(( $num_disks - 1 ))
 vol0_offset=0
 
diff --git a/tests/18imsm-r1_2d-takeover-r0_1d.broken b/tests/18imsm-r1_2d-takeover-r0_1d.broken
new file mode 100644 (file)
index 0000000..aa1982e
--- /dev/null
@@ -0,0 +1,6 @@
+always fails
+
+Fails with error:
+
+    tests/func.sh: line 325: dvsize/chunk: division by 0 (error token
+                       is "chunk")
diff --git a/tests/19raid6auto-repair.broken b/tests/19raid6auto-repair.broken
new file mode 100644 (file)
index 0000000..e91a142
--- /dev/null
@@ -0,0 +1,5 @@
+always fails
+
+Fails with:
+
+    "should detect errors"
diff --git a/tests/19raid6repair.broken b/tests/19raid6repair.broken
new file mode 100644 (file)
index 0000000..e91a142
--- /dev/null
@@ -0,0 +1,5 @@
+always fails
+
+Fails with:
+
+    "should detect errors"
diff --git a/tests/23rdev-lifetime b/tests/23rdev-lifetime
new file mode 100644 (file)
index 0000000..03b61de
--- /dev/null
@@ -0,0 +1,34 @@
+devname=${dev0##*/}
+devt=`cat /sys/block/$devname/dev`
+pid=""
+runtime=2
+
+clean_up_test() {
+       kill -9 $pid
+       echo clear > /sys/block/md0/md/array_state
+}
+
+trap 'clean_up_test' EXIT
+
+add_by_sysfs() {
+        while true; do
+                echo $devt > /sys/block/md0/md/new_dev
+        done
+}
+
+remove_by_sysfs(){
+        while true; do
+                echo remove > /sys/block/md0/md/dev-${devname}/state
+        done
+}
+
+echo md0 > /sys/module/md_mod/parameters/new_array || die "create md0 failed"
+
+add_by_sysfs &
+pid="$pid $!"
+
+remove_by_sysfs &
+pid="$pid $!"
+
+sleep $runtime
+exit 0
diff --git a/tests/24raid10deadlock b/tests/24raid10deadlock
new file mode 100644 (file)
index 0000000..ee330aa
--- /dev/null
@@ -0,0 +1,88 @@
+devs="$dev0 $dev1 $dev2 $dev3"
+runtime=120
+pid=""
+action_pid=""
+
+set_up_injection()
+{
+       echo -1 > /sys/kernel/debug/fail_make_request/times
+       echo 1 > /sys/kernel/debug/fail_make_request/probability
+       echo 0 > /sys/kernel/debug/fail_make_request/verbose
+       echo 1 > /sys/block/${1##*/}/make-it-fail
+}
+
+clean_up_injection()
+{
+       echo 0 > /sys/block/${1##*/}/make-it-fail
+       echo 0 > /sys/kernel/debug/fail_make_request/times
+       echo 0 > /sys/kernel/debug/fail_make_request/probability
+       echo 2 > /sys/kernel/debug/fail_make_request/verbose
+}
+
+test_rdev()
+{
+       while true; do
+               mdadm -f $md0 $1 &> /dev/null
+               mdadm -r $md0 $1 &> /dev/null
+               mdadm --zero-superblock $1 &> /dev/null
+               mdadm -a $md0 $1 &> /dev/null
+               sleep $2
+       done
+}
+
+test_write_action()
+{
+       while true; do
+               echo frozen > /sys/block/md0/md/sync_action
+               echo idle > /sys/block/md0/md/sync_action
+               sleep 0.1
+       done
+}
+
+set_up_test()
+{
+       fio -h &> /dev/null || die "fio not found"
+
+       # create a simple raid10
+       mdadm -Cv -R -n 4 -l10 $md0 $devs || die "create raid10 failed"
+}
+
+clean_up_test()
+{
+       clean_up_injection $dev0
+       pkill -9 fio
+       kill -9 $pid
+       kill -9 $action_pid
+
+       sleep 1
+
+       if ps $action_pid | tail -1 | awk '{print $3}' | grep D; then
+               die "thread that is writing sysfs is stuck in D state, deadlock is triggered"
+       fi
+       mdadm -S $md0
+}
+
+cat /sys/kernel/debug/fail_make_request/times || die "fault injection is not enabled"
+
+trap 'clean_up_test' EXIT
+
+set_up_test || die "set up test failed"
+
+# background io pressure
+fio -filename=$md0 -rw=randwrite -direct=1 -name=test -bs=4k -numjobs=16 -iodepth=16 &
+
+# trigger add/remove device by io failure
+set_up_injection $dev0
+test_rdev $dev0 2 &
+pid="$pid $!"
+
+# add/remove device directly
+test_rdev $dev3 10 &
+pid="$pid $!"
+
+test_write_action &
+action_pid="$!"
+
+sleep $runtime
+
+exit 0
diff --git a/tests/24raid10deadlock.inject_error b/tests/24raid10deadlock.inject_error
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tests/24raid456deadlock b/tests/24raid456deadlock
new file mode 100644 (file)
index 0000000..80e6e97
--- /dev/null
@@ -0,0 +1,58 @@
+devs="$dev0 $dev1 $dev2 $dev3 $dev4 $dev5"
+runtime=120
+pid=""
+old=`cat /proc/sys/vm/dirty_background_ratio`
+
+# Toggle sync_action between "check" and "idle" forever (run in background).
+test_write_action()
+{
+       while true; do
+               # Discard stderr only: a trailing "&> /dev/null" is applied after
+               # the sysfs redirection and would send the value to /dev/null.
+               echo check 2> /dev/null > /sys/block/md0/md/sync_action
+               sleep 0.1
+               echo idle 2> /dev/null > /sys/block/md0/md/sync_action
+       done
+}
+
+test_write_back()
+{
+       fio -filename=$md0 -bs=4k -rw=write -numjobs=1 -name=test \
+               -time_based -runtime=$runtime &> /dev/null
+}
+
+set_up_test()
+{
+       fio -h &> /dev/null || die "fio not found"
+
+       # create a simple raid6
+       mdadm -Cv -R -n 6 -l6 $md0 $devs --assume-clean || die "create raid6 failed"
+
+       # trigger dirty pages write back
+       echo 0 > /proc/sys/vm/dirty_background_ratio
+}
+
+clean_up_test()
+{
+       echo $old > /proc/sys/vm/dirty_background_ratio
+
+       pkill -9 fio
+       kill -9 $pid
+
+       sleep 1
+
+       if ps $pid | tail -1 | awk '{print $3}' | grep D; then
+               die "thread that is writing sysfs is stuck in D state, deadlock is triggered"
+       fi
+       mdadm -S $md0
+}
+
+trap 'clean_up_test' EXIT
+
+set_up_test || die "set up test failed"
+
+test_write_back &
+
+test_write_action &
+pid="$!"
+
+sleep $runtime
+
+exit 0
diff --git a/tests/25raid456-recovery-while-reshape b/tests/25raid456-recovery-while-reshape
new file mode 100644 (file)
index 0000000..3f6251b
--- /dev/null
@@ -0,0 +1,33 @@
+devs="$dev0 $dev1 $dev2"
+
+set_up_test()
+{
+       mdadm -Cv -R -n 3 -l5 $md0 $devs --assume-clean --size=50M || die "create array failed"
+       mdadm -a $md0 $dev3 $dev4 || die "failed to bind new disk to array"
+       echo 1000 > /sys/block/md0/md/sync_speed_max
+}
+
+clean_up_test()
+{
+       mdadm -S $md0
+}
+
+trap 'clean_up_test' EXIT
+
+set_up_test || die "set up test failed"
+
+# trigger reshape
+mdadm --grow -l 6 $md0
+sleep 1
+
+# set up replacement
+echo frozen > /sys/block/md0/md/sync_action
+echo want_replacement > /sys/block/md0/md/rd0/state
+echo reshape > /sys/block/md0/md/sync_action
+sleep 1
+
+# reassemble array
+mdadm -S $md0 || die "can't stop array"
+mdadm --assemble $md0 $devs $dev3 $dev4 || die "can't assemble array"
+
+exit 0
diff --git a/tests/25raid456-reshape-corrupt-data b/tests/25raid456-reshape-corrupt-data
new file mode 100644 (file)
index 0000000..fdb875f
--- /dev/null
@@ -0,0 +1,35 @@
+devs="$dev0 $dev1 $dev2"
+
+set_up_test()
+{
+       mdadm -Cv -R -n 3 -l5 $md0 $devs --size=50M || die "create array failed"
+       mdadm -a $md0 $dev3 || die "failed to bind new disk to array"
+       mkfs.xfs -f $md0 || die "mkfs failed"
+       xfs_ncheck $md0 || die "check fs failed"
+}
+
+clean_up_test()
+{
+       mdadm -S $md0
+}
+
+trap 'clean_up_test' EXIT
+
+set_up_test || die "set up test failed"
+
+# trigger reshape
+echo 1000 > /sys/block/md0/md/sync_speed_max
+mdadm --grow -l 6 $md0
+sleep 1
+
+# stop and start reshape
+echo frozen > /sys/block/md0/md/sync_action
+echo system > /sys/block/md0/md/sync_speed_max
+echo reshape > /sys/block/md0/md/sync_action
+
+mdadm -W $md0
+
+# check if data is corrupted
+xfs_ncheck $md0 || die "data is corrupted after reshape"
+
+exit 0
diff --git a/tests/25raid456-reshape-deadlock b/tests/25raid456-reshape-deadlock
new file mode 100644 (file)
index 0000000..bfa0cc5
--- /dev/null
@@ -0,0 +1,34 @@
+devs="$dev0 $dev1 $dev2"
+
+set_up_test()
+{
+       mdadm -Cv -R -n 3 -l5 $md0 $devs --size=50M || die "create array failed"
+       mdadm -a $md0 $dev3 || die "failed to bind new disk to array"
+       echo 1000 > /sys/block/md0/md/sync_speed_max
+}
+
+clean_up_test()
+{
+       echo idle > /sys/block/md0/md/sync_action
+       mdadm -S $md0
+}
+
+trap 'clean_up_test' EXIT
+
+set_up_test || die "set up test failed"
+
+# trigger reshape
+mdadm --grow -l 6 $md0
+sleep 1
+
+# stop reshape
+echo frozen > /sys/block/md0/md/sync_action
+
+# read across reshape
+dd if=$md0 of=/dev/null bs=1M count=100 iflag=direct &> /dev/null &
+sleep 2
+
+# suspend array
+echo 1 > /sys/block/md0/md/suspend_lo
+
+exit 0
diff --git a/tests/25raid456-reshape-while-recovery b/tests/25raid456-reshape-while-recovery
new file mode 100644 (file)
index 0000000..b9f871f
--- /dev/null
@@ -0,0 +1,32 @@
+devs="$dev0 $dev1 $dev2"
+
+set_up_test()
+{
+       mdadm -Cv -R -n 3 -l5 $md0 $devs --assume-clean --size=50M || die "create array failed"
+       mdadm -a $md0 $dev3 $dev4 || die "failed to bind new disk to array"
+       echo 1000 > /sys/block/md0/md/sync_speed_max
+}
+
+clean_up_test()
+{
+       mdadm -S $md0
+}
+
+trap 'clean_up_test' EXIT
+
+set_up_test || die "set up test failed"
+
+# set up replacement
+echo want_replacement > /sys/block/md0/md/rd0/state
+sleep 1
+
+# trigger reshape
+echo frozen > /sys/block/md0/md/sync_action
+mdadm --grow -l 6 $md0
+sleep 1
+
+# reassemble array
+mdadm -S $md0 || die "can't stop array"
+mdadm --assemble $md0 $devs $dev3 $dev4 || die "can't assemble array"
+
+exit 0
index 9710a53b8a7344393e7f6ab6c32c0c49da766e9e..b474442b6abe296b0f9b276f4824492b1d08d144 100644 (file)
@@ -123,6 +123,17 @@ check_env() {
        modprobe multipath 2> /dev/null
        grep -sq 'Personalities : .*multipath' /proc/mdstat &&
                MULTIPATH="yes"
+       if [ "$MULTIPATH" != "yes" ]; then
+               echo "test: skipping tests for multipath, which is removed in upstream 6.8+ kernels"
+       fi
+
+       # Check whether to run linear tests
+       modprobe linear 2> /dev/null
+       grep -sq 'Personalities : .*linear' /proc/mdstat &&
+               LINEAR="yes"
+       if [ "$LINEAR" != "yes" ]; then
+               echo "test: skipping tests for linear, which is removed in upstream 6.8+ kernels"
+       fi
 }
 
 do_setup() {
@@ -170,7 +181,6 @@ do_setup() {
                                dd if=/dev/zero of=$targetdir/mdtest$d count=$sz bs=1K > /dev/null 2>&1
                        # make sure udev doesn't touch
                        mdadm --zero $targetdir/mdtest$d 2> /dev/null
-                       [ -b /dev/loop$d ] || mknod /dev/loop$d b 7 $d
                        if [ $d -eq 7 ]
                        then
                                losetup /dev/loop$d $targetdir/mdtest6 # for multipath use
@@ -203,7 +213,6 @@ do_setup() {
        path1=$dev7
        ulimit -c unlimited
        [ -f /proc/mdstat ] || modprobe md_mod
-       echo 2000 > /proc/sys/dev/raid/speed_limit_max
        echo 0 > /sys/module/md_mod/parameters/start_ro
 }
 
index 428e448e0a0f16720f5303a0b6b927aa9fcd5424..1a8676e0630f56a761d5549bf868e6c4d9e094fe 100644 (file)
@@ -42,13 +42,21 @@ check wait
 imsm_check container $num_disks
 
 # Create first volume inside the container
-mdadm --create --run $member0 --auto=md --level=$vol0_level --size=$vol0_comp_size --chunk=$vol0_chunk --raid-disks=$num_disks $device_list
+if [[ ! -z $vol0_chunk ]]; then
+       mdadm --create --run $member0 --auto=md --level=$vol0_level --size=$vol0_comp_size --chunk=$vol0_chunk --raid-disks=$num_disks $device_list
+else
+       mdadm --create --run $member0 --auto=md --level=$vol0_level --size=$vol0_comp_size --raid-disks=$num_disks $device_list
+fi
 check wait
 
 # Create second volume inside the container (if defined)
-if [ ! -z $vol1_chunk ]; then
-    mdadm --create --run $member1 --auto=md --level=$vol1_level --size=$vol1_comp_size --chunk=$vol1_chunk --raid-disks=$num_disks $device_list
-    check wait
+if [ ! -z $vol1_level ]; then
+       if [ ! -z $vol1_chunk ]; then
+               mdadm --create --run $member1 --auto=md --level=$vol1_level --size=$vol1_comp_size --chunk=$vol1_chunk --raid-disks=$num_disks $device_list
+       else
+               mdadm --create --run $member1 --auto=md --level=$vol1_level --size=$vol1_comp_size --raid-disks=$num_disks $device_list
+       fi
+       check wait
 fi
 
 # Wait for any RESYNC to complete
@@ -59,7 +67,7 @@ imsm_check member $member0 $num_disks $vol0_level $vol0_comp_size $((vol0_comp_s
 testdev $member0 $vol0_num_comps $vol0_comp_size $vol0_chunk
 
 # Test second volume (if defined)
-if [ ! -z $vol1_chunk ]; then
+if [ ! -z $vol1_level ]; then
     imsm_check member $member1 $num_disks $vol1_level $vol1_comp_size $((vol1_comp_size * vol1_num_comps)) $vol1_offset $vol1_chunk
     testdev $member1 $vol1_num_comps $vol1_comp_size $vol1_chunk
 fi
diff --git a/tests/templates/names_template b/tests/templates/names_template
new file mode 100644 (file)
index 0000000..1b6cd14
--- /dev/null
@@ -0,0 +1,75 @@
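+# NAME is optional. Testing with native 1.2 superblock.
+# NOTE(review): both "$?" checks below are evaluated after another test command, so they presumably no longer hold mdadm's exit status - confirm.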
+function names_create() {
+       local DEVNAME=$1
+       local NAME=$2
+       local NEG_TEST=$3
+
+       if [[ -z "$NAME" ]]; then
+               mdadm -CR "$DEVNAME" -l0 -n 1 $dev0 --force
+       else
+               mdadm -CR "$DEVNAME" --name="$NAME" --metadata=1.2 -l0 -n 1 $dev0 --force
+       fi
+
+       if [[ "$NEG_TEST" == "true" ]]; then
+               [[ "$?" == "0" ]] && return 0
+               echo "Negative verification failed"
+               exit 1
+       fi
+
+       if [[ "$?" != "0" ]]; then
+               echo "Cannot create device."
+               exit 1
+       fi
+}
+
+# Three properties to check:
+# - devnode name
+# - link in /dev/md/ (MD_DEVNAME property from --detail --export)
+# - name in metadata (MD_NAME property from --detail --export)- that works only with 1.2 sb.
+function names_verify() {
+       local DEVNODE_NAME="$1"
+       local WANTED_LINK="$2"
+       local WANTED_NAME="$3"
+
+       local RES="$(mdadm -D --export $DEVNODE_NAME | grep MD_DEVNAME)"
+       if [[ "$?" != "0" ]]; then
+               echo "Cannot get details for $DEVNODE_NAME - unexpected devnode."
+               exit 1
+       fi
+
+       if [[ "$WANTED_LINK" != "empty" ]]; then
+               local EXPECTED="MD_DEVNAME=$WANTED_LINK"
+       fi
+
+       if [[ "$RES" != "$EXPECTED" ]]; then
+               echo "$RES doesn't match $EXPECTED."
+               exit 1
+       fi
+
+       local RES="$(mdadm -D --export $DEVNODE_NAME | grep MD_NAME)"
+       if [[ "$?" != "0" ]]; then
+               echo "Cannot get metadata from $dev0."
+               exit 1
+       fi
+
+       local EXPECTED="MD_NAME=$(hostname):$WANTED_NAME"
+       if [[ "$RES" != "$EXPECTED" ]]; then
+               echo "$RES doesn't match $EXPECTED."
+               exit 1
+       fi
+}
+
+# Generate ARRAYLINE for tested array.
+names_make_conf() {
+       local UUID="$1"
+       local WANTED_DEVNAME="$2"
+       local CONF="$3"
+
+       local LINE="ARRAY metadata=1.2 UUID=$UUID"
+
+       if [[ "$WANTED_DEVNAME" != "empty" ]]; then
+               LINE="$LINE $WANTED_DEVNAME"
+       fi
+
+       echo $LINE > $CONF
+}
index c8fa8e89ef69ea183081c5562e70a5801d4e3735..4e64b249b2db561f446618cacaaa7e1ac2e494cd 100644 (file)
@@ -3,7 +3,7 @@
 SUBSYSTEM!="block", GOTO="md_end"
 
 # handle md arrays
-ACTION!="add|change", GOTO="md_end"
+ACTION=="remove", GOTO="md_end"
 KERNEL!="md*", GOTO="md_end"
 
 # partitions have no md/{array_state,metadata_version}, but should not
@@ -15,6 +15,7 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state"
 ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state"
 TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
 ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
+ATTR{md/sync_action}=="reshape", ENV{RESHAPE_ACTIVE}="yes"
 LABEL="md_ignore_state"
 
 IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
@@ -37,6 +38,8 @@ ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service"
 
 # Tell systemd to run mdmon for our container, if we need it.
 ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/usr/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c"
-ENV{MD_MON_THIS}=="?*", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service"
+ENV{MD_MON_THIS}=="?*", TEST=="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@initrd-%c.service"
+ENV{MD_MON_THIS}=="?*", TEST!="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service"
+ENV{RESHAPE_ACTIVE}=="yes", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service"
 
 LABEL="md_end"
index 9f055ed022d531371ebbe12d52a601214fdf5b8a..d4a7f0a5a0493b3526729ddef4a1ba7e628fc655 100644 (file)
@@ -11,6 +11,11 @@ SUBSYSTEM!="block", GOTO="md_inc_end"
 ENV{SYSTEMD_READY}=="0", GOTO="md_inc_end"
 
 # handle potential components of arrays (the ones supported by md)
+# For member devices which are md/dm devices, we don't need to
+# handle add event. Because md/dm devices need to do some init jobs.
+# Then the change event happens.
+# When adding md/dm devices, ID_FS_TYPE can only be linux_raid_member
+# after change event happens.
 ENV{ID_FS_TYPE}=="linux_raid_member", GOTO="md_inc"
 
 # "noiswmd" on kernel command line stops mdadm from handling
@@ -23,15 +28,19 @@ IMPORT{cmdline}="nodmraid"
 ENV{nodmraid}=="?*", GOTO="md_inc_end"
 ENV{ID_FS_TYPE}=="ddf_raid_member", GOTO="md_inc"
 ENV{noiswmd}=="?*", GOTO="md_inc_end"
-ENV{ID_FS_TYPE}=="isw_raid_member", GOTO="md_inc"
+ENV{ID_FS_TYPE}=="isw_raid_member", ACTION!="change", GOTO="md_inc"
 GOTO="md_inc_end"
 
 LABEL="md_inc"
 
+# Bare disks are ready when add event happens, the raid can be assembled.
+ACTION=="change", KERNEL!="dm-*|md*", GOTO="md_inc_end"
+
 # remember you can limit what gets auto/incrementally assembled by
 # mdadm.conf(5)'s 'AUTO' and selectively whitelist using 'ARRAY'
-ACTION=="add|change", IMPORT{program}="BINDIR/mdadm --incremental --export $devnode --offroot $env{DEVLINKS}"
-ACTION=="add|change", ENV{MD_STARTED}=="*unsafe*", ENV{MD_FOREIGN}=="no", ENV{SYSTEMD_WANTS}+="mdadm-last-resort@$env{MD_DEVICE}.timer"
+ACTION!="remove", IMPORT{program}="BINDIR/mdadm --incremental --export $devnode --offroot $env{DEVLINKS}"
+ACTION!="remove", ENV{MD_STARTED}=="*unsafe*", ENV{MD_FOREIGN}=="no", ENV{SYSTEMD_WANTS}+="mdadm-last-resort@$env{MD_DEVICE}.timer"
+
 ACTION=="remove", ENV{ID_PATH}=="?*", RUN+="BINDIR/mdadm -If $name --path $env{ID_PATH}"
 ACTION=="remove", ENV{ID_PATH}!="?*", RUN+="BINDIR/mdadm -If $name"
 
index 13c23d848077ef5e29846d9a3484574e9b38f99f..2e185cee97db3a4bf07db25b0a6709085c0a6493 100644 (file)
@@ -13,7 +13,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with mdraid-safe-timeouts.  If not, see
-# <http://www.gnu.org/licenses/>.
+# <https://www.gnu.org/licenses/>.
 
 # This file causes block devices with Linux RAID (mdadm) signatures to
 # attempt to set safe timeouts for the drives involved
@@ -50,7 +50,7 @@ ENV{DEVTYPE}!="partition", GOTO="md_timeouts_end"
 
 IMPORT{program}="/sbin/mdadm --examine --export $devnode"
 
-ACTION=="add|change", \
+ACTION!="remove", \
   ENV{ID_FS_TYPE}=="linux_raid_member", \
   ENV{MD_LEVEL}=="raid[1-9]*", \
   TEST=="/sys/block/$parent/device/timeout", \
diff --git a/udev.c b/udev.c
new file mode 100644 (file)
index 0000000..066e6ab
--- /dev/null
+++ b/udev.c
@@ -0,0 +1,197 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2022 Mateusz Grzonka <mateusz.grzonka@intel.com>
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include       "mdadm.h"
+#include       "udev.h"
+#include       "md_p.h"
+#include       "md_u.h"
+#include       <sys/wait.h>
+#include       <signal.h>
+#include       <limits.h>
+#include       <syslog.h>
+
+#ifndef NO_LIBUDEV
+#include       <libudev.h>
+#endif
+
+static char *unblock_path;
+
+/*
+ * udev_is_available() - Checks for udev in the system.
+ *
+ * Function looks whether udev directories are available and MDADM_NO_UDEV env defined.
+ *
+ * Return:
+ * true if udev is available,
+ * false if not
+ */
+bool udev_is_available(void)
+{
+       struct stat stb;
+
+       if (stat("/dev/.udev", &stb) != 0 &&
+           stat("/run/udev", &stb) != 0)
+               return false;
+       if (check_env("MDADM_NO_UDEV") == 1)
+               return false;
+       return true;
+}
+
+#ifndef NO_LIBUDEV
+
+static struct udev *udev;
+static struct udev_monitor *udev_monitor;
+
+/*
+ * udev_release() - Drops references of udev and udev_monitor and clears both pointers,
+ * so that udev_wait_for_events() re-initializes instead of using released objects.
+ */
+static void udev_release(void)
+{
+       /* Both unref calls return NULL; store it to avoid dangling static pointers. */
+       udev_monitor = udev_monitor_unref(udev_monitor);
+       udev = udev_unref(udev);
+}
+
+/*
+ * udev_initialize() - Initializes udev and udev_monitor structures.
+ *
+ * Function initializes udev, udev_monitor, and sets udev_monitor filter for block devices.
+ *
+ * Return:
+ * UDEV_STATUS_SUCCESS on success
+ * UDEV_STATUS_ERROR on error
+ * UDEV_STATUS_ERROR_NO_UDEV when udev not available
+ */
+static enum udev_status udev_initialize(void)
+{
+       if (!udev_is_available()) {
+               pr_err("No udev.\n");
+               return UDEV_STATUS_ERROR_NO_UDEV;
+       }
+
+       udev = udev_new();
+       if (!udev) {
+               pr_err("Cannot initialize udev.\n");
+               return UDEV_STATUS_ERROR;
+       }
+
+       udev_monitor = udev_monitor_new_from_netlink(udev, "udev");
+       if (!udev_monitor) {
+               pr_err("Cannot initialize udev monitor.\n");
+               udev = udev_unref(udev);
+               return UDEV_STATUS_ERROR;
+       }
+
+       if (udev_monitor_filter_add_match_subsystem_devtype(udev_monitor, "block", 0) < 0) {
+               pr_err("Cannot add udev monitor event filter for md devices.\n");
+               udev_release();
+               return UDEV_STATUS_ERROR;
+       }
+       if (udev_monitor_enable_receiving(udev_monitor) < 0) {
+               pr_err("Cannot enable receiving udev events through udev monitor.\n");
+               udev_release();
+               return UDEV_STATUS_ERROR;
+       }
+       atexit(udev_release);
+       return UDEV_STATUS_SUCCESS;
+}
+
+/*
+ * udev_wait_for_events() - Waits for events from udev.
+ * @seconds: Timeout in seconds.
+ *
+ * Function waits udev events, wakes up on event or timeout. udev and udev_monitor are
+ * initialized on first use. The received device is used only as a signal that an event
+ * happened, so its reference is dropped before returning.
+ *
+ * Return:
+ * UDEV_STATUS_SUCCESS on detected event
+ * UDEV_STATUS_TIMEOUT on timeout
+ * UDEV_STATUS_ERROR on error
+ * UDEV_STATUS_ERROR_NO_UDEV when udev is not available
+ */
+enum udev_status udev_wait_for_events(int seconds)
+{
+       struct udev_device *dev;
+       int fd;
+       fd_set readfds;
+       struct timeval tv;
+       int ret;
+
+       if (!udev || !udev_monitor) {
+               ret = udev_initialize();
+               if (ret != UDEV_STATUS_SUCCESS)
+                       return ret;
+       }
+
+       fd = udev_monitor_get_fd(udev_monitor);
+       if (fd < 0) {
+               pr_err("Cannot access file descriptor associated with udev monitor.\n");
+               return UDEV_STATUS_ERROR;
+       }
+
+       FD_ZERO(&readfds);
+       FD_SET(fd, &readfds);
+       tv.tv_sec = seconds;
+       tv.tv_usec = 0;
+
+       /* As before, a select() failure is reported the same way as a timeout. */
+       if (select(fd + 1, &readfds, NULL, NULL, &tv) <= 0 || !FD_ISSET(fd, &readfds))
+               return UDEV_STATUS_TIMEOUT;
+
+       dev = udev_monitor_receive_device(udev_monitor);
+       if (!dev)
+               return UDEV_STATUS_TIMEOUT;
+
+       /* The returned device carries a reference owned by the caller; drop it to avoid a leak. */
+       udev_device_unref(dev);
+       return UDEV_STATUS_SUCCESS; /* event detected */
+}
+#endif
+
+/*
+ * udev_block() - Block udev from examining newly created arrays.
+ *
+ * When array is created, we don't want udev to examine it immediately.
+ * Function creates /run/mdadm/creating-mdXXX and expects that udev rule
+ * will notice it and act accordingly.
+ *
+ * Return:
+ * UDEV_STATUS_SUCCESS when successfully blocked udev
+ * UDEV_STATUS_ERROR on error
+ */
+enum udev_status udev_block(char *devnm)
+{
+       int fd;
+       char *path = xcalloc(1, BUFSIZ);
+
+       snprintf(path, BUFSIZ, "/run/mdadm/creating-%s", devnm);
+
+       fd = open(path, O_CREAT | O_RDWR, 0600);
+       if (!is_fd_valid(fd)) {
+               pr_err("Cannot block udev, error creating blocking file.\n");
+               pr_err("%s: %s\n", strerror(errno), path);
+               free(path);
+               return UDEV_STATUS_ERROR;
+       }
+
+       close(fd);
+       unblock_path = path;
+       return UDEV_STATUS_SUCCESS;
+}
+
+/*
+ * udev_unblock() - Unblock udev.
+ */
+void udev_unblock(void)
+{
+       if (unblock_path)
+               unlink(unblock_path);
+       free(unblock_path);
+       unblock_path = NULL;
+}
diff --git a/udev.h b/udev.h
new file mode 100644 (file)
index 0000000..ae0a361
--- /dev/null
+++ b/udev.h
@@ -0,0 +1,40 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2022 Mateusz Grzonka <mateusz.grzonka@intel.com>
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef MONITOR_UDEV_H
+#define MONITOR_UDEV_H
+
+enum udev_status {
+       UDEV_STATUS_ERROR_NO_UDEV = -2,
+       UDEV_STATUS_ERROR,
+       UDEV_STATUS_SUCCESS = 0,
+       UDEV_STATUS_TIMEOUT
+};
+
+bool udev_is_available(void);
+
+#ifndef NO_LIBUDEV
+enum udev_status udev_wait_for_events(int seconds);
+#endif
+
+enum udev_status udev_block(char *devnm);
+void udev_unblock(void);
+
+#endif
diff --git a/util.c b/util.c
index 07f9dc344edf334e9f43865360d15d42fcf11fad..4fbf11c4e2bd71fad9df89b9b259ad50ac4b28d7 100644 (file)
--- a/util.c
+++ b/util.c
@@ -35,9 +35,8 @@
 #include       <poll.h>
 #include       <ctype.h>
 #include       <dirent.h>
-#include       <signal.h>
 #include       <dlfcn.h>
-
+#include       <limits.h>
 
 /*
  * following taken from linux/blkpg.h because they aren't
@@ -167,7 +166,7 @@ retry:
                pr_err("error %d when get PW mode on lock %s\n", errno, str);
                /* let's try several times if EAGAIN happened */
                if (dlm_lock_res->lksb.sb_status == EAGAIN && retry_count < 10) {
-                       sleep(10);
+                       sleep_for(10, 0, true);
                        retry_count++;
                        goto retry;
                }
@@ -268,7 +267,7 @@ int md_array_active(int fd)
                 * GET_ARRAY_INFO doesn't provide access to the proper state
                 * information, so fallback to a basic check for raid_disks != 0
                 */
-               ret = ioctl(fd, GET_ARRAY_INFO, &array);
+               ret = md_get_array_info(fd, &array);
        }
 
        return !ret;
@@ -306,43 +305,6 @@ int md_get_disk_info(int fd, struct mdu_disk_info_s *disk)
        return ioctl(fd, GET_DISK_INFO, disk);
 }
 
-/*
- * Parse a 128 bit uuid in 4 integers
- * format is 32 hexx nibbles with options :.<space> separator
- * If not exactly 32 hex digits are found, return 0
- * else return 1
- */
-int parse_uuid(char *str, int uuid[4])
-{
-       int hit = 0; /* number of Hex digIT */
-       int i;
-       char c;
-       for (i = 0; i < 4; i++)
-               uuid[i] = 0;
-
-       while ((c = *str++) != 0) {
-               int n;
-               if (c >= '0' && c <= '9')
-                       n = c-'0';
-               else if (c >= 'a' && c <= 'f')
-                       n = 10 + c - 'a';
-               else if (c >= 'A' && c <= 'F')
-                       n = 10 + c - 'A';
-               else if (strchr(":. -", c))
-                       continue;
-               else return 0;
-
-               if (hit<32) {
-                       uuid[hit/8] <<= 4;
-                       uuid[hit/8] += n;
-               }
-               hit++;
-       }
-       if (hit == 32)
-               return 1;
-       return 0;
-}
-
 int get_linux_version()
 {
        struct utsname name;
@@ -459,29 +421,25 @@ int parse_layout_10(char *layout)
 
 int parse_layout_faulty(char *layout)
 {
+       int ln, mode;
+       char *m;
+
+       if (!layout)
+               return -1;
+
        /* Parse the layout string for 'faulty' */
-       int ln = strcspn(layout, "0123456789");
-       char *m = xstrdup(layout);
-       int mode;
+       ln = strcspn(layout, "0123456789");
+       m = xstrdup(layout);
        m[ln] = 0;
        mode = map_name(faultylayout, m);
+       free(m);
+
        if (mode == UnSet)
                return -1;
 
        return mode | (atoi(layout+ln)<< ModeShift);
 }
 
-long parse_num(char *num)
-{
-       /* Either return a valid number, or -1 */
-       char *c;
-       long rv = strtol(num, &c, 10);
-       if (rv < 0 || *c || !num[0])
-               return -1;
-       else
-               return rv;
-}
-
 int parse_cluster_confirm_arg(char *input, char **devname, int *slot)
 {
        char *dev;
@@ -611,56 +569,6 @@ int enough(int level, int raid_disks, int layout, int clean, char *avail)
        }
 }
 
-const int uuid_zero[4] = { 0, 0, 0, 0 };
-
-int same_uuid(int a[4], int b[4], int swapuuid)
-{
-       if (swapuuid) {
-               /* parse uuids are hostendian.
-                * uuid's from some superblocks are big-ending
-                * if there is a difference, we need to swap..
-                */
-               unsigned char *ac = (unsigned char *)a;
-               unsigned char *bc = (unsigned char *)b;
-               int i;
-               for (i = 0; i < 16; i += 4) {
-                       if (ac[i+0] != bc[i+3] ||
-                           ac[i+1] != bc[i+2] ||
-                           ac[i+2] != bc[i+1] ||
-                           ac[i+3] != bc[i+0])
-                               return 0;
-               }
-               return 1;
-       } else {
-               if (a[0]==b[0] &&
-                   a[1]==b[1] &&
-                   a[2]==b[2] &&
-                   a[3]==b[3])
-                       return 1;
-               return 0;
-       }
-}
-
-void copy_uuid(void *a, int b[4], int swapuuid)
-{
-       if (swapuuid) {
-               /* parse uuids are hostendian.
-                * uuid's from some superblocks are big-ending
-                * if there is a difference, we need to swap..
-                */
-               unsigned char *ac = (unsigned char *)a;
-               unsigned char *bc = (unsigned char *)b;
-               int i;
-               for (i = 0; i < 16; i += 4) {
-                       ac[i+0] = bc[i+3];
-                       ac[i+1] = bc[i+2];
-                       ac[i+2] = bc[i+1];
-                       ac[i+3] = bc[i+0];
-               }
-       } else
-               memcpy(a, b, 16);
-}
-
 char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
 {
        int i, j;
@@ -681,19 +589,21 @@ char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
 
 }
 
-char *fname_from_uuid(struct supertype *st, struct mdinfo *info,
-                     char *buf, char sep)
+/**
+ * fname_from_uuid() - generate uuid string. Should not be used with super1.
+ * @info: info with uuid
+ * @buf: buf to fill.
+ *
+ * This routine should not be used with super1. See detail_fname_from_uuid() for details. It does
+ * not use superswitch swapuuid as it should be 0 but it has to do UUID conversion if host is big
+ * endian- left for backward compatibility.
+ */
+char *fname_from_uuid(struct mdinfo *info, char *buf)
 {
-       // dirty hack to work around an issue with super1 superblocks...
-       // super1 superblocks need swapuuid set in order for assembly to
-       // work, but can't have it set if we want this printout to match
-       // all the other uuid printouts in super1.c, so we force swapuuid
-       // to 1 to make our printout match the rest of super1
 #if __BYTE_ORDER == BIG_ENDIAN
-       return __fname_from_uuid(info->uuid, 1, buf, sep);
+       return __fname_from_uuid(info->uuid, true, buf, ':');
 #else
-       return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 :
-                                st->ss->swapuuid, buf, sep);
+       return __fname_from_uuid(info->uuid, false, buf, ':');
 #endif
 }
 
@@ -815,23 +725,33 @@ int stat_is_blkdev(char *devname, dev_t *rdev)
        return 1;
 }
 
+/**
+ * ask() - prompt user for "yes/no" dialog.
+ * @mesg: message to be printed, without '?' sign.
+ * Returns: 1 if 'Y/y', 0 otherwise.
+ *
+ * The default value is 'N/n', thus the caps on "N" on prompt.
+ */
 int ask(char *mesg)
 {
-       char *add = "";
-       int i;
-       for (i = 0; i < 5; i++) {
-               char buf[100];
-               fprintf(stderr, "%s%s", mesg, add);
-               fflush(stderr);
-               if (fgets(buf, 100, stdin)==NULL)
-                       return 0;
-               if (buf[0]=='y' || buf[0]=='Y')
-                       return 1;
-               if (buf[0]=='n' || buf[0]=='N')
-                       return 0;
-               add = "(y/n) ";
+       char buf[3] = {0};
+
+       fprintf(stderr, "%s [y/N]? ", mesg);
+       fflush(stderr);
+       if (fgets(buf, 3, stdin) == NULL)
+               return 0;
+       if (strlen(buf) == 1) {
+               pr_err("assuming no.\n");
+               return 0;
        }
-       pr_err("assuming 'no'\n");
+       if (buf[1] != '\n')
+               goto bad_option;
+       if (toupper(buf[0]) == 'Y')
+               return 1;
+       if (toupper(buf[0]) == 'N')
+               return 0;
+bad_option:
+       pr_err("bad option.\n");
        return 0;
 }
 
@@ -1034,12 +954,12 @@ dev_t devnm2devid(char *devnm)
        /* First look in /sys/block/$DEVNM/dev for %d:%d
         * If that fails, try parsing out a number
         */
-       char path[100];
+       char path[PATH_MAX];
        char *ep;
        int fd;
        int mjr,mnr;
 
-       sprintf(path, "/sys/block/%s/dev", devnm);
+       snprintf(path, sizeof(path), "/sys/block/%s/dev", devnm);
        fd = open(path, O_RDONLY);
        if (fd >= 0) {
                char buf[20];
@@ -1065,47 +985,74 @@ dev_t devnm2devid(char *devnm)
        return 0;
 }
 
-char *get_md_name(char *devnm)
+/**
+ * is_devname_numbered() - helper for numbered devname verification.
+ * @devname: path or name to check.
+ * @pref: expected devname prefix.
+ * @pref_len: prefix len.
+ */
+static bool is_devname_numbered(const char *devname, const char *pref, const int pref_len)
 {
-       /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */
-       /* if dev < 0, want /dev/md/d%d or find mdp in /proc/devices ... */
+       int val;
 
-       static char devname[50];
-       struct stat stb;
-       dev_t rdev = devnm2devid(devnm);
-       char *dn;
+       assert(devname && pref);
 
-       if (rdev == 0)
-               return 0;
-       if (strncmp(devnm, "md_", 3) == 0) {
-               snprintf(devname, sizeof(devname), "/dev/md/%s",
-                       devnm + 3);
-               if (stat(devname, &stb) == 0 &&
-                   (S_IFMT&stb.st_mode) == S_IFBLK && (stb.st_rdev == rdev))
-                       return devname;
-       }
-       snprintf(devname, sizeof(devname), "/dev/%s", devnm);
-       if (stat(devname, &stb) == 0 && (S_IFMT&stb.st_mode) == S_IFBLK &&
-           (stb.st_rdev == rdev))
-               return devname;
+       if (strncmp(devname, pref, pref_len) != 0)
+               return false;
 
-       snprintf(devname, sizeof(devname), "/dev/md/%s", devnm+2);
-       if (stat(devname, &stb) == 0 && (S_IFMT&stb.st_mode) == S_IFBLK &&
-           (stb.st_rdev == rdev))
-               return devname;
+       if (parse_num(&val, devname + pref_len) != 0)
+               return false;
+
+       if (val > 127)
+               return false;
+
+       return true;
+}
+
+/**
+ * is_devname_md_numbered() - check if &devname is numbered MD device (md).
+ * @devname: path or name to check.
+ */
+bool is_devname_md_numbered(const char *devname)
+{
+       return is_devname_numbered(devname, DEV_NUM_PREF, DEV_NUM_PREF_LEN);
+}
 
-       dn = map_dev(major(rdev), minor(rdev), 0);
-       if (dn)
-               return dn;
-       snprintf(devname, sizeof(devname), "/dev/.tmp.%s", devnm);
-       if (mknod(devname, S_IFBLK | 0600, rdev) == -1)
-               if (errno != EEXIST)
-                       return NULL;
+/**
+ * is_devname_md_d_numbered() - check if &devname is secondary numbered MD device (md_d).
+ * @devname: path or name to check.
+ */
+bool is_devname_md_d_numbered(const char *devname)
+{
+       static const char d_dev[] = DEV_NUM_PREF "_d";
+
+       return is_devname_numbered(devname, d_dev, sizeof(d_dev) - 1);
+}
+
+/**
+ * get_md_name() - Get main dev node of the md device.
+ * @devnm: Md device name or path.
+ *
+ * Function checks if the full name was passed and returns md name
+ * if it is the MD device.
+ *
+ * Return: Main dev node of the md device or NULL if not found.
+ */
+char *get_md_name(char *devnm)
+{
+       static char devname[NAME_MAX];
+       struct stat stb;
 
-       if (stat(devname, &stb) == 0 && (S_IFMT&stb.st_mode) == S_IFBLK &&
-           (stb.st_rdev == rdev))
+       if (strncmp(devnm, "/dev/", 5) == 0)
+               snprintf(devname, sizeof(devname), "%s", devnm);
+       else
+               snprintf(devname, sizeof(devname), "/dev/%s", devnm);
+
+       if (!is_mddev(devname))
+               return NULL;
+       if (stat(devname, &stb) == 0 && (S_IFMT&stb.st_mode) == S_IFBLK)
                return devname;
-       unlink(devname);
+
        return NULL;
 }
 
@@ -1124,6 +1071,20 @@ int get_maj_min(char *dev, int *major, int *minor)
                *e == 0);
 }
 
+/**
+ * is_bit_set() - get bit value by index.
+ * @val: value.
+ * @index: index of the bit (LSB numbering).
+ *
+ * Return: bit value.
+ */
+bool is_bit_set(int *val, unsigned char index)
+{
+       if ((*val) & (1 << index))
+               return true;
+       return false;
+}
+
 int dev_open(char *dev, int flags)
 {
        /* like 'open', but if 'dev' matches %d:%d, create a temp
@@ -1182,7 +1143,7 @@ int open_dev_excl(char *devnm)
        int i;
        int flags = O_RDWR;
        dev_t devid = devnm2devid(devnm);
-       long delay = 1000;
+       unsigned int delay = 1; // milliseconds
 
        sprintf(buf, "%d:%d", major(devid), minor(devid));
        for (i = 0; i < 25; i++) {
@@ -1195,8 +1156,8 @@ int open_dev_excl(char *devnm)
                }
                if (errno != EBUSY)
                        return fd;
-               usleep(delay);
-               if (delay < 200000)
+               sleep_for(0, MSEC_TO_NSEC(delay), true);
+               if (delay < 200)
                        delay *= 2;
        }
        return -1;
@@ -1220,7 +1181,7 @@ void wait_for(char *dev, int fd)
 {
        int i;
        struct stat stb_want;
-       long delay = 1000;
+       unsigned int delay = 1; // milliseconds
 
        if (fstat(fd, &stb_want) != 0 ||
            (stb_want.st_mode & S_IFMT) != S_IFBLK)
@@ -1232,8 +1193,8 @@ void wait_for(char *dev, int fd)
                    (stb.st_mode & S_IFMT) == S_IFBLK &&
                    (stb.st_rdev == stb_want.st_rdev))
                        return;
-               usleep(delay);
-               if (delay < 200000)
+               sleep_for(0, MSEC_TO_NSEC(delay), true);
+               if (delay < 200)
                        delay *= 2;
        }
        if (i == 25)
@@ -1260,6 +1221,11 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
        int i;
        char *subarray = NULL;
        char container[32] = "";
+       char *devnm = NULL;
+
+       devnm = fd2devnm(fd);
+       if (!devnm)
+               return NULL;
 
        sra = sysfs_read(fd, NULL, GET_VERSION);
 
@@ -1305,47 +1271,13 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
                if (subarrayp)
                        *subarrayp = subarray;
                strcpy(st->container_devnm, container);
-               strcpy(st->devnm, fd2devnm(fd));
+               strncpy(st->devnm, devnm, MD_NAME_MAX - 1);
        } else
                free(subarray);
 
        return st;
 }
 
-int dev_size_from_id(dev_t id, unsigned long long *size)
-{
-       char buf[20];
-       int fd;
-
-       sprintf(buf, "%d:%d", major(id), minor(id));
-       fd = dev_open(buf, O_RDONLY);
-       if (fd < 0)
-               return 0;
-       if (get_dev_size(fd, NULL, size)) {
-               close(fd);
-               return 1;
-       }
-       close(fd);
-       return 0;
-}
-
-int dev_sector_size_from_id(dev_t id, unsigned int *size)
-{
-       char buf[20];
-       int fd;
-
-       sprintf(buf, "%d:%d", major(id), minor(id));
-       fd = dev_open(buf, O_RDONLY);
-       if (fd < 0)
-               return 0;
-       if (get_dev_sector_size(fd, NULL, size)) {
-               close(fd);
-               return 1;
-       }
-       close(fd);
-       return 0;
-}
-
 struct supertype *dup_super(struct supertype *orig)
 {
        struct supertype *st;
@@ -1630,7 +1562,7 @@ int open_container(int fd)
        /* 'fd' is a block device.  Find out if it is in use
         * by a container, and return an open fd on that container.
         */
-       char path[256];
+       char path[288];
        char *e;
        DIR *dir;
        struct dirent *de;
@@ -1898,7 +1830,7 @@ int remove_disk(int mdfd, struct supertype *st,
 
        /* Remove the disk given by 'info' from the array */
        if (st->ss->external)
-               rv = sysfs_set_str(sra, info, "slot", "none");
+               rv = sysfs_set_str(sra, info, "slot", STR_COMMON_NONE);
        else
                rv = ioctl(mdfd, HOT_REMOVE_DISK, makedev(info->disk.major,
                                                          info->disk.minor));
@@ -1918,7 +1850,7 @@ int hot_remove_disk(int mdfd, unsigned long dev, int force)
        while ((ret = ioctl(mdfd, HOT_REMOVE_DISK, dev)) == -1 &&
               errno == EBUSY &&
               cnt-- > 0)
-               usleep(10000);
+               sleep_for(0, MSEC_TO_NSEC(10), true);
 
        return ret;
 }
@@ -1931,7 +1863,7 @@ int sys_hot_remove_disk(int statefd, int force)
        while ((ret = write(statefd, "remove", 6)) == -1 &&
               errno == EBUSY &&
               cnt-- > 0)
-               usleep(10000);
+               sleep_for(0, MSEC_TO_NSEC(10), true);
        return ret == 6 ? 0 : -1;
 }
 
@@ -1945,8 +1877,8 @@ int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
        int rv;
 
        if (st->ss->external)
-               return sysfs_set_array(info, 9003);
-               
+               return sysfs_set_array(info);
+
        memset(&inf, 0, sizeof(inf));
        inf.major_version = info->array.major_version;
        inf.minor_version = info->array.minor_version;
@@ -2002,10 +1934,11 @@ int mdmon_running(char *devnm)
 
 int start_mdmon(char *devnm)
 {
-       int i, skipped;
+       int i;
        int len;
        pid_t pid;
        int status;
+       char *prefix = in_initrd() ? "initrd-" : "";
        char pathbuf[1024];
        char *paths[4] = {
                pathbuf,
@@ -2016,7 +1949,10 @@ int start_mdmon(char *devnm)
 
        if (check_env("MDADM_NO_MDMON"))
                return 0;
+       if (continue_via_systemd(devnm, MDMON_SERVICE, prefix))
+               return 0;
 
+       /* That failed, try running mdmon directly */
        len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)-1);
        if (len > 0) {
                char *sl;
@@ -2030,51 +1966,9 @@ int start_mdmon(char *devnm)
        } else
                pathbuf[0] = '\0';
 
-       /* First try to run systemctl */
-       if (!check_env("MDADM_NO_SYSTEMCTL"))
-               switch(fork()) {
-               case 0:
-                       /* FIXME yuk. CLOSE_EXEC?? */
-                       skipped = 0;
-                       for (i = 3; skipped < 20; i++)
-                               if (close(i) < 0)
-                                       skipped++;
-                               else
-                                       skipped = 0;
-
-                       /* Don't want to see error messages from
-                        * systemctl.  If the service doesn't exist,
-                        * we start mdmon ourselves.
-                        */
-                       close(2);
-                       open("/dev/null", O_WRONLY);
-                       snprintf(pathbuf, sizeof(pathbuf), "mdmon@%s.service",
-                                devnm);
-                       status = execl("/usr/bin/systemctl", "systemctl",
-                                      "start",
-                                      pathbuf, NULL);
-                       status = execl("/bin/systemctl", "systemctl", "start",
-                                      pathbuf, NULL);
-                       exit(1);
-               case -1: pr_err("cannot run mdmon. Array remains readonly\n");
-                       return -1;
-               default: /* parent - good */
-                       pid = wait(&status);
-                       if (pid >= 0 && status == 0)
-                               return 0;
-               }
-
-       /* That failed, try running mdmon directly */
        switch(fork()) {
        case 0:
-               /* FIXME yuk. CLOSE_EXEC?? */
-               skipped = 0;
-               for (i = 3; skipped < 20; i++)
-                       if (close(i) < 0)
-                               skipped++;
-                       else
-                               skipped = 0;
-
+               manage_fork_fds(1);
                for (i = 0; paths[i]; i++)
                        if (paths[i][0]) {
                                execl(paths[i], paths[i],
@@ -2172,6 +2066,65 @@ void append_metadata_update(struct supertype *st, void *buf, int len)
 unsigned int __invalid_size_argument_for_IOC = 0;
 #endif
 
+/**
+ * disk_fd_matches_criteria() - check if device matches spare criteria.
+ * @st: supertype, not NULL.
+ * @disk_fd: file descriptor of the disk.
+ * @sc: criteria to test.
+ *
+ * Return: true if disk matches criteria, false otherwise.
+ */
+bool disk_fd_matches_criteria(struct supertype *st, int disk_fd, struct spare_criteria *sc)
+{
+       unsigned int dev_sector_size = 0;
+       unsigned long long dev_size = 0;
+
+       if (!sc->criteria_set)
+               return true;
+
+       if (!get_dev_size(disk_fd, NULL, &dev_size) || dev_size < sc->min_size)
+               return false;
+
+       if (!get_dev_sector_size(disk_fd, NULL, &dev_sector_size) ||
+           sc->sector_size != dev_sector_size)
+               return false;
+
+       if (drive_test_and_add_policies(st, &sc->pols, disk_fd, 0))
+               return false;
+
+       return true;
+}
+
+/**
+ * devid_matches_criteria() - check if device referenced by devid matches spare criteria.
+ * @st: supertype, not NULL.
+ * @devid: devid of the device to check.
+ * @sc: criteria to test.
+ *
+ * Return: true if disk matches criteria, false otherwise.
+ */
+bool devid_matches_criteria(struct supertype *st, dev_t devid, struct spare_criteria *sc)
+{
+       char buf[NAME_MAX];
+       bool ret;
+       int fd;
+
+       if (!sc->criteria_set)
+               return true;
+
+       snprintf(buf, NAME_MAX, "%d:%d", major(devid), minor(devid));
+
+       fd = dev_open(buf, O_RDONLY);
+       if (!is_fd_valid(fd))
+               return false;
+
+       /* Error code inherited */
+       ret = disk_fd_matches_criteria(st, fd, sc);
+
+       close(fd);
+       return ret;
+}
+
 /* Pick all spares matching given criteria from a container
  * if min_size == 0 do not check size
  * if domlist == NULL do not check domains
@@ -2195,28 +2148,13 @@ struct mdinfo *container_choose_spares(struct supertype *st,
        dp = &disks->devs;
        disks->array.spare_disks = 0;
        while (*dp) {
-               int found = 0;
+               bool found = false;
+
                d = *dp;
                if (d->disk.state == 0) {
-                       /* check if size is acceptable */
-                       unsigned long long dev_size;
-                       unsigned int dev_sector_size;
-                       int size_valid = 0;
-                       int sector_size_valid = 0;
-
                        dev_t dev = makedev(d->disk.major,d->disk.minor);
 
-                       if (!criteria->min_size ||
-                          (dev_size_from_id(dev,  &dev_size) &&
-                           dev_size >= criteria->min_size))
-                               size_valid = 1;
-
-                       if (!criteria->sector_size ||
-                           (dev_sector_size_from_id(dev, &dev_sector_size) &&
-                            criteria->sector_size == dev_sector_size))
-                               sector_size_valid = 1;
-
-                       found = size_valid && sector_size_valid;
+                       found = devid_matches_criteria(st, dev, criteria);
 
                        /* check if domain matches */
                        if (found && domlist) {
@@ -2225,7 +2163,8 @@ struct mdinfo *container_choose_spares(struct supertype *st,
                                        pol_add(&pol, pol_domain,
                                                spare_group, NULL);
                                if (domain_test(domlist, pol, metadata) != 1)
-                                       found = 0;
+                                       found = false;
+
                                dev_policy_free(pol);
                        }
                }
@@ -2279,17 +2218,84 @@ void enable_fds(int devices)
        setrlimit(RLIMIT_NOFILE, &lim);
 }
 
+/* Close all opened descriptors if needed and redirect
+ * streams to /dev/null.
+ * For debug purposes, leave STDOUT and STDERR untouched.
+ * Returns nothing: a failure to open /proc/self/fd is
+ * only reported with pr_err() and the remaining
+ * descriptors are left open.
+ */
+void manage_fork_fds(int close_all)
+{
+       DIR *dir;
+       struct dirent *dirent;
+
+       close(0);
+       open("/dev/null", O_RDWR);
+
+#ifndef DEBUG
+       dup2(0, 1);
+       dup2(0, 2);
+#endif
+
+       if (close_all == 0)
+               return;
+
+       dir = opendir("/proc/self/fd");
+       if (!dir) {
+               pr_err("Cannot open /proc/self/fd directory.\n");
+               return;
+       }
+       for (dirent = readdir(dir); dirent; dirent = readdir(dir)) {
+               int fd = -1;
+
+               if ((strcmp(dirent->d_name, ".") == 0) ||
+                   (strcmp(dirent->d_name, "..")) == 0)
+                       continue;
+
+               fd = strtol(dirent->d_name, NULL, 10);
+               if (fd > 2)
+                       close(fd);
+       }
+}
+
+/* In a systemd/udev world, it is best to get systemd to
+ * run daemon rather than running in the background.
+ * Returns:
+ *     1- if systemd service has been started
+ *     0- otherwise
+ */
+int continue_via_systemd(char *devnm, char *service_name, char *prefix)
+{
+       int pid, status;
+       char pathbuf[1024];
+
+       /* Simply return that service cannot be started */
+       if (check_env("MDADM_NO_SYSTEMCTL"))
+               return 0;
+       switch (fork()) {
+       case  0:
+               manage_fork_fds(1);
+               snprintf(pathbuf, sizeof(pathbuf),
+                        "%s@%s%s.service", service_name, prefix ?: "", devnm);
+               status = execl("/usr/bin/systemctl", "systemctl", "restart",
+                              pathbuf, NULL);
+               status = execl("/bin/systemctl", "systemctl", "restart",
+                              pathbuf, NULL);
+               exit(1);
+       case -1: /* Just do it ourselves. */
+               break;
+       default: /* parent - good */
+               pid = wait(&status);
+               if (pid >= 0 && status == 0)
+                       return 1;
+       }
+       return 0;
+}
+
 int in_initrd(void)
 {
-       /* This is based on similar function in systemd. */
-       struct statfs s;
-       /* statfs.f_type is signed long on s390x and MIPS, causing all
-          sorts of sign extension problems with RAMFS_MAGIC being
-          defined as 0x858458f6 */
-       return  statfs("/", &s) >= 0 &&
-               ((unsigned long)s.f_type == TMPFS_MAGIC ||
-                ((unsigned long)s.f_type & 0xFFFFFFFFUL) ==
-                ((unsigned long)RAMFS_MAGIC & 0xFFFFFFFFUL));
+       return access("/etc/initrd-release", F_OK) >= 0;
 }
 
 void reopen_mddev(int mdfd)
@@ -2436,3 +2442,72 @@ out:
        close(fd_zero);
        return ret;
 }
+
+/**
+ * sleep_for() - Sleeps for specified time.
+ * @sec: Seconds to sleep for.
+ * @nsec: Nanoseconds to sleep for, has to be less than one second.
+ * @wake_after_interrupt: If set, wake up if interrupted.
+ *
+ * Function immediately returns if error different than EINTR occurs.
+ */
+void sleep_for(unsigned int sec, long nsec, bool wake_after_interrupt)
+{
+       struct timespec delay = {.tv_sec = sec, .tv_nsec = nsec};
+
+       assert(nsec < MSEC_TO_NSEC(1000));
+
+       do {
+               errno = 0;
+               nanosleep(&delay, &delay);
+               if (errno != 0 && errno != EINTR) {
+                       pr_err("Error sleeping for %us %ldns: %s\n", sec, nsec, strerror(errno));
+                       return;
+               }
+       } while (!wake_after_interrupt && errno == EINTR);
+}
+
+/* is_directory() - Checks if directory provided by path is indeed a regular directory.
+ * @path: directory path to be checked
+ *
+ * Doesn't accept symlinks.
+ *
+ * Return: true if is a directory, false if not
+ */
+bool is_directory(const char *path)
+{
+       struct stat st;
+
+       if (lstat(path, &st) != 0) {
+               pr_err("%s: %s\n", strerror(errno), path);
+               return false;
+       }
+
+       if (!S_ISDIR(st.st_mode))
+               return false;
+
+       return true;
+}
+
+/*
+ * is_file() - Checks if file provided by path is indeed a regular file.
+ * @path: file path to be checked
+ *
+ * Doesn't accept symlinks.
+ *
+ * Return: true if it is a file, false if not
+ */
+bool is_file(const char *path)
+{
+       struct stat st;
+
+       if (lstat(path, &st) != 0) {
+               pr_err("%s: %s\n", strerror(errno), path);
+               return false;
+       }
+
+       if (!S_ISREG(st.st_mode))
+               return false;
+
+       return true;
+}
diff --git a/uuid.c b/uuid.c
new file mode 100644 (file)
index 0000000..94b5abd
--- /dev/null
+++ b/uuid.c
@@ -0,0 +1,112 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2 of the License, or
+ *    (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *    Author: Neil Brown
+ *    Email: <neilb@suse.de>
+ */
+
+#include       <string.h>
+
+const int uuid_zero[4] = { 0, 0, 0, 0 };
+
+int same_uuid(int a[4], int b[4], int swapuuid)
+{
+       if (swapuuid) {
+               /* parsed uuids are host-endian.
+                * uuids from some superblocks are big-endian;
+                * if there is a difference, we need to swap.
+                */
+               unsigned char *ac = (unsigned char *)a;
+               unsigned char *bc = (unsigned char *)b;
+               int i;
+               for (i = 0; i < 16; i += 4) {
+                       if (ac[i+0] != bc[i+3] ||
+                           ac[i+1] != bc[i+2] ||
+                           ac[i+2] != bc[i+1] ||
+                           ac[i+3] != bc[i+0])
+                               return 0;
+               }
+               return 1;
+       } else {
+               if (a[0]==b[0] &&
+                   a[1]==b[1] &&
+                   a[2]==b[2] &&
+                   a[3]==b[3])
+                       return 1;
+               return 0;
+       }
+}
+
+void copy_uuid(void *a, int b[4], int swapuuid)
+{
+       if (swapuuid) {
+               /* parsed uuids are host-endian.
+                * uuids from some superblocks are big-endian;
+                * if there is a difference, we need to swap.
+                */
+               unsigned char *ac = (unsigned char *)a;
+               unsigned char *bc = (unsigned char *)b;
+               int i;
+               for (i = 0; i < 16; i += 4) {
+                       ac[i+0] = bc[i+3];
+                       ac[i+1] = bc[i+2];
+                       ac[i+2] = bc[i+1];
+                       ac[i+3] = bc[i+0];
+               }
+       } else
+               memcpy(a, b, 16);
+}
+
+/*
+ * Parse a 128 bit uuid into 4 integers.
+ * Format is 32 hex nibbles with optional ':', '.', ' ' or '-' separators.
+ * If not exactly 32 hex digits are found, return 0
+ * else return 1
+ */
+int parse_uuid(char *str, int uuid[4])
+{
+       int hit = 0; /* number of Hex digIT */
+       int i;
+       char c;
+       for (i = 0; i < 4; i++)
+               uuid[i] = 0;
+
+       while ((c = *str++) != 0) {
+               int n;
+               if (c >= '0' && c <= '9')
+                       n = c-'0';
+               else if (c >= 'a' && c <= 'f')
+                       n = 10 + c - 'a';
+               else if (c >= 'A' && c <= 'F')
+                       n = 10 + c - 'A';
+               else if (strchr(":. -", c))
+                       continue;
+               else return 0;
+
+               if (hit<32) {
+                       uuid[hit/8] <<= 4;
+                       uuid[hit/8] += n;
+               }
+               hit++;
+       }
+       if (hit == 32)
+               return 1;
+       return 0;
+}