]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - util.c
util: remove rounding error where reporting "human sizes".
[thirdparty/mdadm.git] / util.c
diff --git a/util.c b/util.c
index a92a663bf23ffb4f74d77bdd4cbaa1fe59a6457c..89f6b0f454ac2c96ae36eb7eea8b7b999f2a9830 100644 (file)
--- a/util.c
+++ b/util.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2012 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
 #include       <sys/utsname.h>
 #include       <sys/wait.h>
 #include       <sys/un.h>
+#include       <sys/resource.h>
+#include       <sys/vfs.h>
+#include       <linux/magic.h>
 #include       <ctype.h>
 #include       <dirent.h>
 #include       <signal.h>
 
-int __offroot;
-
 /*
  * following taken from linux/blkpg.h because they aren't
  * anywhere else and it isn't safe to #include linux/ * stuff.
@@ -43,10 +44,10 @@ int __offroot;
 
 /* The argument structure */
 struct blkpg_ioctl_arg {
-        int op;
-        int flags;
-        int datalen;
-        void *data;
+       int op;
+       int flags;
+       int datalen;
+       void *data;
 };
 
 /* The subfunctions (for the op field) */
@@ -127,21 +128,21 @@ int parse_uuid(char *str, int uuid[4])
 
 int md_get_version(int fd)
 {
-    struct stat stb;
-    mdu_version_t vers;
+       struct stat stb;
+       mdu_version_t vers;
 
-    if (fstat(fd, &stb)<0)
-       return -1;
-    if ((S_IFMT&stb.st_mode) != S_IFBLK)
-       return -1;
+       if (fstat(fd, &stb)<0)
+               return -1;
+       if ((S_IFMT&stb.st_mode) != S_IFBLK)
+               return -1;
 
-    if (ioctl(fd, RAID_VERSION, &vers) == 0)
-       return  (vers.major*10000) + (vers.minor*100) + vers.patchlevel;
-    if (errno == EACCES)
-           return -1;
-    if (major(stb.st_rdev) == MD_MAJOR)
-       return (3600);
-    return -1;
+       if (ioctl(fd, RAID_VERSION, &vers) == 0)
+               return  (vers.major*10000) + (vers.minor*100) + vers.patchlevel;
+       if (errno == EACCES)
+               return -1;
+       if (major(stb.st_rdev) == MD_MAJOR)
+               return (3600);
+       return -1;
 }
 
 int get_linux_version()
@@ -188,13 +189,13 @@ int mdadm_version(char *version)
        return (a*1000000)+(b*1000)+c;
 }
 
-long long parse_size(char *size)
+unsigned long long parse_size(char *size)
 {
        /* parse 'size' which should be a number optionally
         * followed by 'K', 'M', or 'G'.
         * Without a suffix, K is assumed.
         * Number returned is in sectors (half-K)
-        * -1 returned on error.
+        * INVALID_SECTORS returned on error.
         */
        char *c;
        long long s = strtoll(size, &c, 10);
@@ -213,10 +214,14 @@ long long parse_size(char *size)
                        c++;
                        s *= 1024 * 1024 * 2;
                        break;
+               case 's': /* sectors */
+                       c++;
+                       break;
                }
-       }
+       } else
+               s = INVALID_SECTORS;
        if (*c)
-               s = -1;
+               s = INVALID_SECTORS;
        return s;
 }
 
@@ -302,7 +307,7 @@ int test_partition(int fd)
        if (ioctl(fd, BLKPG, &a) == 0)
                /* Very unlikely, but not a partition */
                return 0;
-       if (errno == ENXIO)
+       if (errno == ENXIO || errno == ENOTTY)
                /* not a partition */
                return 0;
 
@@ -343,14 +348,15 @@ int enough(int level, int raid_disks, int layout, int clean, char *avail)
                        /* there must be one of the 'copies' form 'first' */
                        int n = copies;
                        int cnt = 0;
+                       int this = first;
                        while (n--) {
-                               if (avail[first])
+                               if (avail[this])
                                        cnt++;
-                               first = (first+1) % raid_disks;
+                               this = (this+1) % raid_disks;
                        }
                        if (cnt == 0)
                                return 0;
-
+                       first = (first+(layout&255)) % raid_disks;
                } while (first != 0);
                return 1;
 
@@ -381,7 +387,6 @@ int enough_fd(int fd)
 {
        struct mdu_array_info_s array;
        struct mdu_disk_info_s disk;
-       int avail_disks = 0;
        int i, rv;
        char *avail;
 
@@ -401,7 +406,6 @@ int enough_fd(int fd)
                        continue;
                if (disk.raid_disk < 0 || disk.raid_disk >= array.raid_disks)
                        continue;
-               avail_disks++;
                avail[disk.raid_disk] = 1;
        }
        /* This is used on an active array, so assume it is clean */
@@ -505,7 +509,8 @@ int check_ext2(int fd, char *name)
         */
        unsigned char sb[1024];
        time_t mtime;
-       int size, bsize;
+       unsigned long long size;
+       int bsize;
        if (lseek(fd, 1024,0)!= 1024)
                return 0;
        if (read(fd, sb, 1024)!= 1024)
@@ -516,10 +521,10 @@ int check_ext2(int fd, char *name)
        mtime = sb[44]|(sb[45]|(sb[46]|sb[47]<<8)<<8)<<8;
        bsize = sb[24]|(sb[25]|(sb[26]|sb[27]<<8)<<8)<<8;
        size = sb[4]|(sb[5]|(sb[6]|sb[7]<<8)<<8)<<8;
+       size <<= bsize;
        pr_err("%s appears to contain an ext2fs file system\n",
                name);
-       fprintf(stderr,"    size=%dK  mtime=%s",
-               size*(1<<bsize), ctime(&mtime));
+       cont_err("size=%lluK  mtime=%s", size, ctime(&mtime));
        return 1;
 }
 
@@ -532,7 +537,7 @@ int check_reiser(int fd, char *name)
         *
         */
        unsigned char sb[1024];
-       unsigned long size;
+       unsigned long long size;
        if (lseek(fd, 64*1024, 0) != 64*1024)
                return 0;
        if (read(fd, sb, 1024) != 1024)
@@ -542,7 +547,7 @@ int check_reiser(int fd, char *name)
                return 0;
        pr_err("%s appears to contain a reiserfs file system\n",name);
        size = sb[0]|(sb[1]|(sb[2]|sb[3]<<8)<<8)<<8;
-       fprintf(stderr, "    size = %luK\n", size*4);
+       cont_err("size = %lluK\n", size*4);
 
        return 1;
 }
@@ -554,8 +559,8 @@ int check_raid(int fd, char *name)
        char *level;
        struct supertype *st = guess_super(fd);
 
-       if (!st) return 0;
-       st->ignore_hw_compat = 1;
+       if (!st)
+               return 0;
        st->ss->load_super(st, fd, name);
        /* Looks like a raid array .. */
        pr_err("%s appears to be part of a raid array:\n",
@@ -565,8 +570,8 @@ int check_raid(int fd, char *name)
        crtime = info.array.ctime;
        level = map_num(pers, info.array.level);
        if (!level) level = "-unknown-";
-       fprintf(stderr, "    level=%s devices=%d ctime=%s",
-               level, info.array.raid_disks, ctime(&crtime));
+       cont_err("level=%s devices=%d ctime=%s",
+                level, info.array.raid_disks, ctime(&crtime));
        return 1;
 }
 
@@ -666,13 +671,13 @@ char *human_size(long long bytes)
        if (bytes < 5000*1024)
                buf[0] = 0;
        else if (bytes < 2*1024LL*1024LL*1024LL) {
-               long cMiB = (bytes / ( (1LL<<20) / 200LL ) +1) /2;
+               long cMiB = (bytes * 200LL / (1LL<<20) + 1) / 2;
                long cMB  = (bytes / ( 1000000LL / 200LL ) +1) /2;
                snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)",
                        cMiB/100 , cMiB % 100,
                        cMB/100, cMB % 100);
        } else {
-               long cGiB = (bytes / ( (1LL<<30) / 200LL ) +1) /2;
+               long cGiB = (bytes * 200LL / (1LL<<30) +1) / 2;
                long cGB  = (bytes / (1000000000LL/200LL ) +1) /2;
                snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)",
                        cGiB/100 , cGiB % 100,
@@ -681,24 +686,49 @@ char *human_size(long long bytes)
        return buf;
 }
 
-char *human_size_brief(long long bytes)
+char *human_size_brief(long long bytes, int prefix)
 {
        static char buf[30];
 
+       /* We convert bytes to either centi-M{ega,ibi}bytes or
+        * centi-G{igi,ibi}bytes, with appropriate rounding,
+        * and then print 1/100th of those as a decimal.
+        * We allow upto 2048Megabytes before converting to
+        * gigabytes, as that shows more precision and isn't
+        * too large a number.
+        * Terabytes are not yet handled.
+        *
+        * If prefix == IEC, we mean prefixes like kibi,mebi,gibi etc.
+        * If prefix == JEDEC, we mean prefixes like kilo,mega,giga etc.
+        */
+
        if (bytes < 5000*1024)
-               snprintf(buf, sizeof(buf), "%ld.%02ldKiB",
-                       (long)(bytes>>10), (long)(((bytes&1023)*100+512)/1024)
-                       );
-       else if (bytes < 2*1024LL*1024LL*1024LL)
-               snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
-                       (long)(bytes>>20),
-                       (long)((bytes&0xfffff)+0x100000/200)/(0x100000/100)
-                       );
+               buf[0] = 0;
+       else if (prefix == IEC) {
+               if (bytes < 2*1024LL*1024LL*1024LL) {
+                       long cMiB = (bytes * 200LL / (1LL<<20) +1) /2;
+                       snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
+                               cMiB/100 , cMiB % 100);
+               } else {
+                       long cGiB = (bytes * 200LL / (1LL<<30) +1) /2;
+                       snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
+                                       cGiB/100 , cGiB % 100);
+               }
+       }
+       else if (prefix == JEDEC) {
+               if (bytes < 2*1024LL*1024LL*1024LL) {
+                       long cMB  = (bytes / ( 1000000LL / 200LL ) +1) /2;
+                       snprintf(buf, sizeof(buf), "%ld.%02ldMB",
+                                       cMB/100, cMB % 100);
+               } else {
+                       long cGB  = (bytes / (1000000000LL/200LL ) +1) /2;
+                       snprintf(buf, sizeof(buf), "%ld.%02ldGB",
+                                       cGB/100 , cGB % 100);
+               }
+       }
        else
-               snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
-                       (long)(bytes>>30),
-                       (long)(((bytes>>10)&0xfffff)+0x100000/200)/(0x100000/100)
-                       );
+               buf[0] = 0;
+
        return buf;
 }
 
@@ -749,43 +779,79 @@ int get_data_disks(int level, int layout, int raid_disks)
        return data_disks;
 }
 
+int devnm2devid(char *devnm)
+{
+       /* First look in /sys/block/$DEVNM/dev for %d:%d
+        * If that fails, try parsing out a number
+        */
+       char path[100];
+       char *ep;
+       int fd;
+       int mjr,mnr;
+
+       sprintf(path, "/sys/block/%s/dev", devnm);
+       fd = open(path, O_RDONLY);
+       if (fd >= 0) {
+               char buf[20];
+               int n = read(fd, buf, sizeof(buf));
+               close(fd);
+               if (n > 0)
+                       buf[n] = 0;
+               if (n > 0 && sscanf(buf, "%d:%d\n", &mjr, &mnr) == 2)
+                       return makedev(mjr, mnr);
+       }
+       if (strncmp(devnm, "md_d", 4) == 0 &&
+           isdigit(devnm[4]) &&
+           (mnr = strtoul(devnm+4, &ep, 10)) >= 0 &&
+           ep > devnm && *ep == 0)
+               return makedev(get_mdp_major(), mnr << MdpMinorShift);
+
+       if (strncmp(devnm, "md", 2) == 0 &&
+           isdigit(devnm[2]) &&
+           (mnr = strtoul(devnm+2, &ep, 10)) >= 0 &&
+           ep > devnm && *ep == 0)
+               return makedev(MD_MAJOR, mnr);
+
+       return 0;
+}
+
 #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
-char *get_md_name(int dev)
+char *get_md_name(char *devnm)
 {
        /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */
        /* if dev < 0, want /dev/md/d%d or find mdp in /proc/devices ... */
+
        static char devname[50];
        struct stat stb;
-       dev_t rdev;
+       dev_t rdev = devnm2devid(devnm);
        char *dn;
 
-       if (dev < 0) {
-               int mdp =  get_mdp_major();
-               if (mdp < 0) return NULL;
-               rdev = makedev(mdp, (-1-dev)<<6);
-               snprintf(devname, sizeof(devname), "/dev/md/d%d", -1-dev);
-               if (stat(devname, &stb) == 0
-                   && (S_IFMT&stb.st_mode) == S_IFBLK
-                   && (stb.st_rdev == rdev))
-                       return devname;
-       } else {
-               rdev = makedev(MD_MAJOR, dev);
-               snprintf(devname, sizeof(devname), "/dev/md%d", dev);
-               if (stat(devname, &stb) == 0
-                   && (S_IFMT&stb.st_mode) == S_IFBLK
-                   && (stb.st_rdev == rdev))
-                       return devname;
-
-               snprintf(devname, sizeof(devname), "/dev/md/%d", dev);
+       if (rdev == 0)
+               return 0;
+       if (strncmp(devnm, "md_", 3) == 0) {
+               snprintf(devname, sizeof(devname), "/dev/md/%s",
+                       devnm + 3);
                if (stat(devname, &stb) == 0
                    && (S_IFMT&stb.st_mode) == S_IFBLK
                    && (stb.st_rdev == rdev))
                        return devname;
        }
+       snprintf(devname, sizeof(devname), "/dev/%s", devnm);
+       if (stat(devname, &stb) == 0
+           && (S_IFMT&stb.st_mode) == S_IFBLK
+           && (stb.st_rdev == rdev))
+               return devname;
+
+       snprintf(devname, sizeof(devname), "/dev/md/%s", devnm+2);
+       if (stat(devname, &stb) == 0
+           && (S_IFMT&stb.st_mode) == S_IFBLK
+           && (stb.st_rdev == rdev))
+               return devname;
+
        dn = map_dev(major(rdev), minor(rdev), 0);
        if (dn)
                return dn;
-       snprintf(devname, sizeof(devname), "/dev/.tmp.md%d", dev);
+       snprintf(devname, sizeof(devname), "/dev/.tmp.%s", devnm);
        if (mknod(devname, S_IFBLK | 0600, rdev) == -1)
                if (errno != EEXIST)
                        return NULL;
@@ -803,41 +869,22 @@ void put_md_name(char *name)
        if (strncmp(name, "/dev/.tmp.md", 12) == 0)
                unlink(name);
 }
+#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
 
-int find_free_devnum(int use_partitions)
+int get_maj_min(char *dev, int *major, int *minor)
 {
-       int devnum;
-       for (devnum = 127; devnum != 128;
-            devnum = devnum ? devnum-1 : (1<<20)-1) {
-               char *dn;
-               int _devnum;
-               char nbuf[50];
-
-               _devnum = use_partitions ? (-1-devnum) : devnum;
-               if (mddev_busy(_devnum))
-                       continue;
-               sprintf(nbuf, "%s%d", use_partitions?"mdp":"md", devnum);
-               if (!conf_name_is_free(nbuf))
-                       continue;
-               /* make sure it is new to /dev too, at least as a
-                * non-standard */
-               dn = map_dev(dev2major(_devnum), dev2minor(_devnum), 0);
-               if (dn && ! is_standard(dn, NULL))
-                       continue;
-               break;
-       }
-       if (devnum == 128)
-               return NoMdDev;
-       return use_partitions ? (-1-devnum) : devnum;
+       char *e;
+       *major = strtoul(dev, &e, 0);
+       return (e > dev && *e == ':' && e[1] &&
+               (*minor = strtoul(e+1, &e, 0)) >= 0 &&
+               *e == 0);
 }
-#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
 
 int dev_open(char *dev, int flags)
 {
        /* like 'open', but if 'dev' matches %d:%d, create a temp
         * block device and open that
         */
-       char *e;
        int fd = -1;
        char devname[32];
        int major;
@@ -846,22 +893,15 @@ int dev_open(char *dev, int flags)
        if (!dev) return -1;
        flags |= O_DIRECT;
 
-       major = strtoul(dev, &e, 0);
-       if (e > dev && *e == ':' && e[1] &&
-           (minor = strtoul(e+1, &e, 0)) >= 0 &&
-           *e == 0) {
-               char *path = map_dev(major, minor, 0);
-               if (path)
-                       fd = open(path, flags);
-               if (fd < 0) {
-                       snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
-                                (int)getpid(), major, minor);
-                       if (mknod(devname, S_IFBLK|0600, makedev(major, minor)) == 0) {
-                               fd = open(devname, flags);
-                               unlink(devname);
-                       }
+       if (get_maj_min(dev, &major, &minor)) {
+               snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
+                        (int)getpid(), major, minor);
+               if (mknod(devname, S_IFBLK|0600, makedev(major, minor)) == 0) {
+                       fd = open(devname, flags);
+                       unlink(devname);
                }
                if (fd < 0) {
+                       /* Try /tmp as /dev appear to be read-only */
                        snprintf(devname, sizeof(devname), "/tmp/.tmp.md.%d:%d:%d",
                                 (int)getpid(), major, minor);
                        if (mknod(devname, S_IFBLK|0600, makedev(major, minor)) == 0) {
@@ -874,26 +914,30 @@ int dev_open(char *dev, int flags)
        return fd;
 }
 
-int open_dev_flags(int devnum, int flags)
+int open_dev_flags(char *devnm, int flags)
 {
+       int devid;
        char buf[20];
 
-       sprintf(buf, "%d:%d", dev2major(devnum), dev2minor(devnum));
+       devid = devnm2devid(devnm);
+       sprintf(buf, "%d:%d", major(devid), minor(devid));
        return dev_open(buf, flags);
 }
 
-int open_dev(int devnum)
+int open_dev(char *devnm)
 {
-       return open_dev_flags(devnum, O_RDONLY);
+       return open_dev_flags(devnm, O_RDONLY);
 }
 
-int open_dev_excl(int devnum)
+int open_dev_excl(char *devnm)
 {
        char buf[20];
        int i;
        int flags = O_RDWR;
+       int devid = devnm2devid(devnm);
+       long delay = 1000;
 
-       sprintf(buf, "%d:%d", dev2major(devnum), dev2minor(devnum));
+       sprintf(buf, "%d:%d", major(devid), minor(devid));
        for (i = 0 ; i < 25 ; i++) {
                int fd = dev_open(buf, flags|O_EXCL);
                if (fd >= 0)
@@ -904,7 +948,9 @@ int open_dev_excl(int devnum)
                }
                if (errno != EBUSY)
                        return fd;
-               usleep(200000);
+               usleep(delay);
+               if (delay < 200000)
+                       delay *= 2;
        }
        return -1;
 }
@@ -927,6 +973,7 @@ void wait_for(char *dev, int fd)
 {
        int i;
        struct stat stb_want;
+       long delay = 1000;
 
        if (fstat(fd, &stb_want) != 0 ||
            (stb_want.st_mode & S_IFMT) != S_IFBLK)
@@ -938,7 +985,9 @@ void wait_for(char *dev, int fd)
                    (stb.st_mode & S_IFMT) == S_IFBLK &&
                    (stb.st_rdev == stb_want.st_rdev))
                        return;
-               usleep(200000);
+               usleep(delay);
+               if (delay < 200000)
+                       delay *= 2;
        }
        if (i == 25)
                dprintf("%s: timeout waiting for %s\n", __func__, dev);
@@ -964,9 +1013,9 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
        char version[20];
        int i;
        char *subarray = NULL;
-       int container = NoMdDev;
+       char container[32] = "";
 
-       sra = sysfs_read(fd, 0, GET_VERSION);
+       sra = sysfs_read(fd, NULL, GET_VERSION);
 
        if (sra) {
                vers = sra->array.major_version;
@@ -992,7 +1041,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
                        *subarray++ = '\0';
                        subarray = xstrdup(subarray);
                }
-               container = devname2devnum(dev);
+               strcpy(container, dev);
                if (sra)
                        sysfs_free(sra);
                sra = sysfs_read(-1, container, GET_VERSION);
@@ -1011,8 +1060,8 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
                st->sb = NULL;
                if (subarrayp)
                        *subarrayp = subarray;
-               st->container_dev = container;
-               st->devnum = fd2devnum(fd);
+               strcpy(st->container_devnm, container);
+               strcpy(st->devnm, fd2devnm(fd));
        } else
                free(subarray);
 
@@ -1047,6 +1096,8 @@ struct supertype *dup_super(struct supertype *orig)
        st->ss = orig->ss;
        st->max_devs = orig->max_devs;
        st->minor_version = orig->minor_version;
+       st->ignore_hw_compat = orig->ignore_hw_compat;
+       st->data_offset = orig->data_offset;
        st->sb = NULL;
        st->info = NULL;
        return st;
@@ -1064,7 +1115,7 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
        int i;
 
        st = xcalloc(1, sizeof(*st));
-       st->container_dev = NoMdDev;
+       st->container_devnm[0] = 0;
 
        for (i = 0 ; superlist[i]; i++) {
                int rv;
@@ -1094,7 +1145,6 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
                rv = superlist[bestsuper]->load_super(st, fd, NULL);
                if (rv == 0) {
                        superlist[bestsuper]->free_super(st);
-                       st->ignore_hw_compat = 0;
                        return st;
                }
        }
@@ -1282,18 +1332,6 @@ int check_partitions(int fd, char *dname, unsigned long long freesize,
        return 0;
 }
 
-void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk)
-{
-       int d;
-
-       ioctl(mdfd, GET_ARRAY_INFO, ainf);
-       for (d = 0 ; d < MAX_DISKS ; d++) {
-               if (ioctl(mdfd, GET_DISK_INFO, disk) == 0 &&
-                   (disk->major || disk->minor))
-                       return;
-       }
-}
-
 int open_container(int fd)
 {
        /* 'fd' is a block device.  Find out if it is in use
@@ -1322,6 +1360,20 @@ int open_container(int fd)
                        continue;
                if (de->d_name[0] == '.')
                        continue;
+               /* Need to make sure it is a container and not a volume */
+               sprintf(e, "/%s/md/metadata_version", de->d_name);
+               dfd = open(path, O_RDONLY);
+               if (dfd < 0)
+                       continue;
+               n = read(dfd, buf, sizeof(buf));
+               close(dfd);
+               if (n <= 0 || (unsigned)n >= sizeof(buf))
+                       continue;
+               buf[n] = 0;
+               if (strncmp(buf, "external", 8) != 0 ||
+                   n < 10 ||
+                   buf[9] == '/')
+                       continue;
                sprintf(e, "/%s/dev", de->d_name);
                dfd = open(path, O_RDONLY);
                if (dfd < 0)
@@ -1358,13 +1410,47 @@ struct superswitch *version_to_superswitch(char *vers)
        return NULL;
 }
 
+int metadata_container_matches(char *metadata, char *devnm)
+{
+       /* Check if 'devnm' is the container named in 'metadata'
+        * which is
+        *   /containername/componentname  or
+        *   -containername/componentname
+        */
+       int l;
+       if (*metadata != '/' && *metadata != '-')
+               return 0;
+       l = strlen(devnm);
+       if (strncmp(metadata+1, devnm, l) != 0)
+               return 0;
+       if (metadata[l+1] != '/')
+               return 0;
+       return 1;
+}
+
+int metadata_subdev_matches(char *metadata, char *devnm)
+{
+       /* Check if 'devnm' is the subdev named in 'metadata'
+        * which is
+        *   /containername/subdev  or
+        *   -containername/subdev
+        */
+       char *sl;
+       if (*metadata != '/' && *metadata != '-')
+               return 0;
+       sl = strchr(metadata+1, '/');
+       if (!sl)
+               return 0;
+       if (strcmp(sl+1, devnm) == 0)
+               return 1;
+       return 0;
+}
+
 int is_container_member(struct mdstat_ent *mdstat, char *container)
 {
        if (mdstat->metadata_version == NULL ||
            strncmp(mdstat->metadata_version, "external:", 9) != 0 ||
-           !is_subarray(mdstat->metadata_version+9) ||
-           strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 ||
-           mdstat->metadata_version[10+strlen(container)] != '/')
+           !metadata_container_matches(mdstat->metadata_version+9, container))
                return 0;
 
        return 1;
@@ -1397,6 +1483,7 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
        struct mdinfo *mdi;
        struct mdinfo *info;
        int fd, err = 1;
+       char *_devnm;
 
        fd = open(dev, O_RDWR|O_EXCL);
        if (fd < 0) {
@@ -1406,15 +1493,16 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
                return -1;
        }
 
-       st->devnum = fd2devnum(fd);
-       if (st->devnum == NoMdDev) {
+       _devnm = fd2devnm(fd);
+       if (_devnm == NULL) {
                if (!quiet)
                        pr_err("Failed to determine device number for %s\n",
                               dev);
                goto close_fd;
        }
+       strcpy(st->devnm, _devnm);
 
-       mdi = sysfs_read(fd, st->devnum, GET_VERSION|GET_LEVEL);
+       mdi = sysfs_read(fd, st->devnm, GET_VERSION|GET_LEVEL);
        if (!mdi) {
                if (!quiet)
                        pr_err("Failed to read sysfs for %s\n",
@@ -1436,8 +1524,7 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
                goto free_sysfs;
        }
 
-       st->devname = devnum2devname(st->devnum);
-       if (!st->devname) {
+       if (st->devnm[0] == 0) {
                if (!quiet)
                        pr_err("Failed to allocate device name\n");
                goto free_sysfs;
@@ -1446,14 +1533,14 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
        if (!st->ss->load_container) {
                if (!quiet)
                        pr_err("%s is not a container\n", dev);
-               goto free_name;
+               goto free_sysfs;
        }
 
        if (st->ss->load_container(st, fd, NULL)) {
                if (!quiet)
                        pr_err("Failed to load metadata for %s\n",
                                dev);
-               goto free_name;
+               goto free_sysfs;
        }
 
        info = st->ss->container_content(st, subarray);
@@ -1470,9 +1557,6 @@ int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
  free_super:
        if (err)
                st->ss->free_super(st);
- free_name:
-       if (err)
-               free(st->devname);
  free_sysfs:
        sysfs_free(mdi);
  close_fd:
@@ -1570,16 +1654,14 @@ unsigned long long min_recovery_start(struct mdinfo *array)
        return recovery_start;
 }
 
-int mdmon_pid(int devnum)
+int mdmon_pid(char *devnm)
 {
        char path[100];
        char pid[10];
        int fd;
        int n;
-       char *devname = devnum2devname(devnum);
 
-       sprintf(path, "%s/%s.pid", MDMON_DIR, devname);
-       free(devname);
+       sprintf(path, "%s/%s.pid", MDMON_DIR, devnm);
 
        fd = open(path, O_RDONLY | O_NOATIME, 0);
 
@@ -1592,9 +1674,9 @@ int mdmon_pid(int devnum)
        return atoi(pid);
 }
 
-int mdmon_running(int devnum)
+int mdmon_running(char *devnm)
 {
-       int pid = mdmon_pid(devnum);
+       int pid = mdmon_pid(devnm);
        if (pid <= 0)
                return 0;
        if (kill(pid, 0) == 0)
@@ -1602,7 +1684,7 @@ int mdmon_running(int devnum)
        return 0;
 }
 
-int start_mdmon(int devnum)
+int start_mdmon(char *devnm)
 {
        int i, skipped;
        int len;
@@ -1611,8 +1693,8 @@ int start_mdmon(int devnum)
        char pathbuf[1024];
        char *paths[4] = {
                pathbuf,
-               "/sbin/mdmon",
-               "mdmon",
+               BINDIR "/mdmon",
+               "./mdmon",
                NULL
        };
 
@@ -1632,6 +1714,42 @@ int start_mdmon(int devnum)
        } else
                pathbuf[0] = '\0';
 
+       /* First try to run systemctl */
+       if (!check_env("MDADM_NO_SYSTEMCTL"))
+               switch(fork()) {
+               case 0:
+                       /* FIXME yuk. CLOSE_EXEC?? */
+                       skipped = 0;
+                       for (i = 3; skipped < 20; i++)
+                               if (close(i) < 0)
+                                       skipped++;
+                               else
+                                       skipped = 0;
+
+                       /* Don't want to see error messages from
+                        * systemctl.  If the service doesn't exist,
+                        * we start mdmon ourselves.
+                        */
+                       close(2);
+                       open("/dev/null", O_WRONLY);
+                       snprintf(pathbuf, sizeof(pathbuf), "mdmon@%s.service",
+                                devnm);
+                       status = execl("/usr/bin/systemctl", "systemctl",
+                                      "start",
+                                      pathbuf, NULL);
+                       status = execl("/bin/systemctl", "systemctl", "start",
+                                      pathbuf, NULL);
+                       exit(1);
+               case -1: pr_err("cannot run mdmon. "
+                               "Array remains readonly\n");
+                       return -1;
+               default: /* parent - good */
+                       pid = wait(&status);
+                       if (pid >= 0 && status == 0)
+                               return 0;
+               }
+
+       /* That failed, try running mdmon directly */
        switch(fork()) {
        case 0:
                /* FIXME yuk. CLOSE_EXEC?? */
@@ -1644,15 +1762,8 @@ int start_mdmon(int devnum)
 
                for (i = 0; paths[i]; i++)
                        if (paths[i][0]) {
-                               if (__offroot) {
-                                       execl(paths[i], "mdmon", "--offroot",
-                                             devnum2devname(devnum),
-                                             NULL);
-                               } else {
-                                       execl(paths[i], "mdmon",
-                                             devnum2devname(devnum),
-                                             NULL);
-                               }
+                               execl(paths[i], paths[i],
+                                     devnm, NULL);
                        }
                exit(1);
        case -1: pr_err("cannot run mdmon. "
@@ -1660,22 +1771,15 @@ int start_mdmon(int devnum)
                return -1;
        default: /* parent - good */
                pid = wait(&status);
-               if (pid < 0 || status != 0)
+               if (pid < 0 || status != 0) {
+                       pr_err("failed to launch mdmon. "
+                              "Array remains readonly\n");
                        return -1;
+               }
        }
        return 0;
 }
 
-int check_env(char *name)
-{
-       char *val = getenv(name);
-
-       if (val && atoi(val) == 1)
-               return 1;
-
-       return 0;
-}
-
 __u32 random32(void)
 {
        __u32 rv;
@@ -1696,7 +1800,7 @@ int flush_metadata_updates(struct supertype *st)
                return -1;
        }
 
-       sfd = connect_monitor(devnum2devname(st->container_dev));
+       sfd = connect_monitor(st->container_devnm);
        if (sfd < 0)
                return -1;
 
@@ -1783,7 +1887,7 @@ struct mdinfo *container_choose_spares(struct supertype *st,
                                found = 1;
                        /* check if domain matches */
                        if (found && domlist) {
-                               struct dev_policy *pol = devnum_policy(dev);
+                               struct dev_policy *pol = devid_policy(dev);
                                if (spare_group)
                                        pol_add(&pol, pol_domain,
                                                spare_group, NULL);
@@ -1807,3 +1911,65 @@ struct mdinfo *container_choose_spares(struct supertype *st,
        }
        return disks;
 }
+
+/* Checks if paths point to the same device
+ * Returns 0 if they do.
+ * Returns 1 if they don't.
+ * Returns -1 if something went wrong,
+ * e.g. paths are empty or the files
+ * they point to don't exist */
+int compare_paths (char* path1, char* path2)
+{
+       struct stat st1,st2;
+
+       if (path1 == NULL || path2 == NULL)
+               return -1;
+       if (stat(path1,&st1) != 0)
+               return -1;
+       if (stat(path2,&st2) != 0)
+               return -1;
+       if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev))
+               return 0;
+       return 1;
+}
+
+/* Make sure we can open as many devices as needed */
+void enable_fds(int devices)
+{
+       unsigned int fds = 20 + devices;
+       struct rlimit lim;
+       if (getrlimit(RLIMIT_NOFILE, &lim) != 0
+           || lim.rlim_cur >= fds)
+               return;
+       if (lim.rlim_max < fds)
+               lim.rlim_max = fds;
+       lim.rlim_cur = fds;
+       setrlimit(RLIMIT_NOFILE, &lim);
+}
+
+int in_initrd(void)
+{
+       /* This is based on similar function in systemd. */
+       struct statfs s;
+       /* statfs.f_type is signed long on s390x and MIPS, causing all
+          sorts of sign extension problems with RAMFS_MAGIC being
+          defined as 0x858458f6 */
+       return  statfs("/", &s) >= 0 &&
+               ((unsigned long)s.f_type == TMPFS_MAGIC ||
+                ((unsigned long)s.f_type & 0xFFFFFFFFUL) ==
+                ((unsigned long)RAMFS_MAGIC & 0xFFFFFFFFUL));
+}
+
+void reopen_mddev(int mdfd)
+{
+       /* Re-open without any O_EXCL, but keep
+        * the same fd
+        */
+       char *devnm;
+       int fd;
+       devnm = fd2devnm(mdfd);
+       close(mdfd);
+       fd = open_dev(devnm);
+       if (fd >= 0 && fd != mdfd)
+               dup2(fd, mdfd);
+}