]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - util.c
Create: support --readonly flag.
[thirdparty/mdadm.git] / util.c
diff --git a/util.c b/util.c
index 8e4e60d3bc8ebee09a6a212d615f61eeb2c77c21..d0765a6dabad4c678fc9ec7ca7dfc069d28802b6 100644 (file)
--- a/util.c
+++ b/util.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2006 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
  *    Author: Neil Brown
- *    Email: <neilb@cse.unsw.edu.au>
- *    Paper: Neil Brown
- *           School of Computer Science and Engineering
- *           The University of New South Wales
- *           Sydney, 2052
- *           Australia
+ *    Email: <neilb@suse.de>
  */
 
 #include       "mdadm.h"
 #include       "md_p.h"
+#include       <sys/socket.h>
 #include       <sys/utsname.h>
+#include       <sys/wait.h>
+#include       <sys/un.h>
 #include       <ctype.h>
 #include       <dirent.h>
+#include       <signal.h>
+
+int __offroot;
 
 /*
  * following taken from linux/blkpg.h because they aren't
@@ -66,6 +67,17 @@ struct blkpg_partition {
        char volname[BLKPG_VOLNAMELTH]; /* volume label */
 };
 
+#include "part.h"
+
+/* Force a compilation error if condition is true */
+#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
+
+/* Force a compilation error if condition is true, but also produce a
+   result (of value 0 and type size_t), so the expression can be used
+   e.g. in a structure initializer (or where-ever else comma expressions
+   aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+
 /*
  * Parse a 128 bit uuid in 4 integers
  * format is 32 hexx nibbles with options :.<space> separator
@@ -136,20 +148,113 @@ int get_linux_version()
 {
        struct utsname name;
        char *cp;
-       int a,b,c;
+       int a = 0, b = 0,c = 0;
        if (uname(&name) <0)
                return -1;
 
        cp = name.release;
        a = strtoul(cp, &cp, 10);
-       if (*cp != '.') return -1;
-       b = strtoul(cp+1, &cp, 10);
-       if (*cp != '.') return -1;
-       c = strtoul(cp+1, NULL, 10);
+       if (*cp == '.')
+               b = strtoul(cp+1, &cp, 10);
+       if (*cp == '.')
+               c = strtoul(cp+1, &cp, 10);
+
+       return (a*1000000)+(b*1000)+c;
+}
+
+#ifndef MDASSEMBLE
+int mdadm_version(char *version)
+{
+       int a, b, c;
+       char *cp;
+
+       if (!version)
+               version = Version;
 
+       cp = strchr(version, '-');
+       if (!cp || *(cp+1) != ' ' || *(cp+2) != 'v')
+               return -1;
+       cp += 3;
+       a = strtoul(cp, &cp, 10);
+       if (*cp != '.')
+               return -1;
+       b = strtoul(cp+1, &cp, 10);
+       if (*cp == '.')
+               c = strtoul(cp+1, &cp, 10);
+       else
+               c = 0;
+       if (*cp != ' ' && *cp != '-')
+               return -1;
        return (a*1000000)+(b*1000)+c;
 }
 
+long long parse_size(char *size)
+{
+       /* parse 'size' which should be a number optionally
+        * followed by 'K', 'M', or 'G'.
+        * Without a suffix, K is assumed.
+        * Number returned is in sectors (half-K)
+        * -1 returned on error.
+        */
+       char *c;
+       long long s = strtoll(size, &c, 10);
+       if (s > 0) {
+               switch (*c) {
+               case 'K':
+                       c++;
+               default:
+                       s *= 2;
+                       break;
+               case 'M':
+                       c++;
+                       s *= 1024 * 2;
+                       break;
+               case 'G':
+                       c++;
+                       s *= 1024 * 1024 * 2;
+                       break;
+               }
+       }
+       if (*c)
+               s = -1;
+       return s;
+}
+
+int parse_layout_10(char *layout)
+{
+       int copies, rv;
+       char *cp;
+       /* Parse the layout string for raid10 */
+       /* 'f', 'o' or 'n' followed by a number <= raid_disks */
+       if ((layout[0] !=  'n' && layout[0] != 'f' && layout[0] != 'o') ||
+           (copies = strtoul(layout+1, &cp, 10)) < 1 ||
+           copies > 200 ||
+           *cp)
+               return -1;
+       if (layout[0] == 'n')
+               rv = 256 + copies;
+       else if (layout[0] == 'o')
+               rv = 0x10000 + (copies<<8) + 1;
+       else
+               rv = 1 + (copies<<8);
+       return rv;
+}
+
+int parse_layout_faulty(char *layout)
+{
+       /* Parse the layout string for 'faulty' */
+       int ln = strcspn(layout, "0123456789");
+       char *m = xstrdup(layout);
+       int mode;
+       m[ln] = 0;
+       mode = map_name(faultylayout, m);
+       if (mode == UnSet)
+               return -1;
+
+       return mode | (atoi(layout+ln)<< ModeShift);
+}
+#endif
+
 void remove_partitions(int fd)
 {
        /* remove partitions from this block devices.
@@ -169,10 +274,53 @@ void remove_partitions(int fd)
 #endif
 }
 
-int enough(int level, int raid_disks, int layout, int clean,
-          char *avail, int avail_disks)
+int test_partition(int fd)
+{
+       /* Check if fd is a whole-disk or a partition.
+        * BLKPG will return EINVAL on a partition, and BLKPG_DEL_PARTITION
+        * will return ENXIO on an invalid partition number.
+        */
+       struct blkpg_ioctl_arg a;
+       struct blkpg_partition p;
+       a.op = BLKPG_DEL_PARTITION;
+       a.data = (void*)&p;
+       a.datalen = sizeof(p);
+       a.flags = 0;
+       memset(a.data, 0, a.datalen);
+       p.pno = 1<<30;
+       if (ioctl(fd, BLKPG, &a) == 0)
+               /* Very unlikely, but not a partition */
+               return 0;
+       if (errno == ENXIO)
+               /* not a partition */
+               return 0;
+
+       return 1;
+}
+
+int test_partition_from_id(dev_t id)
+{
+       char buf[20];
+       int fd, rv;
+
+       sprintf(buf, "%d:%d", major(id), minor(id));
+       fd = dev_open(buf, O_RDONLY);
+       if (fd < 0)
+               return -1;
+       rv = test_partition(fd);
+       close(fd);
+       return rv;
+}
+
+int enough(int level, int raid_disks, int layout, int clean, char *avail)
 {
        int copies, first;
+       int i;
+       int avail_disks = 0;
+
+       for (i = 0; i < raid_disks; i++)
+               avail_disks += !!avail[i];
+
        switch (level) {
        case 10:
                /* This is the tricky one - we need to check
@@ -195,9 +343,9 @@ int enough(int level, int raid_disks, int layout, int clean,
                } while (first != 0);
                return 1;
 
-       case -4:
+       case LEVEL_MULTIPATH:
                return avail_disks>= 1;
-       case -1:
+       case LEVEL_LINEAR:
        case 0:
                return avail_disks == raid_disks;
        case 1:
@@ -218,6 +366,43 @@ int enough(int level, int raid_disks, int layout, int clean,
        }
 }
 
+int enough_fd(int fd)
+{
+       struct mdu_array_info_s array;
+       struct mdu_disk_info_s disk;
+       int avail_disks = 0;
+       int i, rv;
+       char *avail;
+
+       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 ||
+           array.raid_disks <= 0)
+               return 0;
+       avail = xcalloc(array.raid_disks, 1);
+       for (i=0; i < MAX_DISKS && array.nr_disks > 0; i++) {
+               disk.number = i;
+               if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+                       continue;
+               if (disk.major == 0 && disk.minor == 0)
+                       continue;
+               array.nr_disks--;
+
+               if (! (disk.state & (1<<MD_DISK_SYNC)))
+                       continue;
+               if (disk.raid_disk < 0 || disk.raid_disk >= array.raid_disks)
+                       continue;
+               avail_disks++;
+               avail[disk.raid_disk] = 1;
+       }
+       /* This is used on an active array, so assume it is clean */
+       rv = enough(array.level, array.raid_disks, array.layout,
+                   1, avail);
+       free(avail);
+       return rv;
+}
+
+
+const int uuid_zero[4] = { 0, 0, 0, 0 };
+
 int same_uuid(int a[4], int b[4], int swapuuid)
 {
        if (swapuuid) {
@@ -265,6 +450,36 @@ void copy_uuid(void *a, int b[4], int swapuuid)
                memcpy(a, b, 16);
 }
 
+char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
+{
+       int i, j;
+       char uuid[16];
+       char *c = buf;
+       strcpy(c, "UUID-");
+       c += strlen(c);
+       copy_uuid(uuid, id, swap);
+       for (i = 0; i < 4; i++) {
+               if (i)
+                       *c++ = sep;
+               for (j = 3; j >= 0; j--) {
+                       sprintf(c,"%02x", (unsigned char) uuid[j+4*i]);
+                       c+= 2;
+               }
+       }
+       return buf;
+
+}
+
+char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep)
+{
+       // dirty hack to work around an issue with super1 superblocks...
+       // super1 superblocks need swapuuid set in order for assembly to
+       // work, but can't have it set if we want this printout to match
+       // all the other uuid printouts in super1.c, so we force swapuuid
+       // to 1 to make our printout match the rest of super1
+       return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 : st->ss->swapuuid, buf, sep);
+}
+
 #ifndef MDASSEMBLE
 int check_ext2(int fd, char *name)
 {
@@ -290,7 +505,7 @@ int check_ext2(int fd, char *name)
        mtime = sb[44]|(sb[45]|(sb[46]|sb[47]<<8)<<8)<<8;
        bsize = sb[24]|(sb[25]|(sb[26]|sb[27]<<8)<<8)<<8;
        size = sb[4]|(sb[5]|(sb[6]|sb[7]<<8)<<8)<<8;
-       fprintf(stderr, Name ": %s appears to contain an ext2fs file system\n",
+       pr_err("%s appears to contain an ext2fs file system\n",
                name);
        fprintf(stderr,"    size=%dK  mtime=%s",
                size*(1<<bsize), ctime(&mtime));
@@ -314,7 +529,7 @@ int check_reiser(int fd, char *name)
        if (strncmp((char*)sb+52, "ReIsErFs",8)!=0 &&
            strncmp((char*)sb+52, "ReIsEr2Fs",9)!=0)
                return 0;
-       fprintf(stderr, Name ": %s appears to contain a reiserfs file system\n",name);
+       pr_err("%s appears to contain a reiserfs file system\n",name);
        size = sb[0]|(sb[1]|(sb[2]|sb[3]<<8)<<8)<<8;
        fprintf(stderr, "    size = %luK\n", size*4);
 
@@ -329,11 +544,12 @@ int check_raid(int fd, char *name)
        struct supertype *st = guess_super(fd);
 
        if (!st) return 0;
+       st->ignore_hw_compat = 1;
        st->ss->load_super(st, fd, name);
        /* Looks like a raid array .. */
-       fprintf(stderr, Name ": %s appears to be part of a raid array:\n",
+       pr_err("%s appears to be part of a raid array:\n",
                name);
-       st->ss->getinfo_super(st, &info);
+       st->ss->getinfo_super(st, &info, NULL);
        st->ss->free_super(st);
        crtime = info.array.ctime;
        level = map_num(pers, info.array.level);
@@ -359,32 +575,11 @@ int ask(char *mesg)
                        return 0;
                add = "(y/n) ";
        }
-       fprintf(stderr, Name ": assuming 'no'\n");
+       pr_err("assuming 'no'\n");
        return 0;
 }
 #endif /* MDASSEMBLE */
 
-char *map_num(mapping_t *map, int num)
-{
-       while (map->name) {
-               if (map->num == num)
-                       return map->name;
-               map++;
-       }
-       return NULL;
-}
-
-int map_name(mapping_t *map, char *name)
-{
-       while (map->name) {
-               if (strcmp(map->name, name)==0)
-                       return map->num;
-               map++;
-       }
-       return UnSet;
-}
-
-
 int is_standard(char *dev, int *nump)
 {
        /* tests if dev is a "standard" md dev name.
@@ -402,7 +597,7 @@ int is_standard(char *dev, int *nump)
        if (strncmp(d, "/d",2)==0)
                d += 2, type=1; /* /dev/md/dN{pM} */
        else if (strncmp(d, "/md_d", 5)==0)
-               d += 5, type=1; /* /dev/md_dNpM */
+               d += 5, type=1; /* /dev/md_dN{pM} */
        else if (strncmp(d, "/md", 3)==0)
                d += 3, type=-1; /* /dev/mdN */
        else if (d-dev > 3 && strncmp(d-2, "md/", 3)==0)
@@ -421,121 +616,6 @@ int is_standard(char *dev, int *nump)
        return type;
 }
 
-
-/*
- * convert a major/minor pair for a block device into a name in /dev, if possible.
- * On the first call, walk /dev collecting name.
- * Put them in a simple linked listfor now.
- */
-struct devmap {
-    int major, minor;
-    char *name;
-    struct devmap *next;
-} *devlist = NULL;
-int devlist_ready = 0;
-
-int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s)
-{
-       struct stat st;
-       if (S_ISLNK(stb->st_mode)) {
-               stat(name, &st);
-               stb = &st;
-       }
-
-       if ((stb->st_mode&S_IFMT)== S_IFBLK) {
-               char *n = strdup(name);
-               struct devmap *dm = malloc(sizeof(*dm));
-               if (strncmp(n, "/dev/./", 7)==0)
-                       strcpy(n+4, name+6);
-               if (dm) {
-                       dm->major = major(stb->st_rdev);
-                       dm->minor = minor(stb->st_rdev);
-                       dm->name = n;
-                       dm->next = devlist;
-                       devlist = dm;
-               }
-       }
-       return 0;
-}
-
-#ifndef HAVE_NFTW
-#ifdef HAVE_FTW
-int add_dev_1(const char *name, const struct stat *stb, int flag)
-{
-       return add_dev(name, stb, flag, NULL);
-}
-int nftw(const char *path, int (*han)(const char *name, const struct stat *stb, int flag, struct FTW *s), int nopenfd, int flags)
-{
-       return ftw(path, add_dev_1, nopenfd);
-}
-#else
-int nftw(const char *path, int (*han)(const char *name, const struct stat *stb, int flag, struct FTW *s), int nopenfd, int flags)
-{
-       return 0;
-}
-#endif /* HAVE_FTW */
-#endif /* HAVE_NFTW */
-
-/*
- * Find a block device with the right major/minor number.
- * If we find multiple names, choose the shortest.
- * If we find a non-standard name, it is probably there
- * deliberately so prefer it over a standard name.
- * This applies only to names for MD devices.
- */
-char *map_dev(int major, int minor, int create)
-{
-       struct devmap *p;
-       char *std = NULL, *nonstd=NULL;
-       int did_check = 0;
-
-       if (major == 0 && minor == 0)
-                       return NULL;
-
- retry:
-       if (!devlist_ready) {
-               char *dev = "/dev";
-               struct stat stb;
-               while(devlist) {
-                       struct devmap *d = devlist;
-                       devlist = d->next;
-                       free(d->name);
-                       free(d);
-               }
-               if (lstat(dev, &stb)==0 &&
-                   S_ISLNK(stb.st_mode))
-                       dev = "/dev/.";
-               nftw(dev, add_dev, 10, FTW_PHYS);
-               devlist_ready=1;
-               did_check = 1;
-       }
-
-       for (p=devlist; p; p=p->next)
-               if (p->major == major &&
-                   p->minor == minor) {
-                       if (is_standard(p->name, NULL)) {
-                               if (std == NULL ||
-                                   strlen(p->name) < strlen(std))
-                                       std = p->name;
-                       } else {
-                               if (nonstd == NULL ||
-                                   strlen(p->name) < strlen(nonstd))
-                                       nonstd = p->name;
-                       }
-               }
-       if (!std && !nonstd && !did_check) {
-               devlist_ready = 0;
-               goto retry;
-       }
-       if (create && !std && !nonstd) {
-               static char buf[30];
-               snprintf(buf, sizeof(buf), "%d:%d", major, minor);
-               nonstd = buf;
-       }
-
-       return nonstd ? nonstd : std;
-}
-
 unsigned long calc_csum(void *super, int bytes)
 {
        unsigned long long newcsum = 0;
@@ -569,7 +649,7 @@ char *human_size(long long bytes)
         * We allow upto 2048Megabytes before converting to
         * gigabytes, as that shows more precision and isn't
         * too large a number.
-        * Terrabytes are not yet handled.
+        * Terabytes are not yet handled.
         */
 
        if (bytes < 5000*1024)
@@ -610,10 +690,35 @@ char *human_size_brief(long long bytes)
                        );
        return buf;
 }
+
+void print_r10_layout(int layout)
+{
+       int near = layout & 255;
+       int far = (layout >> 8) & 255;
+       int offset = (layout&0x10000);
+       char *sep = "";
+
+       if (near != 1) {
+               printf("%s near=%d", sep, near);
+               sep = ",";
+       }
+       if (far != 1)
+               printf("%s %s=%d", sep, offset?"offset":"far", far);
+       if (near*far == 1)
+               printf("NO REDUNDANCY");
+}
 #endif
 
 unsigned long long calc_array_size(int level, int raid_disks, int layout,
                                   int chunksize, unsigned long long devsize)
+{
+       if (level == 1)
+               return devsize;
+       devsize &= ~(unsigned long long)((chunksize>>9)-1);
+       return get_data_disks(level, layout, raid_disks) * devsize;
+}
+
+int get_data_disks(int level, int layout, int raid_disks)
 {
        int data_disks = 0;
        switch (level) {
@@ -625,41 +730,11 @@ unsigned long long calc_array_size(int level, int raid_disks, int layout,
        case 10: data_disks = raid_disks / (layout & 255) / ((layout>>8)&255);
                break;
        }
-       devsize &= ~(unsigned long long)((chunksize>>9)-1);
-       return data_disks * devsize;
-}
 
-#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
-int get_mdp_major(void)
-{
-static int mdp_major = -1;
-       FILE *fl;
-       char *w;
-       int have_block = 0;
-       int have_devices = 0;
-       int last_num = -1;
-
-       if (mdp_major != -1)
-               return mdp_major;
-       fl = fopen("/proc/devices", "r");
-       if (!fl)
-               return -1;
-       while ((w = conf_word(fl, 1))) {
-               if (have_block && strcmp(w, "devices:")==0)
-                       have_devices = 1;
-               have_block =  (strcmp(w, "Block")==0);
-               if (isdigit(w[0]))
-                       last_num = atoi(w);
-               if (have_devices && strcmp(w, "mdp")==0)
-                       mdp_major = last_num;
-               free(w);
-       }
-       fclose(fl);
-       return mdp_major;
+       return data_disks;
 }
 
-
-
+#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
 char *get_md_name(int dev)
 {
        /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */
@@ -714,26 +789,11 @@ void put_md_name(char *name)
                unlink(name);
 }
 
-static int dev2major(int d)
-{
-       if (d >= 0)
-               return MD_MAJOR;
-       else
-               return get_mdp_major();
-}
-
-static int dev2minor(int d)
-{
-       if (d >= 0)
-               return d;
-       return (-1-d) << MdpMinorShift;
-}
-
 int find_free_devnum(int use_partitions)
 {
        int devnum;
        for (devnum = 127; devnum != 128;
-            devnum = devnum ? devnum-1 : (1<<22)-1) {
+            devnum = devnum ? devnum-1 : (1<<20)-1) {
                char *dn;
                int _devnum;
 
@@ -765,29 +825,116 @@ int dev_open(char *dev, int flags)
        int minor;
 
        if (!dev) return -1;
+       flags |= O_DIRECT;
 
        major = strtoul(dev, &e, 0);
        if (e > dev && *e == ':' && e[1] &&
            (minor = strtoul(e+1, &e, 0)) >= 0 &&
            *e == 0) {
-               snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
-                        (int)getpid(), major, minor);
-               if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
-                       fd = open(devname, flags);
-                       unlink(devname);
+               char *path = map_dev(major, minor, 0);
+               if (path)
+                       fd = open(path, flags);
+               if (fd < 0) {
+                       snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
+                                (int)getpid(), major, minor);
+                       if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
+                               fd = open(devname, flags);
+                               unlink(devname);
+                       }
+               }
+               if (fd < 0) {
+                       snprintf(devname, sizeof(devname), "/tmp/.tmp.md.%d:%d:%d",
+                                (int)getpid(), major, minor);
+                       if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
+                               fd = open(devname, flags);
+                               unlink(devname);
+                       }
                }
        } else
                fd = open(dev, flags);
        return fd;
 }
 
-struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, NULL };
+int open_dev_flags(int devnum, int flags)
+{
+       char buf[20];
+
+       sprintf(buf, "%d:%d", dev2major(devnum), dev2minor(devnum));
+       return dev_open(buf, flags);
+}
+
+int open_dev(int devnum)
+{
+       return open_dev_flags(devnum, O_RDONLY);
+}
 
-#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
+int open_dev_excl(int devnum)
+{
+       char buf[20];
+       int i;
+       int flags = O_RDWR;
+
+       sprintf(buf, "%d:%d", dev2major(devnum), dev2minor(devnum));
+       for (i=0 ; i<25 ; i++) {
+               int fd = dev_open(buf, flags|O_EXCL);
+               if (fd >= 0)
+                       return fd;
+               if (errno == EACCES && flags == O_RDWR) {
+                       flags = O_RDONLY;
+                       continue;
+               }
+               if (errno != EBUSY)
+                       return fd;
+               usleep(200000);
+       }
+       return -1;
+}
 
-struct supertype supertype_container_member;
+int same_dev(char *one, char *two)
+{
+       struct stat st1, st2;
+       if (stat(one, &st1) != 0)
+               return 0;
+       if (stat(two, &st2) != 0)
+               return 0;
+       if ((st1.st_mode & S_IFMT) != S_IFBLK)
+               return 0;
+       if ((st2.st_mode & S_IFMT) != S_IFBLK)
+               return 0;
+       return st1.st_rdev == st2.st_rdev;
+}
 
-struct supertype *super_by_fd(int fd)
+void wait_for(char *dev, int fd)
+{
+       int i;
+       struct stat stb_want;
+
+       if (fstat(fd, &stb_want) != 0 ||
+           (stb_want.st_mode & S_IFMT) != S_IFBLK)
+               return;
+
+       for (i=0 ; i<25 ; i++) {
+               struct stat stb;
+               if (stat(dev, &stb) == 0 &&
+                   (stb.st_mode & S_IFMT) == S_IFBLK &&
+                   (stb.st_rdev == stb_want.st_rdev))
+                       return;
+               usleep(200000);
+       }
+       if (i == 25)
+               dprintf("%s: timeout waiting for %s\n", __func__, dev);
+}
+
+struct superswitch *superlist[] =
+{
+       &super0, &super1,
+       &super_ddf, &super_imsm,
+       &mbr, &gpt,
+       NULL };
+
+#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
+
+struct supertype *super_by_fd(int fd, char **subarrayp)
 {
        mdu_array_info_t array;
        int vers;
@@ -797,6 +944,8 @@ struct supertype *super_by_fd(int fd)
        char *verstr;
        char version[20];
        int i;
+       char *subarray = NULL;
+       int container = NoMdDev;
 
        sra = sysfs_read(fd, 0, GET_VERSION);
 
@@ -816,68 +965,101 @@ struct supertype *super_by_fd(int fd)
                sprintf(version, "%d.%d", vers, minor);
                verstr = version;
        }
-       if (minor == -2 && verstr[0] == '/')
-               st = &supertype_container_member;
-       else
-               for (i = 0; st == NULL && superlist[i] ; i++)
-                       st = superlist[i]->match_metadata_desc(verstr);
+       if (minor == -2 && is_subarray(verstr)) {
+               char *dev = verstr+1;
+
+               subarray = strchr(dev, '/');
+               if (subarray) {
+                       *subarray++ = '\0';
+                       subarray = xstrdup(subarray);
+               }
+               container = devname2devnum(dev);
+               if (sra)
+                       sysfs_free(sra);
+               sra = sysfs_read(-1, container, GET_VERSION);
+               if (sra && sra->text_version[0])
+                       verstr = sra->text_version;
+               else
+                       verstr = "-no-metadata-";
+       }
+
+       for (i = 0; st == NULL && superlist[i] ; i++)
+               st = superlist[i]->match_metadata_desc(verstr);
 
        if (sra)
                sysfs_free(sra);
-       if (st)
+       if (st) {
                st->sb = NULL;
+               if (subarrayp)
+                       *subarrayp = subarray;
+               st->container_dev = container;
+               st->devnum = fd2devnum(fd);
+       } else
+               free(subarray);
+
        return st;
 }
 #endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
 
-
-struct supertype *dup_super(struct supertype *st)
+int dev_size_from_id(dev_t id, unsigned long long *size)
 {
-       struct supertype *stnew = NULL;
-       char *verstr = NULL;
-       char version[20];
-       int i;
-
-       if (!st)
-               return st;
+       char buf[20];
+       int fd;
 
-       if (st->ss->text_version)
-               strcpy(version, st->ss->text_version);
-       else if (st->minor_version == -1)
-               sprintf(version, "%d", st->ss->major);
-       else
-               sprintf(version, "%d.%d", st->ss->major, st->minor_version);
-       verstr = version;
+       sprintf(buf, "%d:%d", major(id), minor(id));
+       fd = dev_open(buf, O_RDONLY);
+       if (fd < 0)
+               return 0;
+       if (get_dev_size(fd, NULL, size)) {
+               close(fd);
+               return 1;
+       }
+       close(fd);
+       return 0;
+}
 
-       for (i = 0; stnew == NULL && superlist[i] ; i++)
-               stnew = superlist[i]->match_metadata_desc(verstr);
+struct supertype *dup_super(struct supertype *orig)
+{
+       struct supertype *st;
 
-       if (stnew)
-               stnew->sb = NULL;
-       return stnew;
+       if (!orig)
+               return orig;
+       st = xcalloc(1, sizeof(*st));
+       st->ss = orig->ss;
+       st->max_devs = orig->max_devs;
+       st->minor_version = orig->minor_version;
+       st->sb = NULL;
+       st->info = NULL;
+       return st;
 }
 
-struct supertype *guess_super(int fd)
+struct supertype *guess_super_type(int fd, enum guess_types guess_type)
 {
        /* try each load_super to find the best match,
         * and return the best superswitch
         */
        struct superswitch  *ss;
        struct supertype *st;
-       unsigned long besttime = 0;
+       time_t besttime = 0;
        int bestsuper = -1;
        int i;
 
-       st = malloc(sizeof(*st));
-       memset(st, 0, sizeof(*st));
+       st = xcalloc(1, sizeof(*st));
+       st->container_dev = NoMdDev;
+
        for (i=0 ; superlist[i]; i++) {
                int rv;
                ss = superlist[i];
-               st->ss = NULL;
+               if (guess_type == guess_array && ss->add_to_super == NULL)
+                       continue;
+               if (guess_type == guess_partitions && ss->add_to_super != NULL)
+                       continue;
+               memset(st, 0, sizeof(*st));
+               st->ignore_hw_compat = 1;
                rv = ss->load_super(st, fd, NULL);
                if (rv == 0) {
                        struct mdinfo info;
-                       st->ss->getinfo_super(st, &info);
+                       st->ss->getinfo_super(st, &info, NULL);
                        if (bestsuper == -1 ||
                            besttime < info.array.ctime) {
                                bestsuper = i;
@@ -888,10 +1070,12 @@ struct supertype *guess_super(int fd)
        }
        if (bestsuper != -1) {
                int rv;
-               st->ss = NULL;
+               memset(st, 0, sizeof(*st));
+               st->ignore_hw_compat = 1;
                rv = superlist[bestsuper]->load_super(st, fd, NULL);
                if (rv == 0) {
                        superlist[bestsuper]->free_super(st);
+                       st->ignore_hw_compat = 0;
                        return st;
                }
        }
@@ -918,7 +1102,7 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep)
                        ldsize <<= 9;
                } else {
                        if (dname)
-                               fprintf(stderr, Name ": Cannot get size of %s: %s\b",
+                               pr_err("Cannot get size of %s: %s\b",
                                        dname, strerror(errno));
                        return 0;
                }
@@ -927,13 +1111,168 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep)
        return 1;
 }
 
+/* Return true if this can only be a container, not a member device.
+ * i.e. is and md device and size is zero
+ */
+int must_be_container(int fd)
+{
+       unsigned long long size;
+       if (md_get_version(fd) < 0)
+               return 0;
+       if (get_dev_size(fd, NULL, &size) == 0)
+               return 1;
+       if (size == 0)
+               return 1;
+       return 0;
+}
+
+/* Sets endofpart parameter to the last block used by the last GPT partition on the device.
+ * Returns: 1 if successful
+ *         -1 for unknown partition type
+ *          0 for other errors
+ */
+static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
+{
+       struct GPT gpt;
+       unsigned char empty_gpt_entry[16]= {0};
+       struct GPT_part_entry *part;
+       char buf[512];
+       unsigned long long curr_part_end;
+       unsigned all_partitions, entry_size;
+       unsigned part_nr;
+
+       *endofpart = 0;
+
+       BUILD_BUG_ON(sizeof(gpt) != 512);
+       /* skip protective MBR */
+       lseek(fd, 512, SEEK_SET);
+       /* read GPT header */
+       if (read(fd, &gpt, 512) != 512)
+               return 0;
+
+       /* get the number of partition entries and the entry size */
+       all_partitions = __le32_to_cpu(gpt.part_cnt);
+       entry_size = __le32_to_cpu(gpt.part_size);
+
+       /* Check GPT signature*/
+       if (gpt.magic != GPT_SIGNATURE_MAGIC)
+               return -1;
+
+       /* sanity checks */
+       if (all_partitions > 1024 ||
+           entry_size > sizeof(buf))
+               return -1;
+
+       part = (struct GPT_part_entry *)buf;
+
+       for (part_nr=0; part_nr < all_partitions; part_nr++) {
+               /* read partition entry */
+               if (read(fd, buf, entry_size) != (ssize_t)entry_size)
+                       return 0;
+
+               /* is this valid partition? */
+               if (memcmp(part->type_guid, empty_gpt_entry, 16) != 0) {
+                       /* check the last lba for the current partition */
+                       curr_part_end = __le64_to_cpu(part->ending_lba);
+                       if (curr_part_end > *endofpart)
+                               *endofpart = curr_part_end;
+               }
+
+       }
+       return 1;
+}
+
+/* Sets endofpart parameter to the last block used by the last partition on the device.
+ * Returns: 1 if successful
+ *         -1 for unknown partition type
+ *          0 for other errors
+ */
+static int get_last_partition_end(int fd, unsigned long long *endofpart)
+{
+       struct MBR boot_sect;
+       struct MBR_part_record *part;
+       unsigned long long curr_part_end;
+       unsigned part_nr;
+       int retval = 0;
+
+       *endofpart = 0;
+
+       BUILD_BUG_ON(sizeof(boot_sect) != 512);
+       /* read MBR */
+       lseek(fd, 0, 0);
+       if (read(fd, &boot_sect, 512) != 512)
+               goto abort;
+
+       /* check MBP signature */
+       if (boot_sect.magic == MBR_SIGNATURE_MAGIC) {
+               retval = 1;
+               /* found the correct signature */
+               part = boot_sect.parts;
+
+               for (part_nr=0; part_nr < MBR_PARTITIONS; part_nr++) {
+                       /* check for GPT type */
+                       if (part->part_type == MBR_GPT_PARTITION_TYPE) {
+                               retval = get_gpt_last_partition_end(fd, endofpart);
+                               break;
+                       }
+                       /* check the last used lba for the current partition  */
+                       curr_part_end = __le32_to_cpu(part->first_sect_lba) +
+                               __le32_to_cpu(part->blocks_num);
+                       if (curr_part_end > *endofpart)
+                               *endofpart = curr_part_end;
+
+                       part++;
+               }
+       } else {
+               /* Unknown partition table */
+               retval = -1;
+       }
+ abort:
+       return retval;
+}
+
+int check_partitions(int fd, char *dname, unsigned long long freesize,
+                       unsigned long long size)
+{
+       /*
+        * Check where the last partition ends
+        */
+       unsigned long long endofpart;
+       int ret;
+
+       if ((ret = get_last_partition_end(fd, &endofpart)) > 0) {
+               /* There appears to be a partition table here */
+               if (freesize == 0) {
+                       /* partitions will not be visible in new device */
+                       pr_err("partition table exists on %s but will be lost or\n"
+                              "       meaningless after creating array\n",
+                              dname);
+                       return 1;
+               } else if (endofpart > freesize) {
+                       /* last partition overlaps metadata */
+                       pr_err("metadata will over-write last partition on %s.\n",
+                              dname);
+                       return 1;
+               } else if (size && endofpart > size) {
+                       /* partitions will be truncated in new device */
+                       pr_err("array size is too small to cover all partitions on %s.\n",
+                              dname);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
 void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk)
 {
        int d;
+
        ioctl(mdfd, GET_ARRAY_INFO, ainf);
-       for (d = 0 ; d < ainf->raid_disks + ainf->nr_disks ; d++)
-               if (ioctl(mdfd, GET_DISK_INFO, disk) == 0)
+       for (d = 0 ; d < MAX_DISKS ; d++) {
+               if (ioctl(mdfd, GET_DISK_INFO, disk) == 0 &&
+                   (disk->major || disk->minor))
                        return;
+       }
 }
 
 int open_container(int fd)
@@ -970,7 +1309,7 @@ int open_container(int fd)
                        continue;
                n = read(dfd, buf, sizeof(buf));
                close(dfd);
-               if (n <= 0 || n >= sizeof(buf))
+               if (n <= 0 || (unsigned)n >= sizeof(buf))
                        continue;
                buf[n] = 0;
                if (sscanf(buf, "%d:%d", &major, &minor) != 2)
@@ -982,34 +1321,510 @@ int open_container(int fd)
                        return dfd;
                }
        }
+       closedir(dir);
        return -1;
 }
 
-char *devnum2devname(int num)
+struct superswitch *version_to_superswitch(char *vers)
+{
+       int i;
+
+       for (i = 0; superlist[i]; i++) {
+               struct superswitch *ss = superlist[i];
+
+               if (strcmp(vers, ss->name) == 0)
+                       return ss;
+       }
+
+       return NULL;
+}
+
+int is_container_member(struct mdstat_ent *mdstat, char *container)
+{
+       if (mdstat->metadata_version == NULL ||
+           strncmp(mdstat->metadata_version, "external:", 9) != 0 ||
+           !is_subarray(mdstat->metadata_version+9) ||
+           strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 ||
+           mdstat->metadata_version[10+strlen(container)] != '/')
+               return 0;
+
+       return 1;
+}
+
+int is_subarray_active(char *subarray, char *container)
 {
-       char name[100];
-       if (num > 0)
-               sprintf(name, "md%d", num);
+       struct mdstat_ent *mdstat = mdstat_read(0, 0);
+       struct mdstat_ent *ent;
+
+       for (ent = mdstat; ent; ent = ent->next)
+               if (is_container_member(ent, container))
+                       if (strcmp(to_subarray(ent, container), subarray) == 0)
+                               break;
+
+       free_mdstat(mdstat);
+
+       return ent != NULL;
+}
+
+/* open_subarray - opens a subarray in a container
+ * @dev: container device name
+ * @st: empty supertype
+ * @quiet: block reporting errors flag
+ *
+ * On success returns an fd to a container and fills in *st
+ */
+int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
+{
+       struct mdinfo *mdi;
+       struct mdinfo *info;
+       int fd, err = 1;
+
+       fd = open(dev, O_RDWR|O_EXCL);
+       if (fd < 0) {
+               if (!quiet)
+                       pr_err("Couldn't open %s, aborting\n",
+                               dev);
+               return -1;
+       }
+
+       st->devnum = fd2devnum(fd);
+       if (st->devnum == NoMdDev) {
+               if (!quiet)
+                       pr_err("Failed to determine device number for %s\n",
+                              dev);
+               goto close_fd;
+       }
+
+       mdi = sysfs_read(fd, st->devnum, GET_VERSION|GET_LEVEL);
+       if (!mdi) {
+               if (!quiet)
+                       pr_err("Failed to read sysfs for %s\n",
+                               dev);
+               goto close_fd;
+       }
+
+       if (mdi->array.level != UnSet) {
+               if (!quiet)
+                       pr_err("%s is not a container\n", dev);
+               goto free_sysfs;
+       }
+
+       st->ss = version_to_superswitch(mdi->text_version);
+       if (!st->ss) {
+               if (!quiet)
+                       pr_err("Operation not supported for %s metadata\n",
+                              mdi->text_version);
+               goto free_sysfs;
+       }
+
+       st->devname = devnum2devname(st->devnum);
+       if (!st->devname) {
+               if (!quiet)
+                       pr_err("Failed to allocate device name\n");
+               goto free_sysfs;
+       }
+
+       if (!st->ss->load_container) {
+               if (!quiet)
+                       pr_err("%s is not a container\n", dev);
+               goto free_name;
+       }
+
+       if (st->ss->load_container(st, fd, NULL)) {
+               if (!quiet)
+                       pr_err("Failed to load metadata for %s\n",
+                               dev);
+               goto free_name;
+       }
+
+       info = st->ss->container_content(st, subarray);
+       if (!info) {
+               if (!quiet)
+                       pr_err("Failed to find subarray-%s in %s\n",
+                               subarray, dev);
+               goto free_super;
+       }
+       free(info);
+
+       err = 0;
+
+ free_super:
+       if (err)
+               st->ss->free_super(st);
+ free_name:
+       if (err)
+               free(st->devname);
+ free_sysfs:
+       sysfs_free(mdi);
+ close_fd:
+       if (err)
+               close(fd);
+
+       if (err)
+               return -1;
        else
-               sprintf(name, "md_d%d", -1-num);
-       return strdup(name);
+               return fd;
 }
 
-int fd2devnum(int fd)
+int add_disk(int mdfd, struct supertype *st,
+            struct mdinfo *sra, struct mdinfo *info)
 {
-       struct stat stb;
-       if (fstat(fd, &stb) == 0 &&
-           (S_IFMT&stb.st_mode)==S_IFBLK) {
-               if (major(stb.st_rdev) == MD_MAJOR)
-                       return minor(stb.st_rdev);
+       /* Add a device to an array, in one of 2 ways. */
+       int rv;
+#ifndef MDASSEMBLE
+       if (st->ss->external) {
+               if (info->disk.state & (1<<MD_DISK_SYNC))
+                       info->recovery_start = MaxSector;
                else
-                       return -1- (minor(stb.st_rdev)>>6);
+                       info->recovery_start = 0;
+               rv = sysfs_add_disk(sra, info, 0);
+               if (! rv) {
+                       struct mdinfo *sd2;
+                       for (sd2 = sra->devs; sd2; sd2=sd2->next)
+                               if (sd2 == info)
+                                       break;
+                       if (sd2 == NULL) {
+                               sd2 = xmalloc(sizeof(*sd2));
+                               *sd2 = *info;
+                               sd2->next = sra->devs;
+                               sra->devs = sd2;
+                       }
+               }
+       } else
+#endif
+               rv = ioctl(mdfd, ADD_NEW_DISK, &info->disk);
+       return rv;
+}
+
+int remove_disk(int mdfd, struct supertype *st,
+               struct mdinfo *sra, struct mdinfo *info)
+{
+       int rv;
+       /* Remove the disk given by 'info' from the array */
+#ifndef MDASSEMBLE
+       if (st->ss->external)
+               rv = sysfs_set_str(sra, info, "slot", "none");
+       else
+#endif
+               rv = ioctl(mdfd, HOT_REMOVE_DISK, makedev(info->disk.major,
+                                                         info->disk.minor));
+       return rv;
+}
+
+int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
+{
+       /* Initialise kernel's knowledge of array.
+        * This varies between externally managed arrays
+        * and older kernels
+        */
+       int vers = md_get_version(mdfd);
+       int rv;
+
+#ifndef MDASSEMBLE
+       if (st->ss->external)
+               rv = sysfs_set_array(info, vers);
+       else
+#endif
+               if ((vers % 100) >= 1) { /* can use different versions */
+               mdu_array_info_t inf;
+               memset(&inf, 0, sizeof(inf));
+               inf.major_version = info->array.major_version;
+               inf.minor_version = info->array.minor_version;
+               rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
+       } else
+               rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
+       return rv;
+}
+
+unsigned long long min_recovery_start(struct mdinfo *array)
+{
+       /* find the minimum recovery_start in an array for metadata
+        * formats that only record per-array recovery progress instead
+        * of per-device
+        */
+       unsigned long long recovery_start = MaxSector;
+       struct mdinfo *d;
+
+       for (d = array->devs; d; d = d->next)
+               recovery_start = min(recovery_start, d->recovery_start);
+
+       return recovery_start;
+}
+
+int mdmon_pid(int devnum)
+{
+       char path[100];
+       char pid[10];
+       int fd;
+       int n;
+       char *devname = devnum2devname(devnum);
+
+       sprintf(path, "%s/%s.pid", MDMON_DIR, devname);
+       free(devname);
+
+       fd = open(path, O_RDONLY | O_NOATIME, 0);
+
+       if (fd < 0)
+               return -1;
+       n = read(fd, pid, 9);
+       close(fd);
+       if (n <= 0)
+               return -1;
+       return atoi(pid);
+}
+
+int mdmon_running(int devnum)
+{
+       int pid = mdmon_pid(devnum);
+       if (pid <= 0)
+               return 0;
+       if (kill(pid, 0) == 0)
+               return 1;
+       return 0;
+}
+
+int start_mdmon(int devnum)
+{
+       int i, skipped;
+       int len;
+       pid_t pid;      
+       int status;
+       char pathbuf[1024];
+       char *paths[4] = {
+               pathbuf,
+               "/sbin/mdmon",
+               "mdmon",
+               NULL
+       };
+
+       if (check_env("MDADM_NO_MDMON"))
+               return 0;
+
+       len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)-1);
+       if (len > 0) {
+               char *sl;
+               pathbuf[len] = 0;
+               sl = strrchr(pathbuf, '/');
+               if (sl)
+                       sl++;
+               else
+                       sl = pathbuf;
+               strcpy(sl, "mdmon");
+       } else
+               pathbuf[0] = '\0';
+
+       switch(fork()) {
+       case 0:
+               /* FIXME yuk. CLOSE_EXEC?? */
+               skipped = 0;
+               for (i=3; skipped < 20; i++)
+                       if (close(i) < 0)
+                               skipped++;
+                       else
+                               skipped = 0;
+
+               for (i=0; paths[i]; i++)
+                       if (paths[i][0]) {
+                               if (__offroot) {
+                                       execl(paths[i], "mdmon", "--offroot",
+                                             devnum2devname(devnum),
+                                             NULL);
+                               } else {
+                                       execl(paths[i], "mdmon",
+                                             devnum2devname(devnum),
+                                             NULL);
+                               }
+                       }
+               exit(1);
+       case -1: pr_err("cannot run mdmon. "
+                        "Array remains readonly\n");
+               return -1;
+       default: /* parent - good */
+               pid = wait(&status);
+               if (pid < 0 || status != 0)
+                       return -1;
        }
-       return -1;
+       return 0;
 }
 
+int check_env(char *name)
+{
+       char *val = getenv(name);
+
+       if (val && atoi(val) == 1)
+               return 1;
+
+       return 0;
+}
+
+__u32 random32(void)
+{
+       __u32 rv;
+       int rfd = open("/dev/urandom", O_RDONLY);
+       if (rfd < 0 || read(rfd, &rv, 4) != 4)
+               rv = random();
+       if (rfd >= 0)
+               close(rfd);
+       return rv;
+}
+
+#ifndef MDASSEMBLE
+int flush_metadata_updates(struct supertype *st)
+{
+       int sfd;
+       if (!st->updates) {
+               st->update_tail = NULL;
+               return -1;
+       }
+
+       sfd = connect_monitor(devnum2devname(st->container_dev));
+       if (sfd < 0)
+               return -1;
+
+       while (st->updates) {
+               struct metadata_update *mu = st->updates;
+               st->updates = mu->next;
+
+               send_message(sfd, mu, 0);
+               wait_reply(sfd, 0);
+               free(mu->buf);
+               free(mu);
+       }
+       ack(sfd, 0);
+       wait_reply(sfd, 0);
+       close(sfd);
+       st->update_tail = NULL;
+       return 0;
+}
+
+void append_metadata_update(struct supertype *st, void *buf, int len)
+{
+
+       struct metadata_update *mu = xmalloc(sizeof(*mu));
+
+       mu->buf = buf;
+       mu->len = len;
+       mu->space = NULL;
+       mu->space_list = NULL;
+       mu->next = NULL;
+       *st->update_tail = mu;
+       st->update_tail = &mu->next;
+}
+#endif /* MDASSEMBLE */
+
 #ifdef __TINYC__
 /* tinyc doesn't optimize this check in ioctl.h out ... */
 unsigned int __invalid_size_argument_for_IOC = 0;
 #endif
 
+int experimental(void)
+{
+       if (check_env("MDADM_EXPERIMENTAL"))
+               return 1;
+       else {
+               pr_err("To use this feature MDADM_EXPERIMENTAL"
+                               " environment variable has to be defined.\n");
+               return 0;
+       }
+}
+
+/* Pick all spares matching given criteria from a container
+ * if min_size == 0 do not check size
+ * if domlist == NULL do not check domains
+ * if spare_group given add it to domains of each spare
+ * metadata allows to test domains using metadata of destination array */
+struct mdinfo *container_choose_spares(struct supertype *st,
+                                      unsigned long long min_size,
+                                      struct domainlist *domlist,
+                                      char *spare_group,
+                                      const char *metadata, int get_one)
+{
+       struct mdinfo *d, **dp, *disks = NULL;
+
+       /* get list of all disks in container */
+       if (st->ss->getinfo_super_disks)
+               disks = st->ss->getinfo_super_disks(st);
+
+       if (!disks)
+               return disks;
+       /* find spare devices on the list */
+       dp = &disks->devs;
+       disks->array.spare_disks = 0;
+       while (*dp) {
+               int found = 0;
+               d = *dp;
+               if (d->disk.state == 0) {
+                       /* check if size is acceptable */
+                       unsigned long long dev_size;
+                       dev_t dev = makedev(d->disk.major,d->disk.minor);
+
+                       if (!min_size ||
+                          (dev_size_from_id(dev,  &dev_size) &&
+                           dev_size >= min_size))
+                               found = 1;
+                       /* check if domain matches */
+                       if (found && domlist) {
+                               struct dev_policy *pol = devnum_policy(dev);
+                               if (spare_group)
+                                       pol_add(&pol, pol_domain,
+                                               spare_group, NULL);
+                               if (domain_test(domlist, pol, metadata) != 1)
+                                       found = 0;
+                               dev_policy_free(pol);
+                       }
+               }
+               if (found) {
+                       dp = &d->next;
+                       disks->array.spare_disks++;
+                       if (get_one) {
+                               sysfs_free(*dp);
+                               d->next = NULL;
+                       }
+               } else {
+                       *dp = d->next;
+                       d->next = NULL;
+                       sysfs_free(d);
+               }
+       }
+       return disks;
+}
+
+void *xmalloc(size_t len)
+{
+       void *rv = malloc(len);
+       char *msg;
+       if (rv)
+               return rv;
+       msg = Name ": memory allocation failure - aborting\n";
+       exit(4+!!write(2, msg, strlen(msg)));
+}
+
+void *xrealloc(void *ptr, size_t len)
+{
+       void *rv = realloc(ptr, len);
+       char *msg;
+       if (rv)
+               return rv;
+       msg = Name ": memory allocation failure - aborting\n";
+       exit(4+!!write(2, msg, strlen(msg)));
+}
+
+void *xcalloc(size_t num, size_t size)
+{
+       void *rv = calloc(num, size);
+       char *msg;
+       if (rv)
+               return rv;
+       msg = Name ": memory allocation failure - aborting\n";
+       exit(4+!!write(2, msg, strlen(msg)));
+}
+
+char *xstrdup(const char *str)
+{
+       char *rv = strdup(str);
+       char *msg;
+       if (rv)
+               return rv;
+       msg = Name ": memory allocation failure - aborting\n";
+       exit(4+!!write(2, msg, strlen(msg)));
+}