git.ipfire.org Git - thirdparty/mdadm.git/commitdiff
Merge branch 'master' into devel-3.0
author NeilBrown <neilb@suse.de>
Thu, 30 Oct 2008 02:59:11 +0000 (13:59 +1100)
committer NeilBrown <neilb@suse.de>
Thu, 30 Oct 2008 02:59:11 +0000 (13:59 +1100)
Conflicts:

Incremental.c
super0.c
super1.c

1  2 
Detail.c
Incremental.c
mdopen.c
super0.c
util.c

diff --combined Detail.c
index dc11102856b4522017f2d48fdf6bfdaa6611de05,9ba8af1d4da2cae97a30f80a4a4a8b27030f4642..c97172c7fa4459c695185908374315422576e03e
+++ b/Detail.c
@@@ -30,7 -30,6 +30,7 @@@
  #include      "mdadm.h"
  #include      "md_p.h"
  #include      "md_u.h"
 +#include      <dirent.h>
  
  int Detail(char *dev, int brief, int export, int test, char *homehost)
  {
@@@ -57,8 -56,6 +57,8 @@@
        int max_disks = MD_SB_DISKS; /* just a default */
        struct mdinfo info;
        struct mdinfo *sra;
 +      char *member = NULL;
 +      char *container = NULL;
  
        int rv = test ? 4 : 1;
        int avail_disks = 0;
                stb.st_rdev = 0;
        rv = 0;
  
 -      if (st) max_disks = st->max_devs;
 +      if (st)
 +              max_disks = st->max_devs;
 +
 +      if (sra && is_subarray(sra->text_version) &&
 +              strchr(sra->text_version+1, '/')) {
 +              /* This is a subarray of some container.
 +               * We want the name of the container, and the member
 +               */
 +              char *s = strchr(sra->text_version+1, '/');
 +              int dn;
 +              *s++ = '\0';
 +              member = s;
 +              dn = devname2devnum(sra->text_version+1);
 +              container = map_dev(dev2major(dn), dev2minor(dn), 1);
 +      }
  
        /* try to load a superblock */
        for (d= 0; d<max_disks; d++) {
                        continue;
                if ((dv=map_dev(disk.major, disk.minor, 1))) {
                        if ((!st || !st->sb) &&
 -                          (disk.state & (1<<MD_DISK_ACTIVE))) {
 +                          (array.raid_disks == 0 || 
 +                           (disk.state & (1<<MD_DISK_ACTIVE)))) {
                                /* try to read the superblock from this device
                                 * to get more info
                                 */
                                if (fd2 >=0 && st &&
                                    st->ss->load_super(st, fd2, NULL) == 0) {
                                        st->ss->getinfo_super(st, &info);
 -                                      if (info.array.ctime != array.ctime ||
 -                                          info.array.level != array.level)
 +                                      if (array.raid_disks != 0 && /* container */
 +                                          (info.array.ctime != array.ctime ||
 +                                           info.array.level != array.level))
                                                st->ss->free_super(st);
                                }
                                if (fd2 >= 0) close(fd2);
        c = map_num(pers, array.level);
  
        if (export) {
 -              if (c)
 -                      printf("MD_LEVEL=%s\n", c);
 -              printf("MD_DEVICES=%d\n", array.raid_disks);
 -              if (sra && sra->array.major_version < 0)
 -                      printf("MD_METADATA=%s\n", sra->text_version);
 -              else
 -                      printf("MD_METADATA=%d.%02d\n",
 -                             array.major_version, array.minor_version);
 +              if (array.raid_disks) {
 +                      if (c)
 +                              printf("MD_LEVEL=%s\n", c);
 +                      printf("MD_DEVICES=%d\n", array.raid_disks);
 +              } else {
 +                      printf("MD_LEVEL=container\n");
 +                      printf("MD_DEVICES=%d\n", array.nr_disks);
 +              }
 +              if (container) {
 +                      printf("MD_CONTAINER=%s\n", container);
 +                      printf("MD_MEMBER=%s\n", member);
 +              } else {
 +                      if (sra && sra->array.major_version < 0)
 +                              printf("MD_METADATA=%s\n", sra->text_version);
 +                      else
-                               printf("MD_METADATA=%02d.%02d\n",
++                              printf("MD_METADATA=%d.%02d\n",
 +                                     array.major_version, array.minor_version);
 +              }
 +              
 +              if (st && st->sb) {
 +                      struct mdinfo info;
 +                      char nbuf[64];
 +                      st->ss->getinfo_super(st, &info);
 +                      fname_from_uuid(st, &info, nbuf, ':');
 +                      printf("MD_UUID=%s\n", nbuf+5);
  
 -              if (st && st->sb)
 -                      st->ss->export_detail_super(st);
 +                      if (st->ss->export_detail_super)
 +                              st->ss->export_detail_super(st);
 +              }
                goto out;
        }
  
        if (brief) {
                mdu_bitmap_file_t bmf;
 -              printf("ARRAY %s level=%s num-devices=%d", dev,
 -                     c?c:"-unknown-",
 -                     array.raid_disks );
 -              if (sra && sra->array.major_version < 0)
 -                      printf(" metadata=%s", sra->text_version);
 +              if (array.raid_disks)
 +                      printf("ARRAY %s level=%s num-devices=%d", dev,
 +                             c?c:"-unknown-",
 +                             array.raid_disks );
                else
 -                      printf(" metadata=%d.%02d",
 -                             array.major_version, array.minor_version);
 +                      printf("ARRAY %s level=container num-devices=%d",
 +                             dev, array.nr_disks);
 +
 +              if (container) {
 +                      printf(" container=%s", container);
 +                      printf(" member=%s", member);
 +              } else {
 +                      if (sra && sra->array.major_version < 0)
 +                              printf(" metadata=%s", sra->text_version);
 +                      else
-                               printf(" metadata=%02d.%02d",
++                              printf(" metadata=%d.%02d",
 +                                     array.major_version, array.minor_version);
 +              }
  
                /* Only try GET_BITMAP_FILE for 0.90.01 and later */
                if (vers >= 9001 &&
  
                printf("%s:\n", dev);
  
 +              if (container)
 +                      printf("      Container : %s, member %s\n", container, member);
 +              else {
                if (sra && sra->array.major_version < 0)
                        printf("        Version : %s\n", sra->text_version);
                else
-                       printf("        Version : %02d.%02d\n",
+                       printf("        Version : %d.%02d\n",
                               array.major_version, array.minor_version);
 +              }
  
                atime = array.ctime;
 -              printf("  Creation Time : %.24s\n", ctime(&atime));
 +              if (atime)
 +                      printf("  Creation Time : %.24s\n", ctime(&atime));
                if (array.raid_disks == 0) c = "container";
                printf("     Raid Level : %s\n", c?c:"-unknown-");
                if (larray_size)
                                printf("  Used Dev Size : %d%s\n", array.size,
                                       human_size((long long)array.size<<10));
                }
 -              printf("   Raid Devices : %d\n", array.raid_disks);
 +              if (array.raid_disks)
 +                      printf("   Raid Devices : %d\n", array.raid_disks);
                printf("  Total Devices : %d\n", array.nr_disks);
 -              printf("Preferred Minor : %d\n", array.md_minor);
 +              if (!container && 
 +                  ((sra == NULL && array.major_version == 0) ||
 +                   (sra && sra->array.major_version == 0)))
 +                      printf("Preferred Minor : %d\n", array.md_minor);
                if (sra == NULL || sra->array.major_version >= 0)
                        printf("    Persistence : Superblock is %spersistent\n",
                               array.not_persistent?"not ":"");
                } else if (array.state & (1<<MD_SB_BITMAP_PRESENT))
                        printf("  Intent Bitmap : Internal\n\n");
                atime = array.utime;
 -              printf("    Update Time : %.24s\n", ctime(&atime));
 -              printf("          State : %s%s%s%s\n",
 -                     (array.state&(1<<MD_SB_CLEAN))?"clean":"active",
 -                     array.active_disks < array.raid_disks? ", degraded":"",
 -                     (!e || e->percent < 0) ? "" :
 -                      (e->resync) ? ", resyncing": ", recovering",
 -                     larray_size ? "": ", Not Started");
 -              printf(" Active Devices : %d\n", array.active_disks);
 +              if (atime)
 +                      printf("    Update Time : %.24s\n", ctime(&atime));
 +              if (array.raid_disks)
 +                      printf("          State : %s%s%s%s\n",
 +                             (array.state&(1<<MD_SB_CLEAN))?"clean":"active",
 +                             array.active_disks < array.raid_disks? ", degraded":"",
 +                             (!e || e->percent < 0) ? "" :
 +                             (e->resync) ? ", resyncing": ", recovering",
 +                             larray_size ? "": ", Not Started");
 +              if (array.raid_disks)
 +                      printf(" Active Devices : %d\n", array.active_disks);
                printf("Working Devices : %d\n", array.working_disks);
 -              printf(" Failed Devices : %d\n", array.failed_disks);
 -              printf("  Spare Devices : %d\n", array.spare_disks);
 +              if (array.raid_disks) {
 +                      printf(" Failed Devices : %d\n", array.failed_disks);
 +                      printf("  Spare Devices : %d\n", array.spare_disks);
 +              }
                printf("\n");
                if (array.level == 5) {
                        c = map_num(r5layout, array.layout);
@@@ -367,45 -306,7 +367,45 @@@ This is pretty borin
                if (st && st->sb)
                        st->ss->detail_super(st, homehost);
  
 -              printf("    Number   Major   Minor   RaidDevice State\n");
 +              if (array.raid_disks == 0 && sra && sra->array.major_version == -1
 +                  && sra->array.minor_version == -2 && sra->text_version[0] != '/') {
 +                      /* This looks like a container.  Find any active arrays
 +                       * That claim to be a member.
 +                       */
 +                      DIR *dir = opendir("/sys/block");
 +                      struct dirent *de;
 +
 +                      printf("  Member Arrays :");
 +
 +                      while (dir && (de = readdir(dir)) != NULL) {
 +                              char path[200];
 +                              char vbuf[1024];
 +                              int nlen = strlen(sra->sys_name);
 +                              int dn;
 +                              if (de->d_name[0] == '.')
 +                                      continue;
 +                              sprintf(path, "/sys/block/%s/md/metadata_version",
 +                                      de->d_name);
 +                              if (load_sys(path, vbuf) < 0)
 +                                      continue;
 +                              if (strncmp(vbuf, "external:", 9) != 0 ||
 +                                  !is_subarray(sra->sys_name+9) ||
 +                                  strncmp(vbuf+10, sra->sys_name, nlen) != 0 ||
 +                                  vbuf[10+nlen] != '/')
 +                                      continue;
 +                              dn = devname2devnum(de->d_name);
 +                              printf(" %s", map_dev(dev2major(dn),
 +                                                    dev2minor(dn), 1));
 +                      }
 +                      if (dir)
 +                              closedir(dir);
 +                      printf("\n\n");
 +              }
 +
 +              if (array.raid_disks)
 +                      printf("    Number   Major   Minor   RaidDevice State\n");
 +              else
 +                      printf("    Number   Major   Minor   RaidDevice\n");
        }
        disks = malloc(max_disks * sizeof(mdu_disk_info_t));
        for (d=0; d<max_disks; d++) {
                        else
                                printf("   %5d   %5d    %5d    %5d     ",
                                       disk.number, disk.major, disk.minor, disk.raid_disk);
 +              }
 +              if (!brief && array.raid_disks) {
 +
                        if (disk.state & (1<<MD_DISK_FAULTY)) {
                                printf(" faulty");
                                if (disk.raid_disk < array.raid_disks &&
                }
                if (!brief) printf("\n");
        }
 -      if (spares && brief) printf(" spares=%d", spares);
 +      if (spares && brief && array.raid_disks) printf(" spares=%d", spares);
        if (brief && st && st->sb)
                st->ss->brief_detail_super(st);
        st->ss->free_super(st);
diff --combined Incremental.c
index 80a0a07fc536a13b4a9fcb7b21dcee8e1fb7ef58,7148a734a883b598adb2b3fb40e67ff537e6d85b..cea693a1b507d11f2afde7dd99f47bc934cec020
@@@ -40,7 -40,7 +40,7 @@@ int Incremental(char *devname, int verb
                struct supertype *st, char *homehost, int autof)
  {
        /* Add this device to an array, creating the array if necessary
 -       * and starting the array if sensibe or - if runstop>0 - if possible.
 +       * and starting the array if sensible or - if runstop>0 - if possible.
         *
         * This has several steps:
         *
@@@ -56,7 -56,6 +56,7 @@@
         * - Choose a free, high number.
         * - Use a partitioned device unless strong suggestion not to.
         *         e.g. auto=md
 +       *   Don't choose partitioned for containers.
         * 5/ Find out if array already exists
         * 5a/ if it does not
         * - choose a name, from mdadm.conf or 'name' field in array.
@@@ -68,7 -67,6 +68,7 @@@
         * - add the device
         * 6/ Make sure /var/run/mdadm.map contains this array.
         * 7/ Is there enough devices to possibly start the array?
 +       *     For a container, this means running Incremental_container.
         * 7a/ if not, finish with success.
         * 7b/ if yes,
         * - read all metadata and arrange devices like -A does
@@@ -76,7 -74,7 +76,7 @@@
         *   start the array (auto-readonly).
         */
        struct stat stb;
 -      struct mdinfo info, info2;
 +      struct mdinfo info;
        struct mddev_ident_s *array_list, *match;
        char chosen_name[1024];
        int rv;
        int dfd, mdfd;
        char *avail;
        int active_disks;
 +      int uuid_for_name = 0;
 +      char *name_to_use;
 +      char nbuf[64];
 +
        struct createinfo *ci = conf_get_create_info();
 -      char *name;
  
-       if (autof == 0)
-               autof = ci->autof;
  
 -      /* 1/ Check if devices is permitted by mdadm.conf */
 +      /* 1/ Check if device is permitted by mdadm.conf */
  
        if (!conf_test_dev(devname)) {
                if (verbose >= 0)
                close(dfd);
                return 1;
        }
 -      st->ss->getinfo_super(st, &info);
        close (dfd);
  
 +      if (st->ss->container_content && st->loaded_container) {
 +              /* This is a pre-built container array, so we do something
 +               * rather different.
 +               */
 +              return Incremental_container(st, devname, verbose, runstop,
 +                                           autof);
 +      }
 +
 +      memset(&info, 0, sizeof(info));
 +      st->ss->getinfo_super(st, &info);
        /* 3/ Check if there is a match in mdadm.conf */
  
        array_list = conf_get_ident(NULL);
                match = array_list;
        }
  
-       /* 3a/ if not, check for homehost match.  If no match, reject. */
+       /* 3a/ if not, check for homehost match.  If no match, continue
+        * but don't trust the 'name' in the array. Thus a 'random' minor
+        * number will be assigned, and the device name will be based
+        * on that. */
 -      name = info.name;
        if (!match) {
                if (homehost == NULL ||
 -                  st->ss->match_home(st, homehost) == 0) {
 -                      if (verbose >= 0)
 -                              fprintf(stderr, Name
 -            ": not found in mdadm.conf and not identified by homehost.\n");
 -                      name = NULL;
 -              }
 +                     st->ss->match_home(st, homehost) != 1)
 +                      uuid_for_name = 1;
        }
        /* 4/ Determine device number. */
-       /* - If in mdadm.conf with std name, use that */
-       /* - UUID in /var/run/mdadm.map  use that */
+       /* - If in mdadm.conf with std name, get number from name. */
+       /* - UUID in /var/run/mdadm.map  get number from mapping */
        /* - If name is suggestive, use that. unless in use with */
        /*           different uuid. */
        /* - Choose a free, high number. */
        /* - Use a partitioned device unless strong suggestion not to. */
        /*         e.g. auto=md */
 +      mp = map_by_uuid(&map, info.uuid);
 +
 +      if (uuid_for_name && ! mp) {
 +              name_to_use = fname_from_uuid(st, &info, nbuf, '-');
 +              if (verbose >= 0)
 +                      fprintf(stderr, Name
 +              ": not found in mdadm.conf and not identified by homehost"
 +                              " - using uuid based name\n");
 +      } else
 +              name_to_use = info.name;
  
-       if (match && is_standard(match->devname, &devnum))
-               /* We have devnum now */;
-       else if (mp != NULL)
+       /* There are three possible sources for 'autof':  command line,
+        * ARRAY line in mdadm.conf, or CREATE line in mdadm.conf.
+        * They have precedence in that order.
+        */
+       if (autof == 0 && match)
+               autof = match->autof;
+       if (autof == 0)
+               autof = ci->autof;
+       if (match && (rv = is_standard(match->devname, &devnum))) {
+               devnum = (rv > 0) ? (-1-devnum) : devnum;
 -      } else if ((mp = map_by_uuid(&map, info.uuid)) != NULL)
++      } else if (mp != NULL)
                devnum = mp->devnum;
        else {
                /* Have to guess a bit. */
                int use_partitions = 1;
                char *np, *ep;
 +              char *nm, nbuf[1024];
 +              struct stat stb2;
 +
                if ((autof&7) == 3 || (autof&7) == 5)
                        use_partitions = 0;
 -              np = name ? strchr(name, ':') : ":NONAME";
 +              if (st->ss->external)
 +                      use_partitions = 0;
 +              np = strchr(name_to_use, ':');
                if (np)
                        np++;
                else
 -                      np = name;
 +                      np = name_to_use;
                devnum = strtoul(np, &ep, 10);
                if (ep > np && *ep == 0) {
                        /* This is a number.  Let check that it is unused. */
                } else
                        devnum = -1;
  
 +              if (match)
 +                      nm = match->devname;
 +              else {
 +                      sprintf(nbuf, "/dev/md/%s", np);
 +                      nm = nbuf;
 +              }
 +              if (stat(nm, &stb2) == 0 &&
 +                  S_ISBLK(stb2.st_mode) &&
 +                  major(stb2.st_rdev) == (use_partitions ?
 +                                          get_mdp_major() : MD_MAJOR)) {
 +                      if (use_partitions)
 +                              devnum = minor(stb2.st_rdev) >> MdpMinorShift;
 +                      else
 +                              devnum = minor(stb2.st_rdev);
 +                      if (mddev_busy(use_partitions ? (-1-devnum) : devnum))
 +                              devnum = -1;
 +              }
 +
                if (devnum < 0) {
                        /* Haven't found anything yet, choose something free */
                        devnum = find_free_devnum(use_partitions);
                } else
                        devnum = use_partitions ? (-1-devnum) : devnum;
        }
 -      mdfd = open_mddev_devnum(match ? match->devname : NULL,
 +
 +      mdfd = open_mddev_devnum(match ? match->devname : mp ? mp->path : NULL,
                                 devnum,
 -                               name,
 +                               name_to_use,
                                 chosen_name, autof >> 3);
        if (mdfd < 0) {
                fprintf(stderr, Name ": failed to open %s: %s.\n",
                        chosen_name, strerror(errno));
                return 2;
        }
 +      sysfs_init(&info, mdfd, 0);
 +
        /* 5/ Find out if array already exists */
        if (! mddev_busy(devnum)) {
        /* 5a/ if it does not */
        /* - choose a name, from mdadm.conf or 'name' field in array. */
        /* - create the array */
        /* - add the device */
 -              mdu_array_info_t ainf;
 -              mdu_disk_info_t disk;
 -              char md[20];
                struct mdinfo *sra;
 +              struct mdinfo dinfo;
  
 -              memset(&ainf, 0, sizeof(ainf));
 -              ainf.major_version = st->ss->major;
 -              ainf.minor_version = st->minor_version;
 -              if (ioctl(mdfd, SET_ARRAY_INFO, &ainf) != 0) {
 -                      fprintf(stderr, Name
 -                              ": SET_ARRAY_INFO failed for %s: %s\b",
 +              if (set_array_info(mdfd, st, &info) != 0) {
 +                      fprintf(stderr, Name ": failed to set array info for %s: %s\n",
                                chosen_name, strerror(errno));
                        close(mdfd);
                        return 2;
                }
 -              sprintf(md, "%d.%d\n", st->ss->major, st->minor_version);
 -              sra = sysfs_read(mdfd, devnum, GET_VERSION);
 -              sysfs_set_str(sra, NULL, "metadata_version", md);
 -              memset(&disk, 0, sizeof(disk));
 -              disk.major = major(stb.st_rdev);
 -              disk.minor = minor(stb.st_rdev);
 -              sysfs_free(sra);
 -              if (ioctl(mdfd, ADD_NEW_DISK, &disk) != 0) {
 +
 +              dinfo = info;
 +              dinfo.disk.major = major(stb.st_rdev);
 +              dinfo.disk.minor = minor(stb.st_rdev);
 +              if (add_disk(mdfd, st, &info, &dinfo) != 0) {
                        fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
                                devname, chosen_name, strerror(errno));
                        ioctl(mdfd, STOP_ARRAY, 0);
                        sysfs_free(sra);
                        return 2;
                }
 +              info.array.working_disks = 1;
 +              sysfs_free(sra);
        } else {
        /* 5b/ if it does */
        /* - check one drive in array to make sure metadata is a reasonably */
        /* - add the device */
                char dn[20];
                int dfd2;
 -              mdu_disk_info_t disk;
                int err;
                struct mdinfo *sra;
                struct supertype *st2;
 -              sra = sysfs_read(mdfd, devnum, (GET_VERSION | GET_DEVS |
 -                                              GET_STATE));
 +              struct mdinfo info2, *d;
 +              sra = sysfs_read(mdfd, devnum, (GET_DEVS | GET_STATE));
  
 -              if (sra->array.major_version != st->ss->major ||
 -                  sra->array.minor_version != st->minor_version) {
 -                      if (verbose >= 0)
 -                              fprintf(stderr, Name
 -            ": %s has different metadata to chosen array %s %d.%d %d.%d.\n",
 -                                      devname, chosen_name,
 -                                      sra->array.major_version,
 -                                      sra->array.minor_version,
 -                                      st->ss->major, st->minor_version);
 -                      close(mdfd);
 -                      return 1;
 -              }
                sprintf(dn, "%d:%d", sra->devs->disk.major,
                        sra->devs->disk.minor);
                dfd2 = dev_open(dn, O_RDONLY);
                st2 = dup_super(st);
 -              if (st2->ss->load_super(st2, dfd2, NULL)) {
 +              if (st2->ss->load_super(st2, dfd2, NULL) ||
 +                  st->ss->compare_super(st, st2) != 0) {
                        fprintf(stderr, Name
 -                              ": Strange error loading metadata for %s.\n",
 -                              chosen_name);
 +                              ": metadata mismatch between %s and "
 +                              "chosen array %s\n",
 +                              devname, chosen_name);
                        close(mdfd);
                        close(dfd2);
                        return 2;
                }
                close(dfd2);
 +              memset(&info2, 0, sizeof(info2));
                st2->ss->getinfo_super(st2, &info2);
                st2->ss->free_super(st2);
                if (info.array.level != info2.array.level ||
                        close(mdfd);
                        return 2;
                }
 -              memset(&disk, 0, sizeof(disk));
 -              disk.major = major(stb.st_rdev);
 -              disk.minor = minor(stb.st_rdev);
 -              err = ioctl(mdfd, ADD_NEW_DISK, &disk);
 +              info2.disk.major = major(stb.st_rdev);
 +              info2.disk.minor = minor(stb.st_rdev);
 +              /* add disk needs to know about containers */
 +              if (st->ss->external)
 +                      sra->array.level = LEVEL_CONTAINER;
 +              err = add_disk(mdfd, st2, sra, &info2);
                if (err < 0 && errno == EBUSY) {
                        /* could be another device present with the same
                         * disk.number. Find and reject any such
                         */
                        find_reject(mdfd, st, sra, info.disk.number,
                                    info.events, verbose, chosen_name);
 -                      err = ioctl(mdfd, ADD_NEW_DISK, &disk);
 +                      err = add_disk(mdfd, st2, sra, &info2);
                }
                if (err < 0) {
                        fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
                        close(mdfd);
                        return 2;
                }
 +              info.array.working_disks = 0;
 +              for (d = sra->devs; d; d=d->next)
 +                      info.array.working_disks ++;
 +                      
        }
        /* 6/ Make sure /var/run/mdadm.map contains this array. */
        map_update(&map, devnum,
 -                 info.array.major_version,
 -                 info.array.minor_version,
 +                 info.text_version,
                   info.uuid, chosen_name);
  
        /* 7/ Is there enough devices to possibly start the array? */
        /* 7a/ if not, finish with success. */
 +      if (info.array.level == LEVEL_CONTAINER) {
 +              /* Try to assemble within the container */
 +              close(mdfd);
 +              if (verbose >= 0)
 +                      fprintf(stderr, Name
 +                              ": container %s now has %d devices\n",
 +                              chosen_name, info.array.working_disks);
 +              return Incremental(chosen_name, verbose, runstop,
 +                                 NULL, homehost, autof);
 +      }
        avail = NULL;
        active_disks = count_active(st, mdfd, &avail, &info);
        if (enough(info.array.level, info.array.raid_disks,
                        close(bmfd);
                }
                sra = sysfs_read(mdfd, devnum, 0);
-               if (sra == NULL || active_disks >= info.array.working_disks)
+               if ((sra == NULL || active_disks >= info.array.working_disks)
 -                  && name != NULL)
++                  && uuid_for_name == 0)
                        rv = ioctl(mdfd, RUN_ARRAY, NULL);
                else
                        rv = sysfs_set_str(sra, NULL,
@@@ -587,12 -555,18 +598,18 @@@ static int count_active(struct supertyp
                if (ok != 0)
                        continue;
                st->ss->getinfo_super(st, &info);
+               if (!avail) {
+                       avail = malloc(info.array.raid_disks);
+                       if (!avail) {
+                               fprintf(stderr, Name ": out of memory.\n");
+                               exit(1);
+                       }
+                       memset(avail, 0, info.array.raid_disks);
+                       *availp = avail;
+               }
                if (info.disk.state & (1<<MD_DISK_SYNC))
                {
-                       if (avail == NULL) {
-                               avail = malloc(info.array.raid_disks);
-                               memset(avail, 0, info.array.raid_disks);
-                       }
                        if (cnt == 0) {
                                cnt++;
                                max_events = info.events;
@@@ -664,8 -638,8 +681,8 @@@ void RebuildMap(void
                                path = map_dev(MD_MAJOR, md->devnum, 0);
                        else
                                path = map_dev(mdp, (-1-md->devnum)<< 6, 0);
 -                      map_add(&map, md->devnum, st->ss->major,
 -                              st->minor_version,
 +                      map_add(&map, md->devnum,
 +                              info.text_version,
                                info.uuid, path ? : "/unknown");
                        st->ss->free_super(st);
                        break;
@@@ -752,218 -726,3 +769,218 @@@ int IncrementalScan(int verbose
        }
        return rv;
  }
 +
 +static char *container2devname(char *devname)
 +{
 +      int fd = open(devname, O_RDONLY);
 +      char *mdname = NULL;
 +
 +      if (fd >= 0) {
 +              mdname = devnum2devname(fd2devnum(fd));
 +              close(fd);
 +      }
 +
 +      return mdname;
 +}
 +
 +int Incremental_container(struct supertype *st, char *devname, int verbose,
 +                        int runstop, int autof)
 +{
 +      /* Collect the contents of this container and for each
 +       * array, choose a device name and assemble the array.
 +       */
 +
 +      struct mdinfo *list = st->ss->container_content(st);
 +      struct mdinfo *ra;
 +      char *mdname = container2devname(devname);
 +
 +      if (!mdname) {
 +              fprintf(stderr, Name": failed to determine device name\n");
 +              return 2;
 +      }
 +
 +      for (ra = list ; ra ; ra = ra->next) {
 +              struct mdinfo *dev, *sra;
 +              int devnum = -1;
 +              int mdfd;
 +              char chosen_name[1024];
 +              int usepart = 1;
 +              char *n;
 +              int working = 0, preexist = 0;
 +              struct map_ent *mp, *map = NULL;
 +              char nbuf[64];
 +              char *name_to_use;
 +              struct mddev_ident_s *match = NULL;
 +
 +              if ((autof&7) == 3 || (autof&7) == 5)
 +                      usepart = 0;
 +
 +              mp = map_by_uuid(&map, ra->uuid);
 +
 +              name_to_use = ra->name;
 +              if (! name_to_use ||
 +                  ! *name_to_use ||
 +                  (*devname != '/' || strncmp("UUID-", strrchr(devname,'/')+1,5) == 0)
 +                      )
 +                      name_to_use = fname_from_uuid(st, ra, nbuf, '-');
 +                  
 +              if (!mp) {
 +
 +                      /* Check in mdadm.conf for devices == devname and
 +                       * member == ra->text_version after second slash.
 +                       */
 +                      char *sub = strchr(ra->text_version+1, '/');
 +                      struct mddev_ident_s *array_list;
 +                      if (sub) {
 +                              sub++;
 +                              array_list = conf_get_ident(NULL);
 +                      } else
 +                              array_list = NULL;
 +                      for(; array_list ; array_list = array_list->next) {
 +                              int fd;
 +                              char *dn;
 +                              if (array_list->member == NULL ||
 +                                  array_list->container == NULL)
 +                                      continue;
 +                              if (strcmp(array_list->member, sub) != 0)
 +                                      continue;
 +                              if (array_list->uuid_set &&
 +                                  !same_uuid(ra->uuid, array_list->uuid, st->ss->swapuuid))
 +                                      continue;
 +                              fd = open(array_list->container, O_RDONLY);
 +                              if (fd < 0)
 +                                      continue;
 +                              dn = devnum2devname(fd2devnum(fd));
 +                              close(fd);
 +                              if (strncmp(dn, ra->text_version+1,
 +                                          strlen(dn)) != 0 ||
 +                                  ra->text_version[strlen(dn)+1] != '/') {
 +                                      free(dn);
 +                                      continue;
 +                              }
 +                              free(dn);
 +                              /* we have a match */
 +                              match = array_list;
 +                              if (verbose>0)
 +                                      fprintf(stderr, Name ": match found for member %s\n",
 +                                              array_list->member);
 +                              break;
 +                      }
 +              }
 +
 +              if (match && is_standard(match->devname, &devnum))
 +                      /* we have devnum now */;
 +              else if (mp)
 +                      devnum = mp->devnum;
 +              else if (is_standard(name_to_use, &devnum))
 +                      /* have devnum */;
 +              else {
 +                      n = name_to_use;
 +                      if (*n == 'd')
 +                              n++;
 +                      if (*n && devnum < 0) {
 +                              devnum = strtoul(n, &n, 10);
 +                              if (devnum >= 0 && (*n == 0 || *n == ' ')) {
 +                                      /* Use this devnum */
 +                                      usepart = (name_to_use[0] == 'd');
 +                                      if (mddev_busy(usepart ? (-1-devnum) : devnum))
 +                                              devnum = -1;
 +                              } else
 +                                      devnum = -1;
 +                      }
 +
 +                      if (devnum < 0) {
 +                              char *nm = name_to_use;
 +                              char nbuf[1024];
 +                              struct stat stb;
 +                              if (strchr(nm, ':'))
 +                                      nm = strchr(nm, ':')+1;
 +                              sprintf(nbuf, "/dev/md/%s", nm);
 +
 +                              if (stat(nbuf, &stb) == 0 &&
 +                                  S_ISBLK(stb.st_mode) &&
 +                                  major(stb.st_rdev) == (usepart ?
 +                                                         get_mdp_major() : MD_MAJOR)){
 +                                      if (usepart)
 +                                              devnum = minor(stb.st_rdev)
 +                                                      >> MdpMinorShift;
 +                                      else
 +                                              devnum = minor(stb.st_rdev);
 +                                      if (mddev_busy(usepart ? (-1-devnum) : devnum))
 +                                              devnum = -1;
 +                              }
 +                      }
 +
 +                      if (devnum >= 0)
 +                              devnum = usepart ? (-1-devnum) : devnum;
 +                      else
 +                              devnum = find_free_devnum(usepart);
 +              }
 +              mdfd = open_mddev_devnum(mp ? mp->path : match ? match->devname : NULL,
 +                                       devnum, name_to_use,
 +                                       chosen_name, autof>>3);
 +
 +              if (mdfd < 0) {
 +                      fprintf(stderr, Name ": failed to open %s: %s.\n",
 +                              chosen_name, strerror(errno));
 +                      return 2;
 +              }
 +
 +
 +              sysfs_init(ra, mdfd, 0);
 +
 +              sra = sysfs_read(mdfd, 0, GET_VERSION);
 +              if (sra == NULL || strcmp(sra->text_version, ra->text_version) != 0)
 +                      if (sysfs_set_array(ra, md_get_version(mdfd)) != 0)
 +                              return 1;
 +              if (sra)
 +                      sysfs_free(sra);
 +
 +              for (dev = ra->devs; dev; dev = dev->next)
 +                      if (sysfs_add_disk(ra, dev) == 0)
 +                              working++;
 +                      else if (errno == EEXIST)
 +                              preexist++;
 +              if (working == 0)
 +                      /* Nothing new, don't try to start */ ;
 +              else if (runstop > 0 ||
 +                       (working + preexist) >= ra->array.working_disks) {
 +                      switch(ra->array.level) {
 +                      case LEVEL_LINEAR:
 +                      case LEVEL_MULTIPATH:
 +                      case 0:
 +                              sysfs_set_str(ra, NULL, "array_state",
 +                                            "active");
 +                              break;
 +                      default:
 +                              sysfs_set_str(ra, NULL, "array_state",
 +                                            "readonly");
 +                              /* start mdmon if needed. */
 +                              if (!mdmon_running(st->container_dev))
 +                                      start_mdmon(st->container_dev);
 +                              ping_monitor(devnum2devname(st->container_dev));
 +                              break;
 +                      }
 +                      sysfs_set_safemode(ra, ra->safe_mode_delay);
 +                      if (verbose >= 0) {
 +                              fprintf(stderr, Name
 +                                      ": Started %s with %d devices",
 +                                      chosen_name, working + preexist);
 +                              if (preexist)
 +                                      fprintf(stderr, " (%d new)", working);
 +                              fprintf(stderr, "\n");
 +                      }
 +                      /* FIXME should have an O_EXCL and wait for read-auto */
 +              } else
 +                      if (verbose >= 0)
 +                              fprintf(stderr, Name
 +                                      ": %s assembled with %d devices but "
 +                                      "not started\n",
 +                                      chosen_name, working);
 +              close(mdfd);
 +              map_update(&map, devnum,
 +                         ra->text_version,
 +                         ra->uuid, chosen_name);
 +      }
 +      return 0;
 +}
diff --combined mdopen.c
index 0b9498cc9054dbfc85b8c06399ce0307996c08c5,9250e4bacf3062b308157c83480de7561603ed1c..eee1eea15f049c734e47cb09c0fd524da5f94373
+++ b/mdopen.c
@@@ -282,7 -282,7 +282,7 @@@ int open_mddev_devnum(char *devname, in
  
        if (devname)
                strcpy(chosen_name, devname);
-       else if (name && name[0] && strchr(name,'/') == NULL) {
 -      else if (name && *name && strchr(name,'/') == NULL) {
++      else if (name && *name && name[0] && strchr(name,'/') == NULL) {
                char *n = strchr(name, ':');
                if (n) n++; else n = name;
                if (isdigit(*n) && devnum < 0)
diff --combined super0.c
index 924d75d3e7692def4996b4de811191331d1bae06,90fdf23d075f46b1eceb90dfc0d6b6f6861efaba..92255c23cbec0f9f7e1776377bce2db03021a0d2
+++ b/super0.c
@@@ -53,7 -53,7 +53,7 @@@ static unsigned long calc_sb0_csum(mdp_
  }
  
  
 -void super0_swap_endian(struct mdp_superblock_s *sb)
 +static void super0_swap_endian(struct mdp_superblock_s *sb)
  {
        /* as super0 superblocks are host-endian, it is sometimes
         * useful to be able to swap the endianness
@@@ -93,7 -93,7 +93,7 @@@ static void examine_super0(struct super
        char *c;
  
        printf("          Magic : %08x\n", sb->md_magic);
-       printf("        Version : %02d.%02d.%02d\n", sb->major_version, sb->minor_version,
+       printf("        Version : %d.%02d.%02d\n", sb->major_version, sb->minor_version,
               sb->patch_version);
        if (sb->minor_version >= 90) {
                printf("           UUID : %08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
@@@ -300,6 -300,18 +300,6 @@@ static void brief_detail_super0(struct 
        else
                printf("%08x", sb->set_uuid0);
  }
 -
 -static void export_detail_super0(struct supertype *st)
 -{
 -      mdp_super_t *sb = st->sb;
 -      printf("MD_UUID=");
 -      if (sb->minor_version >= 90)
 -              printf("%08x:%08x:%08x:%08x", sb->set_uuid0, sb->set_uuid1,
 -                     sb->set_uuid2, sb->set_uuid3);
 -      else
 -              printf("%08x", sb->set_uuid0);
 -      printf("\n");
 -}
  #endif
  
  static int match_home0(struct supertype *st, char *homehost)
@@@ -356,9 -368,6 +356,9 @@@ static void getinfo_super0(struct super
        info->events = md_event(sb);
        info->data_offset = 0;
  
 +      sprintf(info->text_version, "0.%d", sb->minor_version);
 +      info->safe_mode_delay = 200;
 +
        uuid_from_super0(st, info->uuid);
  
        if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) {
@@@ -542,17 -551,12 +542,17 @@@ static int init_super0(struct supertyp
                       unsigned long long size, char *ignored_name, char *homehost,
                       int *uuid)
  {
 -      mdp_super_t *sb = malloc(MD_SB_BYTES + sizeof(bitmap_super_t));
 +      mdp_super_t *sb;
        int spares;
 +
 +      if (posix_memalign((void**)&sb, 512, MD_SB_BYTES + sizeof(bitmap_super_t)) != 0) {
 +              fprintf(stderr, Name ": %s could not allocate superblock\n", __func__);
 +              return 0;
 +      }
        memset(sb, 0, MD_SB_BYTES + sizeof(bitmap_super_t));
  
        st->sb = sb;
 -      if (info->major_version == -1) {
 +      if (info == NULL) {
                /* zeroing the superblock */
                return 0;
        }
        return 1;
  }
  
 +struct devinfo {
 +      int fd;
 +      char *devname;
 +      mdu_disk_info_t disk;
 +      struct devinfo *next;
 +};
 +
 +#ifndef MDASSEMBLE
  /* Add a device to the superblock being created */
 -static void add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo)
 +static void add_to_super0(struct supertype *st, mdu_disk_info_t *dinfo,
 +                        int fd, char *devname)
  {
        mdp_super_t *sb = st->sb;
        mdp_disk_t *dk = &sb->disks[dinfo->number];
 +      struct devinfo *di, **dip;
  
        dk->number = dinfo->number;
        dk->major = dinfo->major;
        dk->minor = dinfo->minor;
        dk->raid_disk = dinfo->raid_disk;
        dk->state = dinfo->state;
 +
 +      sb->this_disk = sb->disks[dinfo->number];
 +      sb->sb_csum = calc_sb0_csum(sb);
 +
 +      dip = (struct devinfo **)&st->info;
 +      while (*dip)
 +              dip = &(*dip)->next;
 +      di = malloc(sizeof(struct devinfo));
 +      di->fd = fd;
 +      di->devname = devname;
 +      di->disk = *dinfo;
 +      di->next = NULL;
 +      *dip = di;
  }
 +#endif
  
  static int store_super0(struct supertype *st, int fd)
  {
        if (super->state & (1<<MD_SB_BITMAP_PRESENT)) {
                struct bitmap_super_s * bm = (struct bitmap_super_s*)(super+1);
                if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC)
 -                      if (write(fd, bm, sizeof(*bm)) != sizeof(*bm))
 +                      if (write(fd, bm, ROUND_UP(sizeof(*bm),512)) != 
 +                          ROUND_UP(sizeof(*bm),512))
                            return 5;
        }
  
        return 0;
  }
  
 -static int write_init_super0(struct supertype *st,
 -                           mdu_disk_info_t *dinfo, char *devname)
 +#ifndef MDASSEMBLE
 +static int write_init_super0(struct supertype *st)
  {
        mdp_super_t *sb = st->sb;
 -      int fd = open(devname, O_RDWR|O_EXCL);
 -      int rv;
 +      int rv = 0;
 +      struct devinfo *di;
  
 -      if (fd < 0) {
 -              fprintf(stderr, Name ": Failed to open %s to write superblock\n", devname);
 -              return -1;
 -      }
 +      for (di = st->info ; di && ! rv ; di = di->next) {
  
 -      sb->disks[dinfo->number].state &= ~(1<<MD_DISK_FAULTY);
 +              if (di->disk.state == 1)
 +                      continue;
 +              if (di->fd == -1)
 +                      continue;
 +              Kill(di->devname, 0, 1, 1);
 +              Kill(di->devname, 0, 1, 1);
  
 -      sb->this_disk = sb->disks[dinfo->number];
 -      sb->sb_csum = calc_sb0_csum(sb);
 -      rv = store_super0(st, fd);
 +              sb->disks[di->disk.number].state &= ~(1<<MD_DISK_FAULTY);
  
 -      if (rv == 0 && (sb->state & (1<<MD_SB_BITMAP_PRESENT)))
 -              rv = st->ss->write_bitmap(st, fd);
 +              sb->this_disk = sb->disks[di->disk.number];
 +              sb->sb_csum = calc_sb0_csum(sb);
 +              rv = store_super0(st, di->fd);
  
 -      close(fd);
 -      if (rv)
 -              fprintf(stderr, Name ": failed to write superblock to %s\n", devname);
 +              if (rv == 0 && (sb->state & (1<<MD_SB_BITMAP_PRESENT)))
 +                      rv = st->ss->write_bitmap(st, di->fd);
 +
 +              if (rv)
 +                      fprintf(stderr,
 +                              Name ": failed to write superblock to %s\n",
 +                              di->devname);
 +              close(di->fd);
 +              di->fd = -1;
 +      }
        return rv;
  }
 +#endif
  
  static int compare_super0(struct supertype *st, struct supertype *tst)
  {
        if (second->md_magic != MD_SB_MAGIC)
                return 1;
        if (!first) {
 -              first = malloc(MD_SB_BYTES + sizeof(struct bitmap_super_s));
 +              if (posix_memalign((void**)&first, 512, 
 +                             MD_SB_BYTES + sizeof(struct bitmap_super_s)) != 0) {
 +                      fprintf(stderr, Name
 +                              ": %s could not allocate superblock\n", __func__);
 +                      return 1;
 +              }
                memcpy(first, second, MD_SB_BYTES + sizeof(struct bitmap_super_s));
                st->sb = first;
                return 0;
@@@ -788,9 -753,6 +788,9 @@@ static int load_super0(struct supertyp
  
        free_super0(st);
  
 +      if (st->subarray[0])
 +              return 1;
 +
        if (!get_dev_size(fd, devname, &dsize))
                return 1;
  
                return 1;
        }
  
 -      super = malloc(MD_SB_BYTES + sizeof(bitmap_super_t));
 +      if (posix_memalign((void**)&super, 512,
 +                         MD_SB_BYTES + sizeof(bitmap_super_t)+512) != 0) {
 +              fprintf(stderr, Name
 +                      ": %s could not allocate superblock\n", __func__);
 +              return 1;
 +      }
  
        if (read(fd, super, sizeof(*super)) != MD_SB_BYTES) {
                if (devname)
                st->ss = &super0;
                st->minor_version = super->minor_version;
                st->max_devs = MD_SB_DISKS;
 +              st->info = NULL;
        }
  
        /* Now check on the bitmap superblock */
         * valid.  If it doesn't clear the bit.  An --assemble --force
         * should get that written out.
         */
 -      if (read(fd, super+1, sizeof(struct bitmap_super_s))
 -          != sizeof(struct bitmap_super_s))
 +      if (read(fd, super+1, ROUND_UP(sizeof(struct bitmap_super_s),512))
 +          != ROUND_UP(sizeof(struct bitmap_super_s),512))
                goto no_bitmap;
  
        uuid_from_super0(st, uuid);
@@@ -886,14 -842,12 +886,14 @@@ static struct supertype *match_metadata
        struct supertype *st = malloc(sizeof(*st));
        if (!st) return st;
  
 +      memset(st, 0, sizeof(*st));
        st->ss = &super0;
 +      st->info = NULL;
        st->minor_version = 90;
        st->max_devs = MD_SB_DISKS;
        st->sb = NULL;
 -      /* Eliminate pointless leading 0 from some versions of mdadm -D */
 -      if (strncmp(arg, "00.", 3) == 0)
 +      /* we sometimes get 00.90 */
 +      while (arg[0] == '0' && arg[1] == '0')
                arg++;
        if (strcmp(arg, "0") == 0 ||
            strcmp(arg, "0.90") == 0 ||
@@@ -967,7 -921,7 +967,7 @@@ static int add_internal_bitmap0(struct 
  }
  
  
 -void locate_bitmap0(struct supertype *st, int fd)
 +static void locate_bitmap0(struct supertype *st, int fd)
  {
        unsigned long long dsize;
        unsigned long long offset;
        lseek64(fd, offset, 0);
  }
  
 -int write_bitmap0(struct supertype *st, int fd)
 +static int write_bitmap0(struct supertype *st, int fd)
  {
        unsigned long long dsize;
        unsigned long long offset;
        int rv = 0;
  
        int towrite, n;
 -      char buf[4096];
 +      char abuf[4096+512];
 +      char *buf = (char*)(((long)(abuf+512))&~511UL);
  
        if (!get_dev_size(fd, NULL, &dsize))
                return 1;
        if (lseek64(fd, offset + 4096, 0)< 0LL)
                return 3;
  
 -
 -      if (write(fd, ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t)) !=
 -          sizeof(bitmap_super_t))
 -              return -2;
 -      towrite = 64*1024 - MD_SB_BYTES - sizeof(bitmap_super_t);
 -      memset(buf, 0xff, sizeof(buf));
 +      memset(buf, 0xff, 4096);
 +      memcpy(buf,  ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t));
 +      towrite = 64*1024;
        while (towrite > 0) {
                n = towrite;
 -              if (n > sizeof(buf))
 -                      n = sizeof(buf);
 +              if (n > 4096)
 +                      n = 4096;
                n = write(fd, buf, n);
                if (n > 0)
                        towrite -= n;
                else
                        break;
 +              memset(buf, 0xff, 4096);
        }
        fsync(fd);
        if (towrite)
@@@ -1041,48 -996,6 +1041,48 @@@ static void free_super0(struct supertyp
        st->sb = NULL;
  }
  
 +#ifndef MDASSEMBLE
 +static int validate_geometry0(struct supertype *st, int level,
 +                            int layout, int raiddisks,
 +                            int chunk, unsigned long long size,
 +                            char *subdev, unsigned long long *freesize,
 +                            int verbose)
 +{
 +      unsigned long long ldsize;
 +      int fd;
 +
 +      if (level == LEVEL_CONTAINER)
 +              return 0;
 +      if (raiddisks > MD_SB_DISKS)
 +              return 0;
 +      if (size > (0x7fffffffULL<<10))
 +              return 0;
 +      if (!subdev)
 +              return 1;
 +
 +      fd = open(subdev, O_RDONLY|O_EXCL, 0);
 +      if (fd < 0) {
 +              if (verbose)
 +                      fprintf(stderr, Name ": super0.90 cannot open %s: %s\n",
 +                              subdev, strerror(errno));
 +              return 0;
 +      }
 +
 +      if (!get_dev_size(fd, subdev, &ldsize)) {
 +              close(fd);
 +              return 0;
 +      }
 +      close(fd);
 +
 +      if (ldsize < MD_RESERVED_SECTORS * 512)
 +              return 0;
 +      if (size > (0x7fffffffULL<<10))
 +              return 0;
 +      *freesize = MD_NEW_SIZE_SECTORS(ldsize >> 9);
 +      return 1;
 +}
 +#endif /* MDASSEMBLE */
 +
  struct superswitch super0 = {
  #ifndef MDASSEMBLE
        .examine_super = examine_super0,
        .export_examine_super = export_examine_super0,
        .detail_super = detail_super0,
        .brief_detail_super = brief_detail_super0,
 -      .export_detail_super = export_detail_super0,
 +      .write_init_super = write_init_super0,
 +      .validate_geometry = validate_geometry0,
 +      .add_to_super = add_to_super0,
  #endif
        .match_home = match_home0,
        .uuid_from_super = uuid_from_super0,
        .getinfo_super = getinfo_super0,
        .update_super = update_super0,
        .init_super = init_super0,
 -      .add_to_super = add_to_super0,
        .store_super = store_super0,
 -      .write_init_super = write_init_super0,
        .compare_super = compare_super0,
        .load_super = load_super0,
        .match_metadata_desc = match_metadata_desc0,
        .locate_bitmap = locate_bitmap0,
        .write_bitmap = write_bitmap0,
        .free_super = free_super0,
 -      .major = 0,
 -      .swapuuid = 0,
  };
diff --combined util.c
index dee04971b97ed9faea2b2ebdb1d8b32d153e29db,a50036c116a76c86c2205ac3c148de345d4e9b93..ab2d7e9f06e3cb1f91a4a9c30529df872ee95670
--- 1/util.c
--- 2/util.c
+++ b/util.c
  
  #include      "mdadm.h"
  #include      "md_p.h"
 +#include      <sys/socket.h>
  #include      <sys/utsname.h>
 +#include      <sys/wait.h>
 +#include      <sys/un.h>
  #include      <ctype.h>
 +#include      <dirent.h>
 +#include      <signal.h>
  
  /*
   * following taken from linux/blkpg.h because they aren't
@@@ -222,13 -217,8 +222,13 @@@ int enough(int level, int raid_disks, i
        }
  }
  
 +const int uuid_match_any[4] = { ~0, ~0, ~0, ~0 };
  int same_uuid(int a[4], int b[4], int swapuuid)
  {
 +      if (memcmp(a, uuid_match_any, sizeof(int[4])) == 0 ||
 +          memcmp(b, uuid_match_any, sizeof(int[4])) == 0)
 +              return 1;
 +
        if (swapuuid) {
                /* parsed uuids are host-endian.
                 * uuids from some superblocks are big-endian
@@@ -274,27 -264,6 +274,27 @@@ void copy_uuid(void *a, int b[4], int s
                memcpy(a, b, 16);
  }
  
 +char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep)
 +{
 +      int i, j;
 +      int id;
 +      char uuid[16];
 +      char *c = buf;
 +      strcpy(c, "UUID-");
 +      c += strlen(c);
 +      copy_uuid(uuid, info->uuid, st->ss->swapuuid);
 +      for (i = 0; i < 4; i++) {
 +              id = uuid[i];
 +              if (i)
 +                      *c++ = sep;
 +              for (j = 3; j >= 0; j--) {
 +                      sprintf(c,"%02x", (unsigned char) uuid[j+4*i]);
 +                      c+= 2;
 +              }
 +      }
 +      return buf;
 +}
 +
  #ifndef MDASSEMBLE
  int check_ext2(int fd, char *name)
  {
@@@ -420,9 -389,6 +420,9 @@@ int is_standard(char *dev, int *nump
        /* tests if dev is a "standard" md dev name.
         * i.e. if the last component is "/dNN" or "/mdNN",
         * where NN is a string of digits
 +       * Returns 1 if a partitionable standard,
 +       *   -1 if non-partitionable,
 +       *   0 if not a standard name.
         */
        char *d = strrchr(dev, '/');
        int type=0;
        if (strncmp(d, "/d",2)==0)
                d += 2, type=1; /* /dev/md/dN{pM} */
        else if (strncmp(d, "/md_d", 5)==0)
-               d += 5, type=1; /* /dev/md_dNpM */
+               d += 5, type=1; /* /dev/md_dN{pM} */
        else if (strncmp(d, "/md", 3)==0)
                d += 3, type=-1; /* /dev/mdN */
        else if (d-dev > 3 && strncmp(d-2, "md/", 3)==0)
@@@ -659,23 -625,7 +659,23 @@@ void print_r10_layout(int layout
  }
  #endif
  
 -#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
 +unsigned long long calc_array_size(int level, int raid_disks, int layout,
 +                                 int chunksize, unsigned long long devsize)
 +{
 +      int data_disks = 0;
 +      switch (level) {
 +      case 0: data_disks = raid_disks; break;
 +      case 1: data_disks = 1; break;
 +      case 4:
 +      case 5: data_disks = raid_disks - 1; break;
 +      case 6: data_disks = raid_disks - 2; break;
 +      case 10: data_disks = raid_disks / (layout & 255) / ((layout>>8)&255);
 +              break;
 +      }
 +      devsize &= ~(unsigned long long)((chunksize>>9)-1);
 +      return data_disks * devsize;
 +}
 +
  int get_mdp_major(void)
  {
  static int mdp_major = -1;
        return mdp_major;
  }
  
 -
 -
 +#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
  char *get_md_name(int dev)
  {
        /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */
@@@ -759,6 -710,21 +759,6 @@@ void put_md_name(char *name
                unlink(name);
  }
  
 -static int dev2major(int d)
 -{
 -      if (d >= 0)
 -              return MD_MAJOR;
 -      else
 -              return get_mdp_major();
 -}
 -
 -static int dev2minor(int d)
 -{
 -      if (d >= 0)
 -              return d;
 -      return (-1-d) << MdpMinorShift;
 -}
 -
  int find_free_devnum(int use_partitions)
  {
        int devnum;
@@@ -800,38 -766,19 +800,38 @@@ int dev_open(char *dev, int flags
        if (e > dev && *e == ':' && e[1] &&
            (minor = strtoul(e+1, &e, 0)) >= 0 &&
            *e == 0) {
 -              snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d", major, minor);
 +              snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
 +                       (int)getpid(), major, minor);
                if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
 -                      fd = open(devname, flags);
 +                      fd = open(devname, flags|O_DIRECT);
                        unlink(devname);
                }
        } else
 -              fd = open(dev, flags);
 +              fd = open(dev, flags|O_DIRECT);
        return fd;
  }
  
 -struct superswitch *superlist[] = { &super0, &super1, NULL };
 +int open_dev_excl(int devnum)
 +{
 +      char buf[20];
 +      int i;
 +
 +      sprintf(buf, "%d:%d", dev2major(devnum), dev2minor(devnum));
 +      for (i=0 ; i<25 ; i++) {
 +              int fd = dev_open(buf, O_RDWR|O_EXCL);
 +              if (fd >= 0)
 +                      return fd;
 +              if (errno != EBUSY)
 +                      return fd;
 +              usleep(200000);
 +      }
 +      return -1;
 +}
 +
 +struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, NULL };
  
  #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
 +
  struct supertype *super_by_fd(int fd)
  {
        mdu_array_info_t array;
        char *verstr;
        char version[20];
        int i;
 +      char *subarray = NULL;
  
        sra = sysfs_read(fd, 0, GET_VERSION);
  
                sprintf(version, "%d.%d", vers, minor);
                verstr = version;
        }
 +      if (minor == -2 && is_subarray(verstr)) {
 +              char *dev = verstr+1;
 +              subarray = strchr(dev, '/');
 +              int devnum;
 +              if (subarray)
 +                      *subarray++ = '\0';
 +              devnum = devname2devnum(dev);
 +              subarray = strdup(subarray);
 +              if (sra)
 +                      sysfs_free(sra);
 +              sra = sysfs_read(-1, devnum, GET_VERSION);
 +              verstr = sra->text_version ? : "-no-metadata-";
 +      }
 +
        for (i = 0; st == NULL && superlist[i] ; i++)
                st = superlist[i]->match_metadata_desc(verstr);
  
        if (sra)
                sysfs_free(sra);
 -      if (st)
 +      if (st) {
                st->sb = NULL;
 +              if (subarray) {
 +                      strncpy(st->subarray, subarray, 32);
 +                      st->subarray[31] = 0;
 +                      free(subarray);
 +              } else
 +                      st->subarray[0] = 0;
 +      }
        return st;
  }
  #endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
  
  
 -struct supertype *dup_super(struct supertype *st)
 +struct supertype *dup_super(struct supertype *orig)
  {
 -      struct supertype *stnew = NULL;
 -      char *verstr = NULL;
 -      char version[20];
 -      int i;
 +      struct supertype *st;
  
 +      if (!orig)
 +              return orig;
 +      st = malloc(sizeof(*st));
        if (!st)
                return st;
 -
 -      if (st->minor_version == -1)
 -              sprintf(version, "%d", st->ss->major);
 -      else
 -              sprintf(version, "%d.%d", st->ss->major, st->minor_version);
 -      verstr = version;
 -
 -      for (i = 0; stnew == NULL && superlist[i] ; i++)
 -              stnew = superlist[i]->match_metadata_desc(verstr);
 -
 -      if (stnew)
 -              stnew->sb = NULL;
 -      return stnew;
 +      memset(st, 0, sizeof(*st));
 +      st->ss = orig->ss;
 +      st->max_devs = orig->max_devs;
 +      st->minor_version = orig->minor_version;
 +      strcpy(st->subarray, orig->subarray);
 +      st->sb = NULL;
 +      st->info = NULL;
 +      return st;
  }
  
  struct supertype *guess_super(int fd)
        int i;
  
        st = malloc(sizeof(*st));
 -      memset(st, 0, sizeof(*st));
        for (i=0 ; superlist[i]; i++) {
                int rv;
                ss = superlist[i];
 -              st->ss = NULL;
 +              memset(st, 0, sizeof(*st));
                rv = ss->load_super(st, fd, NULL);
                if (rv == 0) {
                        struct mdinfo info;
        }
        if (bestsuper != -1) {
                int rv;
 -              st->ss = NULL;
 +              memset(st, 0, sizeof(*st));
                rv = superlist[bestsuper]->load_super(st, fd, NULL);
                if (rv == 0) {
                        superlist[bestsuper]->free_super(st);
@@@ -992,303 -923,6 +992,303 @@@ void get_one_disk(int mdfd, mdu_array_i
                        return;
  }
  
 +int open_container(int fd)
 +{
 +      /* 'fd' is a block device.  Find out if it is in use
 +       * by a container, and return an open fd on that container.
 +       */
 +      char path[256];
 +      char *e;
 +      DIR *dir;
 +      struct dirent *de;
 +      int dfd, n;
 +      char buf[200];
 +      int major, minor;
 +      struct stat st;
 +
 +      if (fstat(fd, &st) != 0)
 +              return -1;
 +      sprintf(path, "/sys/dev/block/%d:%d/holders",
 +              (int)major(st.st_rdev), (int)minor(st.st_rdev));
 +      e = path + strlen(path);
 +
 +      dir = opendir(path);
 +      if (!dir)
 +              return -1;
 +      while ((de = readdir(dir))) {
 +              if (de->d_ino == 0)
 +                      continue;
 +              if (de->d_name[0] == '.')
 +                      continue;
 +              sprintf(e, "/%s/dev", de->d_name);
 +              dfd = open(path, O_RDONLY);
 +              if (dfd < 0)
 +                      continue;
 +              n = read(dfd, buf, sizeof(buf));
 +              close(dfd);
 +              if (n <= 0 || n >= sizeof(buf))
 +                      continue;
 +              buf[n] = 0;
 +              if (sscanf(buf, "%d:%d", &major, &minor) != 2)
 +                      continue;
 +              sprintf(buf, "%d:%d", major, minor);
 +              dfd = dev_open(buf, O_RDONLY);
 +              if (dfd >= 0) {
 +                      closedir(dir);
 +                      return dfd;
 +              }
 +      }
 +      closedir(dir);
 +      return -1;
 +}
 +
 +int add_disk(int mdfd, struct supertype *st,
 +           struct mdinfo *sra, struct mdinfo *info)
 +{
 +      /* Add a device to an array, in one of 2 ways. */
 +      int rv;
 +#ifndef MDASSEMBLE
 +      if (st->ss->external) {
 +              rv = sysfs_add_disk(sra, info);
 +              if (! rv) {
 +                      struct mdinfo *sd2;
 +                      for (sd2 = sra->devs; sd2; sd2=sd2->next)
 +                              if (sd2 == info)
 +                                      break;
 +                      if (sd2 == NULL) {
 +                              sd2 = malloc(sizeof(*sd2));
 +                              *sd2 = *info;
 +                              sd2->next = sra->devs;
 +                              sra->devs = sd2;
 +                      }
 +              }
 +      } else
 +#endif
 +              rv = ioctl(mdfd, ADD_NEW_DISK, &info->disk);
 +      return rv;
 +}
 +
 +int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
 +{
 +      /* Initialise kernel's knowledge of array.
 +       * This varies between externally managed arrays
 +       * and older kernels
 +       */
 +      int vers = md_get_version(mdfd);
 +      int rv;
 +
 +#ifndef MDASSEMBLE
 +      if (st->ss->external)
 +              rv = sysfs_set_array(info, vers);
 +      else
 +#endif
 +              if ((vers % 100) >= 1) { /* can use different versions */
 +              mdu_array_info_t inf;
 +              memset(&inf, 0, sizeof(inf));
 +              inf.major_version = info->array.major_version;
 +              inf.minor_version = info->array.minor_version;
 +              rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
 +      } else
 +              rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
 +      return rv;
 +}
 +
 +char *devnum2devname(int num)
 +{
 +      char name[100];
 +      if (num > 0)
 +              sprintf(name, "md%d", num);
 +      else
 +              sprintf(name, "md_d%d", -1-num);
 +      return strdup(name);
 +}
 +
 +int devname2devnum(char *name)
 +{
 +      char *ep;
 +      int num;
 +      if (strncmp(name, "md_d", 4)==0)
 +              num = -1-strtoul(name+4, &ep, 10);
 +      else
 +              num = strtoul(name+2, &ep, 10);
 +      return num;
 +}
 +
 +int stat2devnum(struct stat *st)
 +{
 +      if ((S_IFMT & st->st_mode) == S_IFBLK) {
 +              if (major(st->st_rdev) == MD_MAJOR)
 +                      return minor(st->st_rdev);
 +              else
 +                      return -1- (minor(st->st_rdev)>>6);
 +      }
 +      return -1;
 +
 +}
 +
 +int fd2devnum(int fd)
 +{
 +      struct stat stb;
 +      if (fstat(fd, &stb) == 0)
 +              return stat2devnum(&stb);
 +      return -1;
 +}
 +
 +int mdmon_running(int devnum)
 +{
 +      char path[100];
 +      char pid[10];
 +      int fd;
 +      int n;
 +      sprintf(path, "/var/run/mdadm/%s.pid", devnum2devname(devnum));
 +      fd = open(path, O_RDONLY, 0);
 +
 +      if (fd < 0)
 +              return 0;
 +      n = read(fd, pid, 9);
 +      close(fd);
 +      if (n <= 0)
 +              return 0;
 +      if (kill(atoi(pid), 0) == 0)
 +              return 1;
 +      return 0;
 +}
 +
 +int signal_mdmon(int devnum)
 +{
 +      char path[100];
 +      char pid[10];
 +      int fd;
 +      int n;
 +      sprintf(path, "/var/run/mdadm/%s.pid", devnum2devname(devnum));
 +      fd = open(path, O_RDONLY, 0);
 +
 +      if (fd < 0)
 +              return 0;
 +      n = read(fd, pid, 9);
 +      close(fd);
 +      if (n <= 0)
 +              return 0;
 +      if (kill(atoi(pid), SIGUSR1) == 0)
 +              return 1;
 +      return 0;
 +}
 +
 +int start_mdmon(int devnum)
 +{
 +      int i;
 +      int len;
 +      pid_t pid;      
 +      int status;
 +      char pathbuf[1024];
 +      char *paths[4] = {
 +              pathbuf,
 +              "/sbin/mdmon",
 +              "mdmon",
 +              NULL
 +      };
 +
 +      if (env_no_mdmon())
 +              return 0;
 +
 +      len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf));
 +      if (len > 0) {
 +              char *sl;
 +              pathbuf[len] = 0;
 +              sl = strrchr(pathbuf, '/');
 +              if (sl)
 +                      sl++;
 +              else
 +                      sl = pathbuf;
 +              strcpy(sl, "mdmon");
 +      } else
 +              pathbuf[0] = '\0';
 +
 +      switch(fork()) {
 +      case 0:
 +              /* FIXME yuk. CLOSE_EXEC?? */
 +              for (i=3; i < 100; i++)
 +                      close(i);
 +              for (i=0; paths[i]; i++)
 +                      if (paths[i][0])
 +                              execl(paths[i], "mdmon",
 +                                    map_dev(dev2major(devnum),
 +                                            dev2minor(devnum),
 +                                            1), NULL);
 +              exit(1);
 +      case -1: fprintf(stderr, Name ": cannot run mdmon. "
 +                       "Array remains readonly\n");
 +              return -1;
 +      default: /* parent - good */
 +              pid = wait(&status);
 +              if (pid < 0 || status != 0)
 +                      return -1;
 +      }
 +      return 0;
 +}
 +
 +int env_no_mdmon(void)
 +{
 +      char *val = getenv("MDADM_NO_MDMON");
 +
 +      if (val && atoi(val) == 1)
 +              return 1;
 +
 +      return 0;
 +}
 +
 +#ifndef MDASSEMBLE
 +int flush_metadata_updates(struct supertype *st)
 +{
 +      int sfd;
 +      if (!st->updates) {
 +              st->update_tail = NULL;
 +              return -1;
 +      }
 +
 +      sfd = connect_monitor(devnum2devname(st->container_dev));
 +      if (sfd < 0)
 +              return -1;
 +
 +      while (st->updates) {
 +              struct metadata_update *mu = st->updates;
 +              st->updates = mu->next;
 +
 +              send_message(sfd, mu, 0);
 +              wait_reply(sfd, 0);
 +              free(mu->buf);
 +              free(mu);
 +      }
 +      ack(sfd, 0);
 +      wait_reply(sfd, 0);
 +      close(sfd);
 +      st->update_tail = NULL;
 +      return 0;
 +}
 +
 +void append_metadata_update(struct supertype *st, void *buf, int len)
 +{
 +
 +      struct metadata_update *mu = malloc(sizeof(*mu));
 +
 +      mu->buf = buf;
 +      mu->len = len;
 +      mu->space = NULL;
 +      mu->next = NULL;
 +      *st->update_tail = mu;
 +      st->update_tail = &mu->next;
 +}
 +
 +struct superswitch *find_metadata_methods(char *vers)
 +{
 +      if (strcmp(vers, "ddf") == 0)
 +              return &super_ddf;
 +      if (strcmp(vers, "imsm") == 0)
 +              return &super_imsm;
 +      return NULL;
 +}
 +#endif /* MDASSEMBLE */
 +
  #ifdef __TINYC__
  /* tinyc doesn't optimize this check in ioctl.h out ... */
  unsigned int __invalid_size_argument_for_IOC = 0;