git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Assemble.c
imsm: Allow create RAID volume with link to container
[thirdparty/mdadm.git] / Assemble.c
index d6beb23da9c5218adbea829522a6eea6776cae1b..e83d550b2c7bd80c6f4b9726e17ba16307c7882a 100644 (file)
@@ -149,6 +149,7 @@ static int select_devices(struct mddev_dev *devlist,
        struct mdinfo *content = NULL;
        int report_mismatch = ((inargv && c->verbose >= 0) || c->verbose > 0);
        struct domainlist *domains = NULL;
+       dev_t rdev;
 
        tmpdev = devlist; num_devs = 0;
        while (tmpdev) {
@@ -169,7 +170,6 @@ static int select_devices(struct mddev_dev *devlist,
             tmpdev = tmpdev ? tmpdev->next : NULL) {
                char *devname = tmpdev->devname;
                int dfd;
-               struct stat stb;
                struct supertype *tst;
                struct dev_policy *pol = NULL;
                int found_container = 0;
@@ -204,14 +204,7 @@ static int select_devices(struct mddev_dev *devlist,
                                pr_err("cannot open device %s: %s\n",
                                       devname, strerror(errno));
                        tmpdev->used = 2;
-               } else if (fstat(dfd, &stb)< 0) {
-                       /* Impossible! */
-                       pr_err("fstat failed for %s: %s\n",
-                              devname, strerror(errno));
-                       tmpdev->used = 2;
-               } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
-                       pr_err("%s is not a block device.\n",
-                              devname);
+               } else if (!fstat_is_blkdev(dfd, devname, &rdev)) {
                        tmpdev->used = 2;
                } else if (must_be_container(dfd)) {
                        if (st) {
@@ -227,14 +220,15 @@ static int select_devices(struct mddev_dev *devlist,
                                        pr_err("not a recognisable container: %s\n",
                                               devname);
                                tmpdev->used = 2;
-                       } else if (!tst->ss->load_container
-                                  || tst->ss->load_container(tst, dfd, NULL)) {
+                       } else if (!tst->ss->load_container ||
+                                  tst->ss->load_container(tst, dfd, NULL)) {
                                if (report_mismatch)
                                        pr_err("no correct container type: %s\n",
                                               devname);
                                tmpdev->used = 2;
                        } else if (auto_assem &&
-                                  !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)),
+                                  !conf_test_metadata(tst->ss->name,
+                                                      (pol = devid_policy(rdev)),
                                                       tst->ss->match_home(tst, c->homehost) == 1)) {
                                if (report_mismatch)
                                        pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
@@ -261,7 +255,8 @@ static int select_devices(struct mddev_dev *devlist,
                                               tst->ss->name, devname);
                                tmpdev->used = 2;
                        } else if (auto_assem && st == NULL &&
-                                  !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)),
+                                  !conf_test_metadata(tst->ss->name,
+                                                      (pol = devid_policy(rdev)),
                                                       tst->ss->match_home(tst, c->homehost) == 1)) {
                                if (report_mismatch)
                                        pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
@@ -484,7 +479,7 @@ static int select_devices(struct mddev_dev *devlist,
                /* Collect domain information from members only */
                if (tmpdev && tmpdev->used == 1) {
                        if (!pol)
-                               pol = devid_policy(stb.st_rdev);
+                               pol = devid_policy(rdev);
                        domain_merge(&domains, pol, tst?tst->ss->name:NULL);
                }
                dev_policy_free(pol);
@@ -517,15 +512,12 @@ static int select_devices(struct mddev_dev *devlist,
 
        /* Now reject spares that don't match domains of identified members */
        for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
-               struct stat stb;
                if (tmpdev->used != 3)
                        continue;
-               if (stat(tmpdev->devname, &stb)< 0) {
-                       pr_err("fstat failed for %s: %s\n",
-                              tmpdev->devname, strerror(errno));
+               if (!stat_is_blkdev(tmpdev->devname, &rdev)) {
                        tmpdev->used = 2;
                } else {
-                       struct dev_policy *pol = devid_policy(stb.st_rdev);
+                       struct dev_policy *pol = devid_policy(rdev);
                        int dt = domain_test(domains, pol, NULL);
                        if (inargv && dt != 0)
                                /* take this spare as domains match
@@ -761,12 +753,12 @@ static int load_devices(struct devs *devices, char *devmap,
                                bestcnt = newbestcnt;
                        }
                        if (best[i] >=0 &&
-                           devices[best[i]].i.events
-                           == devices[devcnt].i.events
-                           && (devices[best[i]].i.disk.minor
-                               != devices[devcnt].i.disk.minor)
-                           && st->ss == &super0
-                           && content->array.level != LEVEL_MULTIPATH) {
+                           devices[best[i]].i.events ==
+                           devices[devcnt].i.events &&
+                           (devices[best[i]].i.disk.minor !=
+                            devices[devcnt].i.disk.minor) &&
+                           st->ss == &super0 &&
+                           content->array.level != LEVEL_MULTIPATH) {
                                /* two different devices with identical superblock.
                                 * Could be a mis-detection caused by overlapping
                                 * partitions.  fail-safe.
@@ -784,9 +776,8 @@ static int load_devices(struct devs *devices, char *devmap,
                                *stp = st;
                                return -1;
                        }
-                       if (best[i] == -1
-                           || (devices[best[i]].i.events
-                               < devices[devcnt].i.events))
+                       if (best[i] == -1 || (devices[best[i]].i.events
+                                             < devices[devcnt].i.events))
                                best[i] = devcnt;
                }
                devcnt++;
@@ -809,14 +800,11 @@ static int force_array(struct mdinfo *content,
        int okcnt = 0;
        while (!enough(content->array.level, content->array.raid_disks,
                       content->array.layout, 1,
-                      avail)
-              ||
+                      avail) ||
               (content->reshape_active && content->delta_disks > 0 &&
                !enough(content->array.level, (content->array.raid_disks
                                               - content->delta_disks),
-                       content->new_layout, 1,
-                       avail)
-                      )) {
+                       content->new_layout, 1, avail))) {
                /* Choose the newest best drive which is
                 * not up-to-date, update the superblock
                 * and add it.
@@ -858,7 +846,19 @@ static int force_array(struct mdinfo *content,
                                        /* OK */;
                                else
                                        continue;
-                       }
+                       } else if (devices[j].i.reshape_active !=
+                           content->reshape_active ||
+                           (devices[j].i.reshape_active &&
+                           devices[j].i.reshape_progress !=
+                           content->reshape_progress))
+                               /* This device may become a source of data. If two
+                                * devices report different reshape progress, their
+                                * reshape boundaries differ. The kernel would only
+                                * treat the first one as the reshape progress and
+                                * carry on, which may cause disaster, so skip this
+                                * device.
+                                */
+                               continue;
                        if (chosen_drive < 0 ||
                             devices[j].i.events
                            > devices[chosen_drive].i.events)
@@ -920,7 +920,13 @@ static int force_array(struct mdinfo *content,
                        if (j >= 0 &&
                            !devices[j].uptodate &&
                            devices[j].i.recovery_start == MaxSector &&
-                           devices[j].i.events == current_events) {
+                           devices[j].i.events == current_events &&
+                           ((!devices[j].i.reshape_active &&
+                           !content->reshape_active) ||
+                           (devices[j].i.reshape_active ==
+                           content->reshape_active &&
+                           devices[j].i.reshape_progress ==
+                           content->reshape_progress))) {
                                chosen_drive = j;
                                goto add_another;
                        }
@@ -1307,13 +1313,13 @@ int Assemble(struct supertype *st, char *mddev,
         *    START_ARRAY
         *
         */
-       int rv;
-       int mdfd;
+       int rv = -1;
+       int mdfd = -1;
        int clean;
        int auto_assem = (mddev == NULL && !ident->uuid_set &&
-                         ident->super_minor == UnSet && ident->name[0] == 0
-                         && (ident->container == NULL || ident->member == NULL));
-       struct devs *devices;
+                         ident->super_minor == UnSet && ident->name[0] == 0 &&
+                         (ident->container == NULL || ident->member == NULL));
+       struct devs *devices = NULL;
        char *devmap;
        int *best = NULL; /* indexed by raid_disk */
        int bestcnt = 0;
@@ -1338,6 +1344,9 @@ int Assemble(struct supertype *st, char *mddev,
        char chosen_name[1024];
        struct map_ent *map = NULL;
        struct map_ent *mp;
+       int locked = 0;
+       struct mdp_superblock_1 *sb;
+       bitmap_super_t *bms;
 
        /*
         * If any subdevs are listed, then any that don't
@@ -1368,6 +1377,12 @@ try_again:
         * set of devices failed.  Those are now marked as ->used==2 and
         * we ignore them and try again
         */
+       if (locked)
+               /*
+                * If we arrive here again via try_again, release the lock
+                * first and take it again, since the metadata is re-read.
+                */
+               cluster_release_dlmlock();
        if (!st && ident->st)
                st = ident->st;
        if (c->verbose>0)
@@ -1385,6 +1400,14 @@ try_again:
        if (!st || !st->sb || !content)
                return 2;
 
+       sb = st->sb;
+       bms = (bitmap_super_t*)(((char*)sb) + 4096);
+       if (sb && bms->version == BITMAP_MAJOR_CLUSTERED) {
+               locked = cluster_get_dlmlock();
+               if (locked != 1)
+                       return 1;
+       }
+
        /* We have a full set of devices - we now need to find the
         * array device.
         * However there is a risk that we are racing with "mdadm -I"
@@ -1411,7 +1434,7 @@ try_again:
                        pr_err("Found some drive for an array that is already active: %s\n",
                               mp->path);
                        pr_err("giving up.\n");
-                       return 1;
+                       goto out;
                }
                for (dv = pre_exist->devs; dv; dv = dv->next) {
                        /* We want to add this device to our list,
@@ -1478,13 +1501,13 @@ try_again:
                        name = strchr(name, ':')+1;
 
                mdfd = create_mddev(mddev, name, ident->autof, trustworthy,
-                                   chosen_name);
+                                   chosen_name, 0);
        }
        if (mdfd < 0) {
                st->ss->free_super(st);
                if (auto_assem)
                        goto try_again;
-               return 1;
+               goto out;
        }
        mddev = chosen_name;
        if (pre_exist == NULL) {
@@ -1503,7 +1526,7 @@ try_again:
                        st->ss->free_super(st);
                        if (auto_assem)
                                goto try_again;
-                       return 1;
+                       goto out;
                }
                /* just incase it was started but has no content */
                ioctl(mdfd, STOP_ARRAY, NULL);
@@ -1515,6 +1538,8 @@ try_again:
                err = assemble_container_content(st, mdfd, content, c,
                                                 chosen_name, NULL);
                close(mdfd);
+               if (locked == 1)
+                       cluster_release_dlmlock();
                return err;
        }
 
@@ -1524,18 +1549,18 @@ try_again:
        devcnt = load_devices(devices, devmap, ident, &st, devlist,
                              c, content, mdfd, mddev,
                              &most_recent, &bestcnt, &best, inargv);
-       if (devcnt < 0)
-               return 1;
+       if (devcnt < 0) {
+               mdfd = -3;
+               goto out;
+       }
 
        if (devcnt == 0) {
                pr_err("no devices found for %s\n",
                       mddev);
                if (st)
                        st->ss->free_super(st);
-               close(mdfd);
-               free(devices);
                free(devmap);
-               return 1;
+               goto out;
        }
 
        if (c->update && strcmp(c->update, "byteorder")==0)
@@ -1649,32 +1674,24 @@ try_again:
                                 : (O_RDONLY|O_EXCL)))< 0) {
                        pr_err("Cannot open %s: %s\n",
                               devices[j].devname, strerror(errno));
-                       close(mdfd);
-                       free(devices);
-                       return 1;
+                       goto out;
                }
                if (st->ss->load_super(st,fd, NULL)) {
                        close(fd);
                        pr_err("RAID superblock has disappeared from %s\n",
                               devices[j].devname);
-                       close(mdfd);
-                       free(devices);
-                       return 1;
+                       goto out;
                }
                close(fd);
        }
        if (st->sb == NULL) {
                pr_err("No suitable drives found for %s\n", mddev);
-               close(mdfd);
-               free(devices);
-               return 1;
+               goto out;
        }
        st->ss->getinfo_super(st, content, NULL);
        if (sysfs_init(content, mdfd, NULL)) {
                pr_err("Unable to initialize sysfs\n");
-               close(mdfd);
-               free(devices);
-               return 1;
+               goto out;
        }
 
        /* after reload context, store journal_clean in context */
@@ -1683,6 +1700,8 @@ try_again:
                int j = best[i];
                unsigned int desired_state;
 
+               if (j < 0)
+                       continue;
                if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL)
                        desired_state = (1<<MD_DISK_JOURNAL);
                else if (i >= content->array.raid_disks * 2)
@@ -1692,8 +1711,6 @@ try_again:
                else
                        desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
 
-               if (j<0)
-                       continue;
                if (!devices[j].uptodate)
                        continue;
 
@@ -1740,17 +1757,13 @@ try_again:
                if (fd < 0) {
                        pr_err("Could not open %s for write - cannot Assemble array.\n",
                               devices[chosen_drive].devname);
-                       close(mdfd);
-                       free(devices);
-                       return 1;
+                       goto out;
                }
                if (st->ss->store_super(st, fd)) {
                        close(fd);
                        pr_err("Could not re-write superblock on %s\n",
                               devices[chosen_drive].devname);
-                       close(mdfd);
-                       free(devices);
-                       return 1;
+                       goto out;
                }
                if (c->verbose >= 0)
                        pr_err("Marking array %s as 'clean'\n",
@@ -1808,9 +1821,7 @@ try_again:
                        pr_err("Failed to restore critical section for reshape, sorry.\n");
                        if (c->backup_file == NULL)
                                cont_err("Possibly you needed to specify the --backup-file\n");
-                       close(mdfd);
-                       free(devices);
-                       return err;
+                       goto out;
                }
        }
 
@@ -1839,6 +1850,7 @@ try_again:
                ioctl(mdfd, STOP_ARRAY, NULL);
        free(devices);
        map_unlock(&map);
+out:
        if (rv == 0) {
                wait_for(chosen_name, mdfd);
                close(mdfd);
@@ -1868,10 +1880,16 @@ try_again:
                                usecs <<= 1;
                        }
                }
-       } else
+       } else if (mdfd >= 0)
                close(mdfd);
 
        /* '2' means 'OK, but not started yet' */
+       if (locked == 1)
+               cluster_release_dlmlock();
+       if (rv == -1) {
+               free(devices);
+               return 1;
+       }
        return rv == 2 ? 0 : rv;
 }