]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Assemble.c
imsm: Allow create RAID volume with link to container
[thirdparty/mdadm.git] / Assemble.c
index 9d0a89ff0fa5416c276f52a2d68b7476c894ce09..e83d550b2c7bd80c6f4b9726e17ba16307c7882a 100644 (file)
@@ -220,8 +220,8 @@ static int select_devices(struct mddev_dev *devlist,
                                        pr_err("not a recognisable container: %s\n",
                                               devname);
                                tmpdev->used = 2;
-                       } else if (!tst->ss->load_container
-                                  || tst->ss->load_container(tst, dfd, NULL)) {
+                       } else if (!tst->ss->load_container ||
+                                  tst->ss->load_container(tst, dfd, NULL)) {
                                if (report_mismatch)
                                        pr_err("no correct container type: %s\n",
                                               devname);
@@ -512,15 +512,12 @@ static int select_devices(struct mddev_dev *devlist,
 
        /* Now reject spares that don't match domains of identified members */
        for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
-               struct stat stb;
                if (tmpdev->used != 3)
                        continue;
-               if (stat(tmpdev->devname, &stb)< 0) {
-                       pr_err("fstat failed for %s: %s\n",
-                              tmpdev->devname, strerror(errno));
+               if (!stat_is_blkdev(tmpdev->devname, &rdev)) {
                        tmpdev->used = 2;
                } else {
-                       struct dev_policy *pol = devid_policy(stb.st_rdev);
+                       struct dev_policy *pol = devid_policy(rdev);
                        int dt = domain_test(domains, pol, NULL);
                        if (inargv && dt != 0)
                                /* take this spare as domains match
@@ -756,12 +753,12 @@ static int load_devices(struct devs *devices, char *devmap,
                                bestcnt = newbestcnt;
                        }
                        if (best[i] >=0 &&
-                           devices[best[i]].i.events
-                           == devices[devcnt].i.events
-                           && (devices[best[i]].i.disk.minor
-                               != devices[devcnt].i.disk.minor)
-                           && st->ss == &super0
-                           && content->array.level != LEVEL_MULTIPATH) {
+                           devices[best[i]].i.events ==
+                           devices[devcnt].i.events &&
+                           (devices[best[i]].i.disk.minor !=
+                            devices[devcnt].i.disk.minor) &&
+                           st->ss == &super0 &&
+                           content->array.level != LEVEL_MULTIPATH) {
                                /* two different devices with identical superblock.
                                 * Could be a mis-detection caused by overlapping
                                 * partitions.  fail-safe.
@@ -779,9 +776,8 @@ static int load_devices(struct devs *devices, char *devmap,
                                *stp = st;
                                return -1;
                        }
-                       if (best[i] == -1
-                           || (devices[best[i]].i.events
-                               < devices[devcnt].i.events))
+                       if (best[i] == -1 || (devices[best[i]].i.events
+                                             < devices[devcnt].i.events))
                                best[i] = devcnt;
                }
                devcnt++;
@@ -804,14 +800,11 @@ static int force_array(struct mdinfo *content,
        int okcnt = 0;
        while (!enough(content->array.level, content->array.raid_disks,
                       content->array.layout, 1,
-                      avail)
-              ||
+                      avail) ||
               (content->reshape_active && content->delta_disks > 0 &&
                !enough(content->array.level, (content->array.raid_disks
                                               - content->delta_disks),
-                       content->new_layout, 1,
-                       avail)
-                      )) {
+                       content->new_layout, 1, avail))) {
                /* Choose the newest best drive which is
                 * not up-to-date, update the superblock
                 * and add it.
@@ -853,7 +846,19 @@ static int force_array(struct mdinfo *content,
                                        /* OK */;
                                else
                                        continue;
-                       }
+                       } else if (devices[j].i.reshape_active !=
+                           content->reshape_active ||
+                           (devices[j].i.reshape_active &&
+                           devices[j].i.reshape_progress !=
+                           content->reshape_progress))
+                               /* Here, it may be a source of data. If two
+                                * devices claim different reshape progress,
+                                * their reshape boundaries differ. The
+                                * kernel will only treat the first one as
+                                * the reshape progress and carry on. That
+                                * may cause disaster, so avoid it.
+                                */
+                               continue;
                        if (chosen_drive < 0 ||
                             devices[j].i.events
                            > devices[chosen_drive].i.events)
@@ -915,7 +920,13 @@ static int force_array(struct mdinfo *content,
                        if (j >= 0 &&
                            !devices[j].uptodate &&
                            devices[j].i.recovery_start == MaxSector &&
-                           devices[j].i.events == current_events) {
+                           devices[j].i.events == current_events &&
+                           ((!devices[j].i.reshape_active &&
+                           !content->reshape_active) ||
+                           (devices[j].i.reshape_active ==
+                           content->reshape_active &&
+                           devices[j].i.reshape_progress ==
+                           content->reshape_progress))) {
                                chosen_drive = j;
                                goto add_another;
                        }
@@ -1302,13 +1313,13 @@ int Assemble(struct supertype *st, char *mddev,
         *    START_ARRAY
         *
         */
-       int rv;
-       int mdfd;
+       int rv = -1;
+       int mdfd = -1;
        int clean;
        int auto_assem = (mddev == NULL && !ident->uuid_set &&
-                         ident->super_minor == UnSet && ident->name[0] == 0
-                         && (ident->container == NULL || ident->member == NULL));
-       struct devs *devices;
+                         ident->super_minor == UnSet && ident->name[0] == 0 &&
+                         (ident->container == NULL || ident->member == NULL));
+       struct devs *devices = NULL;
        char *devmap;
        int *best = NULL; /* indexed by raid_disk */
        int bestcnt = 0;
@@ -1333,6 +1344,9 @@ int Assemble(struct supertype *st, char *mddev,
        char chosen_name[1024];
        struct map_ent *map = NULL;
        struct map_ent *mp;
+       int locked = 0;
+       struct mdp_superblock_1 *sb;
+       bitmap_super_t *bms;
 
        /*
         * If any subdevs are listed, then any that don't
@@ -1363,6 +1377,12 @@ try_again:
         * set of devices failed.  Those are now marked as ->used==2 and
         * we ignore them and try again
         */
+       if (locked)
+               /*
+                * If we come back here via try_again, we need to unlock first
+                * and then lock again, since the metadata is re-read.
+                */
+               cluster_release_dlmlock();
        if (!st && ident->st)
                st = ident->st;
        if (c->verbose>0)
@@ -1380,6 +1400,14 @@ try_again:
        if (!st || !st->sb || !content)
                return 2;
 
+       sb = st->sb;
+       bms = (bitmap_super_t*)(((char*)sb) + 4096);
+       if (sb && bms->version == BITMAP_MAJOR_CLUSTERED) {
+               locked = cluster_get_dlmlock();
+               if (locked != 1)
+                       return 1;
+       }
+
        /* We have a full set of devices - we now need to find the
         * array device.
         * However there is a risk that we are racing with "mdadm -I"
@@ -1406,7 +1434,7 @@ try_again:
                        pr_err("Found some drive for an array that is already active: %s\n",
                               mp->path);
                        pr_err("giving up.\n");
-                       return 1;
+                       goto out;
                }
                for (dv = pre_exist->devs; dv; dv = dv->next) {
                        /* We want to add this device to our list,
@@ -1479,7 +1507,7 @@ try_again:
                st->ss->free_super(st);
                if (auto_assem)
                        goto try_again;
-               return 1;
+               goto out;
        }
        mddev = chosen_name;
        if (pre_exist == NULL) {
@@ -1498,7 +1526,7 @@ try_again:
                        st->ss->free_super(st);
                        if (auto_assem)
                                goto try_again;
-                       return 1;
+                       goto out;
                }
                /* just incase it was started but has no content */
                ioctl(mdfd, STOP_ARRAY, NULL);
@@ -1510,6 +1538,8 @@ try_again:
                err = assemble_container_content(st, mdfd, content, c,
                                                 chosen_name, NULL);
                close(mdfd);
+               if (locked == 1)
+                       cluster_release_dlmlock();
                return err;
        }
 
@@ -1519,18 +1549,18 @@ try_again:
        devcnt = load_devices(devices, devmap, ident, &st, devlist,
                              c, content, mdfd, mddev,
                              &most_recent, &bestcnt, &best, inargv);
-       if (devcnt < 0)
-               return 1;
+       if (devcnt < 0) {
+               mdfd = -3;
+               goto out;
+       }
 
        if (devcnt == 0) {
                pr_err("no devices found for %s\n",
                       mddev);
                if (st)
                        st->ss->free_super(st);
-               close(mdfd);
-               free(devices);
                free(devmap);
-               return 1;
+               goto out;
        }
 
        if (c->update && strcmp(c->update, "byteorder")==0)
@@ -1644,32 +1674,24 @@ try_again:
                                 : (O_RDONLY|O_EXCL)))< 0) {
                        pr_err("Cannot open %s: %s\n",
                               devices[j].devname, strerror(errno));
-                       close(mdfd);
-                       free(devices);
-                       return 1;
+                       goto out;
                }
                if (st->ss->load_super(st,fd, NULL)) {
                        close(fd);
                        pr_err("RAID superblock has disappeared from %s\n",
                               devices[j].devname);
-                       close(mdfd);
-                       free(devices);
-                       return 1;
+                       goto out;
                }
                close(fd);
        }
        if (st->sb == NULL) {
                pr_err("No suitable drives found for %s\n", mddev);
-               close(mdfd);
-               free(devices);
-               return 1;
+               goto out;
        }
        st->ss->getinfo_super(st, content, NULL);
        if (sysfs_init(content, mdfd, NULL)) {
                pr_err("Unable to initialize sysfs\n");
-               close(mdfd);
-               free(devices);
-               return 1;
+               goto out;
        }
 
        /* after reload context, store journal_clean in context */
@@ -1678,6 +1700,8 @@ try_again:
                int j = best[i];
                unsigned int desired_state;
 
+               if (j < 0)
+                       continue;
                if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL)
                        desired_state = (1<<MD_DISK_JOURNAL);
                else if (i >= content->array.raid_disks * 2)
@@ -1687,8 +1711,6 @@ try_again:
                else
                        desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
 
-               if (j<0)
-                       continue;
                if (!devices[j].uptodate)
                        continue;
 
@@ -1735,17 +1757,13 @@ try_again:
                if (fd < 0) {
                        pr_err("Could not open %s for write - cannot Assemble array.\n",
                               devices[chosen_drive].devname);
-                       close(mdfd);
-                       free(devices);
-                       return 1;
+                       goto out;
                }
                if (st->ss->store_super(st, fd)) {
                        close(fd);
                        pr_err("Could not re-write superblock on %s\n",
                               devices[chosen_drive].devname);
-                       close(mdfd);
-                       free(devices);
-                       return 1;
+                       goto out;
                }
                if (c->verbose >= 0)
                        pr_err("Marking array %s as 'clean'\n",
@@ -1803,9 +1821,7 @@ try_again:
                        pr_err("Failed to restore critical section for reshape, sorry.\n");
                        if (c->backup_file == NULL)
                                cont_err("Possibly you needed to specify the --backup-file\n");
-                       close(mdfd);
-                       free(devices);
-                       return err;
+                       goto out;
                }
        }
 
@@ -1834,6 +1850,7 @@ try_again:
                ioctl(mdfd, STOP_ARRAY, NULL);
        free(devices);
        map_unlock(&map);
+out:
        if (rv == 0) {
                wait_for(chosen_name, mdfd);
                close(mdfd);
@@ -1863,10 +1880,16 @@ try_again:
                                usecs <<= 1;
                        }
                }
-       } else
+       } else if (mdfd >= 0)
                close(mdfd);
 
        /* '2' means 'OK, but not started yet' */
+       if (locked == 1)
+               cluster_release_dlmlock();
+       if (rv == -1) {
+               free(devices);
+               return 1;
+       }
        return rv == 2 ? 0 : rv;
 }