]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - Assemble.c
Release 3.2.6 - stability release
[thirdparty/mdadm.git] / Assemble.c
index bfc879c7f4433550aa25e80dc1fd39156544582f..8c8aad37b26dabf0d32a53ca09a12d616b02dabf 100644 (file)
@@ -138,7 +138,7 @@ int Assemble(struct supertype *st, char *mddev,
             char *backup_file, int invalid_backup,
             int readonly, int runstop,
             char *update, char *homehost, int require_homehost,
-            int verbose, int force)
+            int verbose, int force, int freeze_reshape)
 {
        /*
         * The task of Assemble is to find a collection of
@@ -220,7 +220,9 @@ int Assemble(struct supertype *st, char *mddev,
        int change = 0;
        int inargv = 0;
        int report_missmatch;
+#ifndef MDASSEMBLE
        int bitmap_done;
+#endif
        int start_partial_ok = (runstop >= 0) && 
                (force || devlist==NULL || auto_assem);
        unsigned int num_devs;
@@ -293,7 +295,7 @@ int Assemble(struct supertype *st, char *mddev,
                char *devname = tmpdev->devname;
                int dfd;
                struct stat stb;
-               struct supertype *tst = dup_super(st);
+               struct supertype *tst;
                struct dev_policy *pol = NULL;
                int found_container = 0;
 
@@ -306,7 +308,9 @@ int Assemble(struct supertype *st, char *mddev,
                        continue;
                }
 
-               dfd = dev_open(devname, O_RDONLY|O_EXCL);
+               tst = dup_super(st);
+
+               dfd = dev_open(devname, O_RDONLY);
                if (dfd < 0) {
                        if (report_missmatch)
                                fprintf(stderr, Name ": cannot open device %s: %s\n",
@@ -404,6 +408,17 @@ int Assemble(struct supertype *st, char *mddev,
                        /* tmpdev is a container.  We need to be either
                         * looking for a member, or auto-assembling
                         */
+                       /* should be safe to try an exclusive open now, we
+                        * have rejected anything that some other mdadm might
+                        * be looking at
+                        */
+                       dfd = dev_open(devname, O_RDONLY | O_EXCL);
+                       if (dfd < 0) {
+                               if (report_missmatch)
+                                       fprintf(stderr, Name ": %s is busy - skipping\n", devname);
+                               goto loop;
+                       }
+                       close(dfd);
 
                        if (ident->container) {
                                if (ident->container[0] == '/' &&
@@ -418,7 +433,6 @@ int Assemble(struct supertype *st, char *mddev,
                                        int uuid[4];
 
                                        content = &info;
-                                       memset(content, 0, sizeof(*content));
                                        tst->ss->getinfo_super(tst, content, NULL);
 
                                        if (!parse_uuid(ident->container, uuid) ||
@@ -440,13 +454,6 @@ int Assemble(struct supertype *st, char *mddev,
                             content;
                             content = content->next) {
 
-                               /* do not assemble arrays that might have bad blocks */
-                               if (content->array.state & (1<<MD_SB_BBM_ERRORS)) {
-                                       fprintf(stderr, Name ": BBM log found in metadata. "
-                                                               "Cannot activate array(s).\n");
-                                       tmpdev->used = 2;
-                                       goto loop;
-                               }
                                if (!ident_matches(ident, content, tst,
                                                   homehost, update,
                                                   report_missmatch ? devname : NULL))
@@ -456,6 +463,11 @@ int Assemble(struct supertype *st, char *mddev,
                                                fprintf(stderr, Name ": member %s in %s is already assembled\n",
                                                        content->text_version,
                                                        devname);
+                               } else if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
+                                       /* do not assemble arrays with unsupported configurations */
+                                       fprintf(stderr, Name ": Cannot activate member %s in %s.\n",
+                                               content->text_version,
+                                               devname);
                                } else
                                        break;
                        }
@@ -484,7 +496,6 @@ int Assemble(struct supertype *st, char *mddev,
                } else {
 
                        content = &info;
-                       memset(content, 0, sizeof(*content));
                        tst->ss->getinfo_super(tst, content, NULL);
 
                        if (!ident_matches(ident, content, tst,
@@ -492,6 +503,18 @@ int Assemble(struct supertype *st, char *mddev,
                                           report_missmatch ? devname : NULL))
                                goto loop;
                                
+                       /* should be safe to try an exclusive open now, we
+                        * have rejected anything that some other mdadm might
+                        * be looking at
+                        */
+                       dfd = dev_open(devname, O_RDONLY | O_EXCL);
+                       if (dfd < 0) {
+                               if (report_missmatch)
+                                       fprintf(stderr, Name ": %s is busy - skipping\n", devname);
+                               goto loop;
+                       }
+                       close(dfd);
+
                        if (st == NULL)
                                st = dup_super(tst);
                        if (st->minor_version == -1)
@@ -699,14 +722,13 @@ int Assemble(struct supertype *st, char *mddev,
                int err;
                err = assemble_container_content(st, mdfd, content, runstop,
                                                 chosen_name, verbose,
-                                                backup_file);
+                                                backup_file, freeze_reshape);
                close(mdfd);
                return err;
        }
+       bitmap_done = 0;
 #endif
        /* Ok, no bad inconsistancy, we can try updating etc */
-       bitmap_done = 0;
-       content->update_private = NULL;
        devices = malloc(num_devs * sizeof(*devices));
        devmap = calloc(num_devs * content->array.raid_disks, 1);
        for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) {
@@ -891,8 +913,6 @@ int Assemble(struct supertype *st, char *mddev,
                }
                devcnt++;
        }
-       free(content->update_private);
-       content->update_private = NULL;
 
        if (devcnt == 0) {
                fprintf(stderr, Name ": no devices found for %s\n",
@@ -937,7 +957,7 @@ int Assemble(struct supertype *st, char *mddev,
                                }
                                continue;
                        }
-               /* If this devices thinks that 'most_recent' has failed, then
+               /* If this device thinks that 'most_recent' has failed, then
                 * we must reject this device.
                 */
                if (j != most_recent &&
@@ -956,7 +976,9 @@ int Assemble(struct supertype *st, char *mddev,
                        if (i < content->array.raid_disks) {
                                if (devices[j].i.recovery_start == MaxSector ||
                                    (content->reshape_active &&
-                                    j >= content->array.raid_disks - content->delta_disks)) {
+                                    ((i >= content->array.raid_disks - content->delta_disks) ||
+                                     (i >= content->array.raid_disks - content->delta_disks - 1
+                                      && content->array.level == 4)))) {
                                        okcnt++;
                                        avail[i]=1;
                                } else
@@ -966,9 +988,17 @@ int Assemble(struct supertype *st, char *mddev,
                }
        }
        free(devmap);
-       while (force && !enough(content->array.level, content->array.raid_disks,
-                               content->array.layout, 1,
-                               avail, okcnt)) {
+       while (force &&
+              (!enough(content->array.level, content->array.raid_disks,
+                       content->array.layout, 1,
+                       avail)
+               ||
+               (content->reshape_active && content->delta_disks > 0 &&
+                !enough(content->array.level, (content->array.raid_disks
+                                               - content->delta_disks),
+                        content->new_layout, 1,
+                        avail)
+                       ))) {
                /* Choose the newest best drive which is
                 * not up-to-date, update the superblock
                 * and add it.
@@ -1039,6 +1069,7 @@ int Assemble(struct supertype *st, char *mddev,
                        int j = best[i];
                        if (j >= 0 &&
                            !devices[j].uptodate &&
+                           devices[j].i.recovery_start == MaxSector &&
                            devices[j].i.events == current_events) {
                                chosen_drive = j;
                                goto add_another;
@@ -1062,6 +1093,8 @@ int Assemble(struct supertype *st, char *mddev,
                        continue;
                if (!devices[j].uptodate)
                        continue;
+               if (devices[j].i.events < devices[most_recent].i.events)
+                       continue;
                chosen_drive = j;
                if ((fd=dev_open(devices[j].devname, O_RDONLY|O_EXCL))< 0) {
                        fprintf(stderr, Name ": Cannot open %s: %s\n",
@@ -1133,7 +1166,7 @@ int Assemble(struct supertype *st, char *mddev,
        if (force && !clean &&
            !enough(content->array.level, content->array.raid_disks,
                    content->array.layout, clean,
-                   avail, okcnt)) {
+                   avail)) {
                change += st->ss->update_super(st, content, "force-array",
                                        devices[chosen_drive].devname, verbose,
                                               0, NULL);
@@ -1158,6 +1191,9 @@ int Assemble(struct supertype *st, char *mddev,
                        free(devices);
                        return 1;
                }
+               if (verbose >= 0)
+                       fprintf(stderr, Name ": Marking array %s as 'clean'\n",
+                               mddev);
                close(fd);
        }
 
@@ -1187,8 +1223,11 @@ int Assemble(struct supertype *st, char *mddev,
                                fdlist[i] = -1;
                }
                if (!err) {
-                       err = Grow_restart(st, content, fdlist, bestcnt,
-                                          backup_file, verbose > 0);
+                       if (st->ss->external && st->ss->recover_backup)
+                               err = st->ss->recover_backup(st, content);
+                       else
+                               err = Grow_restart(st, content, fdlist, bestcnt,
+                                                  backup_file, verbose > 0);
                        if (err && invalid_backup) {
                                if (verbose > 0)
                                        fprintf(stderr, Name ": continuing"
@@ -1200,6 +1239,7 @@ int Assemble(struct supertype *st, char *mddev,
                        i--;
                        if (fdlist[i]>=0) close(fdlist[i]);
                }
+               free(fdlist);
                if (err) {
                        fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
                        if (backup_file == NULL)
@@ -1296,9 +1336,11 @@ int Assemble(struct supertype *st, char *mddev,
                                                sparecnt--;
                                } else if (verbose > 0)
                                        fprintf(stderr, Name ": added %s "
-                                                       "to %s as %d\n",
+                                                       "to %s as %d%s\n",
                                                devices[j].devname, mddev,
-                                               devices[j].i.disk.raid_disk);
+                                               devices[j].i.disk.raid_disk,
+                                               devices[j].uptodate?"":
+                                               " (possibly out of date)");
                        } else if (verbose > 0 && i < content->array.raid_disks)
                                fprintf(stderr, Name ": no uptodate device for "
                                                "slot %d of %s\n",
@@ -1326,7 +1368,7 @@ int Assemble(struct supertype *st, char *mddev,
                if (runstop == 1 ||
                    (runstop <= 0 &&
                     ( enough(content->array.level, content->array.raid_disks,
-                             content->array.layout, clean, avail, okcnt) &&
+                             content->array.layout, clean, avail) &&
                       (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)
                             ))) {
                        /* This array is good-to-go.
@@ -1337,9 +1379,14 @@ int Assemble(struct supertype *st, char *mddev,
                        int rv;
 #ifndef MDASSEMBLE
                        if (content->reshape_active &&
-                           content->delta_disks <= 0)
-                               rv = Grow_continue(mdfd, st, content, backup_file);
-                       else
+                           content->delta_disks <= 0) {
+                               rv = sysfs_set_str(content, NULL,
+                                                  "array_state", "readonly");
+                               if (rv == 0)
+                                       rv = Grow_continue(mdfd, st, content,
+                                                          backup_file,
+                                                          freeze_reshape);
+                       } else
 #endif
                                rv = ioctl(mdfd, RUN_ARRAY, NULL);
                        if (rv == 0) {
@@ -1366,6 +1413,7 @@ int Assemble(struct supertype *st, char *mddev,
                                                        sysfs_set_num(sra, NULL,
                                                                      "stripe_cache_size",
                                                                      (4 * content->array.chunk_size / 4096) + 1);
+                                               sysfs_free(sra);
                                        }
                                }
                                if (okcnt < (unsigned)content->array.raid_disks) {
@@ -1377,7 +1425,7 @@ int Assemble(struct supertype *st, char *mddev,
                                         * might allow them to be included, or
                                         * they will become spares.
                                         */
-                                       for (i = 0; i <= bestcnt; i++) {
+                                       for (i = 0; i < bestcnt; i++) {
                                                int j = best[i];
                                                if (j >= 0 && !devices[j].uptodate) {
                                                        if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add))
@@ -1426,13 +1474,13 @@ int Assemble(struct supertype *st, char *mddev,
                                mddev, strerror(errno));
 
                        if (!enough(content->array.level, content->array.raid_disks,
-                                   content->array.layout, 1, avail, okcnt))
+                                   content->array.layout, 1, avail))
                                fprintf(stderr, Name ": Not enough devices to "
                                        "start the array.\n");
                        else if (!enough(content->array.level,
                                         content->array.raid_disks,
                                         content->array.layout, clean,
-                                        avail, okcnt))
+                                        avail))
                                fprintf(stderr, Name ": Not enough devices to "
                                        "start the array while not clean "
                                        "- consider --force.\n");
@@ -1460,12 +1508,12 @@ int Assemble(struct supertype *st, char *mddev,
                        if (sparecnt)
                                fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
                        if (!enough(content->array.level, content->array.raid_disks,
-                                   content->array.layout, 1, avail, okcnt))
+                                   content->array.layout, 1, avail))
                                fprintf(stderr, " - not enough to start the array.\n");
                        else if (!enough(content->array.level,
                                         content->array.raid_disks,
                                         content->array.layout, clean,
-                                        avail, okcnt))
+                                        avail))
                                fprintf(stderr, " - not enough to start the "
                                        "array while not clean - consider "
                                        "--force.\n");
@@ -1505,36 +1553,51 @@ int Assemble(struct supertype *st, char *mddev,
 int assemble_container_content(struct supertype *st, int mdfd,
                               struct mdinfo *content, int runstop,
                               char *chosen_name, int verbose,
-                              char *backup_file)
+                              char *backup_file, int freeze_reshape)
 {
        struct mdinfo *dev, *sra;
        int working = 0, preexist = 0;
        int expansion = 0;
        struct map_ent *map = NULL;
+       int old_raid_disks;
+       int start_reshape;
 
        sysfs_init(content, mdfd, 0);
 
        sra = sysfs_read(mdfd, 0, GET_VERSION);
        if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0)
-               if (sysfs_set_array(content, md_get_version(mdfd)) != 0)
+               if (sysfs_set_array(content, md_get_version(mdfd)) != 0) {
+                       if (sra)
+                               sysfs_free(sra);
                        return 1;
+               }
 
-       if (content->reshape_active)
+       /* There are two types of reshape: container wide or sub-array specific
+        * Check if metadata requests blocking container wide reshapes
+        */
+       start_reshape = (content->reshape_active &&
+               !((content->reshape_active == CONTAINER_RESHAPE) &&
+               (content->array.state & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))));
+
+       /* Block subarray here if it is under reshape now
+        * Do not allow for any changes in this array
+        */
+       if (st->ss->external && content->recovery_blocked && start_reshape)
                block_subarray(content);
 
        if (sra)
                sysfs_free(sra);
-
+       old_raid_disks = content->array.raid_disks - content->delta_disks;
        for (dev = content->devs; dev; dev = dev->next)
                if (sysfs_add_disk(content, dev, 1) == 0) {
-                       if (dev->disk.raid_disk >= content->array.raid_disks &&
+                       if (dev->disk.raid_disk >= old_raid_disks &&
                            content->reshape_active)
                                expansion++;
                        else
                                working++;
                } else if (errno == EEXIST)
                        preexist++;
-       if (working == 0)
+       if (working + expansion == 0)
                return 1;/* Nothing new, don't try to start */
 
        map_update(&map, fd2devnum(mdfd),
@@ -1546,44 +1609,29 @@ int assemble_container_content(struct supertype *st, int mdfd,
                        content->array.working_disks) {
                int err;
 
-               if (content->reshape_active) {
+               if (start_reshape) {
                        int spare = content->array.raid_disks + expansion;
-                       int i;
-                       int *fdlist = malloc(sizeof(int) *
-                                            (working + expansion
-                                             + content->array.raid_disks));
-                       for (i=0; i<spare; i++)
-                               fdlist[i] = -1;
-                       for (dev = content->devs; dev; dev = dev->next) {
-                               char buf[20];
-                               int fd;
-                               sprintf(buf, "%d:%d",
-                                       dev->disk.major,
-                                       dev->disk.minor);
-                               fd = dev_open(buf, O_RDWR);
-
-                               if (dev->disk.raid_disk >= 0)
-                                       fdlist[dev->disk.raid_disk] = fd;
-                               else
-                                       fdlist[spare++] = fd;
-                       }
-                       err = Grow_restart(st, content, fdlist, spare,
-                                          backup_file, verbose > 0);
-                       while (spare > 0) {
-                               spare--;
-                               if (fdlist[spare] >= 0)
-                                       close(fdlist[spare]);
-                       }
-                       if (err) {
-                               fprintf(stderr, Name ": Failed to restore critical"
-                                       " section for reshape - sorry.\n");
-                               if (!backup_file)
-                                       fprintf(stderr, Name ":  Possibly you need"
-                                               " to specify a --backup-file\n");
+                       if (restore_backup(st, content,
+                                          working,
+                                          spare, backup_file, verbose) == 1)
                                return 1;
+
+                       err = sysfs_set_str(content, NULL,
+                                           "array_state", "readonly");
+                       if (err)
+                               return 1;
+
+                       if (st->ss->external) {
+                               if (!mdmon_running(st->container_dev))
+                                       start_mdmon(st->container_dev);
+                               ping_monitor_by_id(st->container_dev);
+                               if (mdmon_running(st->container_dev) &&
+                                               st->update_tail == NULL)
+                                       st->update_tail = &st->updates;
                        }
 
-                       err = Grow_continue(mdfd, st, content, backup_file);
+                       err = Grow_continue(mdfd, st, content, backup_file,
+                                           freeze_reshape);
                } else switch(content->array.level) {
                case LEVEL_LINEAR:
                case LEVEL_MULTIPATH:
@@ -1598,21 +1646,32 @@ int assemble_container_content(struct supertype *st, int mdfd,
                        if (!err) {
                                if (!mdmon_running(st->container_dev))
                                        start_mdmon(st->container_dev);
-                               ping_monitor(devnum2devname(st->container_dev));
+                               ping_monitor_by_id(st->container_dev);
                        }
                        break;
                }
                if (!err)
                        sysfs_set_safemode(content, content->safe_mode_delay);
+
+               /* Block subarray here if it is not reshaped now
+                * It has to be blocked a little later to allow mdmon to switch
+                * into R/W state
+                */
+               if (st->ss->external && content->recovery_blocked &&
+                   !start_reshape)
+                       block_subarray(content);
+
                if (verbose >= 0) {
                        if (err)
                                fprintf(stderr, Name
-                                       ": array %s now has %d devices",
-                                       chosen_name, working + preexist);
+                                       ": array %s now has %d device%s",
+                                       chosen_name, working + preexist,
+                                       working + preexist == 1 ? "":"s");
                        else
                                fprintf(stderr, Name
-                                       ": Started %s with %d devices",
-                                       chosen_name, working + preexist);
+                                       ": Started %s with %d device%s",
+                                       chosen_name, working + preexist,
+                                       working + preexist == 1 ? "":"s");
                        if (preexist)
                                fprintf(stderr, " (%d new)", working);
                        if (expansion)
@@ -1625,11 +1684,15 @@ int assemble_container_content(struct supertype *st, int mdfd,
                return err;
                /* FIXME should have an O_EXCL and wait for read-auto */
        } else {
-               if (verbose >= 0)
+               if (verbose >= 0) {
                        fprintf(stderr, Name
-                               ": %s assembled with %d devices but "
-                               "not started\n",
-                               chosen_name, working);
+                               ": %s assembled with %d device%s",
+                               chosen_name, preexist + working,
+                               preexist + working == 1 ? "":"s");
+                       if (preexist)
+                               fprintf(stderr, " (%d new)", working);
+                       fprintf(stderr, " but not started\n");
+               }
                return 1;
        }
 }