char *backup_file, int invalid_backup,
int readonly, int runstop,
char *update, char *homehost, int require_homehost,
- int verbose, int force)
+ int verbose, int force, int freeze_reshape)
{
/*
* The task of Assemble is to find a collection of
int change = 0;
int inargv = 0;
int report_missmatch;
+#ifndef MDASSEMBLE
int bitmap_done;
+#endif
int start_partial_ok = (runstop >= 0) &&
(force || devlist==NULL || auto_assem);
unsigned int num_devs;
char *devname = tmpdev->devname;
int dfd;
struct stat stb;
- struct supertype *tst = dup_super(st);
+ struct supertype *tst;
struct dev_policy *pol = NULL;
int found_container = 0;
continue;
}
- dfd = dev_open(devname, O_RDONLY|O_EXCL);
+ tst = dup_super(st);
+
+ dfd = dev_open(devname, O_RDONLY);
if (dfd < 0) {
if (report_missmatch)
fprintf(stderr, Name ": cannot open device %s: %s\n",
/* tmpdev is a container. We need to be either
* looking for a member, or auto-assembling
*/
+ /* should be safe to try an exclusive open now, we
+ * have rejected anything that some other mdadm might
+ * be looking at
+ */
+ dfd = dev_open(devname, O_RDONLY | O_EXCL);
+ if (dfd < 0) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": %s is busy - skipping\n", devname);
+ goto loop;
+ }
+ close(dfd);
if (ident->container) {
if (ident->container[0] == '/' &&
int uuid[4];
content = &info;
- memset(content, 0, sizeof(*content));
tst->ss->getinfo_super(tst, content, NULL);
if (!parse_uuid(ident->container, uuid) ||
content;
content = content->next) {
- /* do not assemble arrays that might have bad blocks */
- if (content->array.state & (1<<MD_SB_BBM_ERRORS)) {
- fprintf(stderr, Name ": BBM log found in metadata. "
- "Cannot activate array(s).\n");
- tmpdev->used = 2;
- goto loop;
- }
if (!ident_matches(ident, content, tst,
homehost, update,
report_missmatch ? devname : NULL))
fprintf(stderr, Name ": member %s in %s is already assembled\n",
content->text_version,
devname);
+ } else if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
+ /* do not assemble arrays with unsupported configurations */
+ fprintf(stderr, Name ": Cannot activate member %s in %s.\n",
+ content->text_version,
+ devname);
} else
break;
}
} else {
content = &info;
- memset(content, 0, sizeof(*content));
tst->ss->getinfo_super(tst, content, NULL);
if (!ident_matches(ident, content, tst,
report_missmatch ? devname : NULL))
goto loop;
+ /* should be safe to try an exclusive open now, we
+ * have rejected anything that some other mdadm might
+ * be looking at
+ */
+ dfd = dev_open(devname, O_RDONLY | O_EXCL);
+ if (dfd < 0) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": %s is busy - skipping\n", devname);
+ goto loop;
+ }
+ close(dfd);
+
if (st == NULL)
st = dup_super(tst);
if (st->minor_version == -1)
int err;
err = assemble_container_content(st, mdfd, content, runstop,
chosen_name, verbose,
- backup_file);
+ backup_file, freeze_reshape);
close(mdfd);
return err;
}
+ bitmap_done = 0;
#endif
/* Ok, no bad inconsistancy, we can try updating etc */
- bitmap_done = 0;
- content->update_private = NULL;
devices = malloc(num_devs * sizeof(*devices));
devmap = calloc(num_devs * content->array.raid_disks, 1);
for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) {
}
devcnt++;
}
- free(content->update_private);
- content->update_private = NULL;
if (devcnt == 0) {
fprintf(stderr, Name ": no devices found for %s\n",
}
continue;
}
- /* If this devices thinks that 'most_recent' has failed, then
+ /* If this device thinks that 'most_recent' has failed, then
* we must reject this device.
*/
if (j != most_recent &&
if (i < content->array.raid_disks) {
if (devices[j].i.recovery_start == MaxSector ||
(content->reshape_active &&
- j >= content->array.raid_disks - content->delta_disks)) {
+ ((i >= content->array.raid_disks - content->delta_disks) ||
+ (i >= content->array.raid_disks - content->delta_disks - 1
+ && content->array.level == 4)))) {
okcnt++;
avail[i]=1;
} else
}
}
free(devmap);
- while (force && !enough(content->array.level, content->array.raid_disks,
- content->array.layout, 1,
- avail, okcnt)) {
+ while (force &&
+ (!enough(content->array.level, content->array.raid_disks,
+ content->array.layout, 1,
+ avail)
+ ||
+ (content->reshape_active && content->delta_disks > 0 &&
+ !enough(content->array.level, (content->array.raid_disks
+ - content->delta_disks),
+ content->new_layout, 1,
+ avail)
+ ))) {
/* Choose the newest best drive which is
* not up-to-date, update the superblock
* and add it.
int j = best[i];
if (j >= 0 &&
!devices[j].uptodate &&
+ devices[j].i.recovery_start == MaxSector &&
devices[j].i.events == current_events) {
chosen_drive = j;
goto add_another;
continue;
if (!devices[j].uptodate)
continue;
+ if (devices[j].i.events < devices[most_recent].i.events)
+ continue;
chosen_drive = j;
if ((fd=dev_open(devices[j].devname, O_RDONLY|O_EXCL))< 0) {
fprintf(stderr, Name ": Cannot open %s: %s\n",
if (force && !clean &&
!enough(content->array.level, content->array.raid_disks,
content->array.layout, clean,
- avail, okcnt)) {
+ avail)) {
change += st->ss->update_super(st, content, "force-array",
devices[chosen_drive].devname, verbose,
0, NULL);
free(devices);
return 1;
}
+ if (verbose >= 0)
+ fprintf(stderr, Name ": Marking array %s as 'clean'\n",
+ mddev);
close(fd);
}
fdlist[i] = -1;
}
if (!err) {
- err = Grow_restart(st, content, fdlist, bestcnt,
- backup_file, verbose > 0);
+ if (st->ss->external && st->ss->recover_backup)
+ err = st->ss->recover_backup(st, content);
+ else
+ err = Grow_restart(st, content, fdlist, bestcnt,
+ backup_file, verbose > 0);
if (err && invalid_backup) {
if (verbose > 0)
fprintf(stderr, Name ": continuing"
i--;
if (fdlist[i]>=0) close(fdlist[i]);
}
+ free(fdlist);
if (err) {
fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
if (backup_file == NULL)
sparecnt--;
} else if (verbose > 0)
fprintf(stderr, Name ": added %s "
- "to %s as %d\n",
+ "to %s as %d%s\n",
devices[j].devname, mddev,
- devices[j].i.disk.raid_disk);
+ devices[j].i.disk.raid_disk,
+ devices[j].uptodate?"":
+ " (possibly out of date)");
} else if (verbose > 0 && i < content->array.raid_disks)
fprintf(stderr, Name ": no uptodate device for "
"slot %d of %s\n",
if (runstop == 1 ||
(runstop <= 0 &&
( enough(content->array.level, content->array.raid_disks,
- content->array.layout, clean, avail, okcnt) &&
+ content->array.layout, clean, avail) &&
(okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)
))) {
/* This array is good-to-go.
int rv;
#ifndef MDASSEMBLE
if (content->reshape_active &&
- content->delta_disks <= 0)
- rv = Grow_continue(mdfd, st, content, backup_file);
- else
+ content->delta_disks <= 0) {
+ rv = sysfs_set_str(content, NULL,
+ "array_state", "readonly");
+ if (rv == 0)
+ rv = Grow_continue(mdfd, st, content,
+ backup_file,
+ freeze_reshape);
+ } else
#endif
rv = ioctl(mdfd, RUN_ARRAY, NULL);
if (rv == 0) {
sysfs_set_num(sra, NULL,
"stripe_cache_size",
(4 * content->array.chunk_size / 4096) + 1);
+ sysfs_free(sra);
}
}
if (okcnt < (unsigned)content->array.raid_disks) {
* might allow them to be included, or
* they will become spares.
*/
- for (i = 0; i <= bestcnt; i++) {
+ for (i = 0; i < bestcnt; i++) {
int j = best[i];
if (j >= 0 && !devices[j].uptodate) {
if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add))
mddev, strerror(errno));
if (!enough(content->array.level, content->array.raid_disks,
- content->array.layout, 1, avail, okcnt))
+ content->array.layout, 1, avail))
fprintf(stderr, Name ": Not enough devices to "
"start the array.\n");
else if (!enough(content->array.level,
content->array.raid_disks,
content->array.layout, clean,
- avail, okcnt))
+ avail))
fprintf(stderr, Name ": Not enough devices to "
"start the array while not clean "
"- consider --force.\n");
if (sparecnt)
fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
if (!enough(content->array.level, content->array.raid_disks,
- content->array.layout, 1, avail, okcnt))
+ content->array.layout, 1, avail))
fprintf(stderr, " - not enough to start the array.\n");
else if (!enough(content->array.level,
content->array.raid_disks,
content->array.layout, clean,
- avail, okcnt))
+ avail))
fprintf(stderr, " - not enough to start the "
"array while not clean - consider "
"--force.\n");
int assemble_container_content(struct supertype *st, int mdfd,
struct mdinfo *content, int runstop,
char *chosen_name, int verbose,
- char *backup_file)
+ char *backup_file, int freeze_reshape)
{
struct mdinfo *dev, *sra;
int working = 0, preexist = 0;
int expansion = 0;
struct map_ent *map = NULL;
+ int old_raid_disks;
+ int start_reshape;
sysfs_init(content, mdfd, 0);
sra = sysfs_read(mdfd, 0, GET_VERSION);
if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0)
- if (sysfs_set_array(content, md_get_version(mdfd)) != 0)
+ if (sysfs_set_array(content, md_get_version(mdfd)) != 0) {
+ if (sra)
+ sysfs_free(sra);
return 1;
+ }
- if (content->reshape_active)
+ /* There are two types of reshape: container wide or sub-array specific
+ * Check if metadata requests blocking container wide reshapes
+ */
+ start_reshape = (content->reshape_active &&
+ !((content->reshape_active == CONTAINER_RESHAPE) &&
+ (content->array.state & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))));
+
+ /* Block subarray here if it is under reshape now
+ * Do not allow for any changes in this array
+ */
+ if (st->ss->external && content->recovery_blocked && start_reshape)
block_subarray(content);
if (sra)
sysfs_free(sra);
-
+ old_raid_disks = content->array.raid_disks - content->delta_disks;
for (dev = content->devs; dev; dev = dev->next)
if (sysfs_add_disk(content, dev, 1) == 0) {
- if (dev->disk.raid_disk >= content->array.raid_disks &&
+ if (dev->disk.raid_disk >= old_raid_disks &&
content->reshape_active)
expansion++;
else
working++;
} else if (errno == EEXIST)
preexist++;
- if (working == 0)
+ if (working + expansion == 0)
return 1;/* Nothing new, don't try to start */
map_update(&map, fd2devnum(mdfd),
content->array.working_disks) {
int err;
- if (content->reshape_active) {
+ if (start_reshape) {
int spare = content->array.raid_disks + expansion;
- int i;
- int *fdlist = malloc(sizeof(int) *
- (working + expansion
- + content->array.raid_disks));
- for (i=0; i<spare; i++)
- fdlist[i] = -1;
- for (dev = content->devs; dev; dev = dev->next) {
- char buf[20];
- int fd;
- sprintf(buf, "%d:%d",
- dev->disk.major,
- dev->disk.minor);
- fd = dev_open(buf, O_RDWR);
-
- if (dev->disk.raid_disk >= 0)
- fdlist[dev->disk.raid_disk] = fd;
- else
- fdlist[spare++] = fd;
- }
- err = Grow_restart(st, content, fdlist, spare,
- backup_file, verbose > 0);
- while (spare > 0) {
- spare--;
- if (fdlist[spare] >= 0)
- close(fdlist[spare]);
- }
- if (err) {
- fprintf(stderr, Name ": Failed to restore critical"
- " section for reshape - sorry.\n");
- if (!backup_file)
- fprintf(stderr, Name ": Possibly you need"
- " to specify a --backup-file\n");
+ if (restore_backup(st, content,
+ working,
+ spare, backup_file, verbose) == 1)
return 1;
+
+ err = sysfs_set_str(content, NULL,
+ "array_state", "readonly");
+ if (err)
+ return 1;
+
+ if (st->ss->external) {
+ if (!mdmon_running(st->container_dev))
+ start_mdmon(st->container_dev);
+ ping_monitor_by_id(st->container_dev);
+ if (mdmon_running(st->container_dev) &&
+ st->update_tail == NULL)
+ st->update_tail = &st->updates;
}
- err = Grow_continue(mdfd, st, content, backup_file);
+ err = Grow_continue(mdfd, st, content, backup_file,
+ freeze_reshape);
} else switch(content->array.level) {
case LEVEL_LINEAR:
case LEVEL_MULTIPATH:
if (!err) {
if (!mdmon_running(st->container_dev))
start_mdmon(st->container_dev);
- ping_monitor(devnum2devname(st->container_dev));
+ ping_monitor_by_id(st->container_dev);
}
break;
}
if (!err)
sysfs_set_safemode(content, content->safe_mode_delay);
+
+ /* Block subarray here if it is not reshaped now
+ * It has to be blocked a little later to allow mdmon to switch
+ * into R/W state
+ */
+ if (st->ss->external && content->recovery_blocked &&
+ !start_reshape)
+ block_subarray(content);
+
if (verbose >= 0) {
if (err)
fprintf(stderr, Name
- ": array %s now has %d devices",
- chosen_name, working + preexist);
+ ": array %s now has %d device%s",
+ chosen_name, working + preexist,
+ working + preexist == 1 ? "":"s");
else
fprintf(stderr, Name
- ": Started %s with %d devices",
- chosen_name, working + preexist);
+ ": Started %s with %d device%s",
+ chosen_name, working + preexist,
+ working + preexist == 1 ? "":"s");
if (preexist)
fprintf(stderr, " (%d new)", working);
if (expansion)
return err;
/* FIXME should have an O_EXCL and wait for read-auto */
} else {
- if (verbose >= 0)
+ if (verbose >= 0) {
fprintf(stderr, Name
- ": %s assembled with %d devices but "
- "not started\n",
- chosen_name, working);
+ ": %s assembled with %d device%s",
+ chosen_name, preexist + working,
+ preexist + working == 1 ? "":"s");
+ if (preexist)
+ fprintf(stderr, " (%d new)", working);
+ fprintf(stderr, " but not started\n");
+ }
return 1;
}
}