return busy;
}
+static int ident_matches(struct mddev_ident *ident,
+ struct mdinfo *content,
+ struct supertype *tst,
+ char *homehost,
+ char *update, char *devname)
+{
+ /* Decide whether the array described by 'content' satisfies every
+  * criterion that is set in 'ident'.
+  *
+  * ident:    the criteria; a field is only tested when it is set
+  *           (uuid_set non-zero, name non-empty, numeric fields not
+  *           UnSet, member non-NULL and non-empty).
+  * content:  the array information being tested.
+  * tst:      only tst->ss->swapuuid is read here, and it is handed
+  *           to same_uuid().
+  * homehost: passed through to name_matches(); not inspected here.
+  * update:   may be NULL.  When it is "uuid" the uuid test is
+  *           skipped, and when it is "name" the name test is skipped
+  *           (presumably because that field is about to be
+  *           rewritten - TODO confirm against the caller).
+  * devname:  when non-NULL, the reason for the first mismatch is
+  *           printed to stderr using this name; when NULL nothing
+  *           is printed.
+  *
+  * Tests run in the order uuid, name, super-minor, raid level,
+  * raid-disk count, container member.  The first failing test ends
+  * the function.
+  *
+  * Returns 1 if nothing mismatched, 0 otherwise.
+  */
+ if (ident->uuid_set && (!update || strcmp(update, "uuid")!= 0) &&
+ same_uuid(content->uuid, ident->uuid, tst->ss->swapuuid)==0) {
+ if (devname)
+ fprintf(stderr, Name ": %s has wrong uuid.\n",
+ devname);
+ return 0;
+ }
+ if (ident->name[0] && (!update || strcmp(update, "name")!= 0) &&
+ name_matches(content->name, ident->name, homehost)==0) {
+ if (devname)
+ fprintf(stderr, Name ": %s has wrong name.\n",
+ devname);
+ return 0;
+ }
+ if (ident->super_minor != UnSet &&
+ ident->super_minor != content->array.md_minor) {
+ if (devname)
+ fprintf(stderr, Name ": %s has wrong super-minor.\n",
+ devname);
+ return 0;
+ }
+ if (ident->level != UnSet &&
+ ident->level != content->array.level) {
+ if (devname)
+ fprintf(stderr, Name ": %s has wrong raid level.\n",
+ devname);
+ return 0;
+ }
+ if (ident->raid_disks != UnSet &&
+ ident->raid_disks!= content->array.raid_disks) {
+ if (devname)
+ fprintf(stderr, Name ": %s requires wrong number of drives.\n",
+ devname);
+ return 0;
+ }
+ if (ident->member && ident->member[0]) {
+ /* A member was requested: the text that follows the first '/'
+  * found in content->text_version (searching from its second
+  * character) must equal ident->member.
+  * NOTE(review): presumably text_version has the form
+  * "/container/member" for a container member - confirm.
+  */
+ char *s = strchr(content->text_version+1, '/'); /* +1 skips the first character */
+ if (s == NULL) {
+ /* no second component at all, so there is no member name to compare */
+ if (devname)
+ fprintf(stderr, Name ": %s is not a container and one is required.\n",
+ devname);
+ return 0;
+ } else if (strcmp(ident->member, s+1) != 0) {
+ if (devname)
+ fprintf(stderr, Name ": skipping wrong member %s is %s\n",
+ content->text_version, devname);
+ return 0;
+ }
+ }
+ return 1; /* every criterion that was set has matched */
+}
+
+
int Assemble(struct supertype *st, char *mddev,
- mddev_ident_t ident,
- mddev_dev_t devlist, char *backup_file,
+ struct mddev_ident *ident,
+ struct mddev_dev *devlist,
+ char *backup_file, int invalid_backup,
int readonly, int runstop,
char *update, char *homehost, int require_homehost,
int verbose, int force)
*/
struct mdinfo i;
} *devices;
+ char *devmap;
int *best = NULL; /* indexed by raid_disk */
int bestcnt = 0;
int devcnt = 0;
int start_partial_ok = (runstop >= 0) &&
(force || devlist==NULL || auto_assem);
unsigned int num_devs;
- mddev_dev_t tmpdev;
+ struct mddev_dev *tmpdev;
struct mdinfo info;
struct mdinfo *content = NULL;
char *avail;
num_devs++;
tmpdev = tmpdev->next;
}
- devices = malloc(num_devs * sizeof(*devices));
if (!st && ident->st) st = ident->st;
*/
for (tmpdev = devlist;
tmpdev;
- tmpdev = tmpdev->next) {
+ tmpdev = tmpdev ? tmpdev->next : NULL) {
char *devname = tmpdev->devname;
int dfd;
struct stat stb;
struct supertype *tst = dup_super(st);
+ struct dev_policy *pol = NULL;
+ int found_container = 0;
if (tmpdev->used > 1) continue;
fprintf(stderr, Name ": %s is not a block device.\n",
devname);
tmpdev->used = 2;
- } else if (!tst && (tst = guess_super(dfd)) == NULL) {
- if (report_missmatch)
- fprintf(stderr, Name ": no recogniseable superblock on %s\n",
- devname);
- tmpdev->used = 2;
- } else if (tst->ss->load_super(tst,dfd, NULL)) {
- if (report_missmatch)
- fprintf( stderr, Name ": no RAID superblock on %s\n",
- devname);
- } else if (auto_assem && st == NULL &&
- !conf_test_metadata(tst->ss->name,
- tst->ss->match_home(tst, homehost) == 1)) {
- if (report_missmatch)
- fprintf(stderr, Name ": %s has metadata type %s for which "
- "auto-assembly is disabled\n",
- devname, tst->ss->name);
- tst->ss->free_super(tst);
- tmpdev->used = 2;
- } else {
- content = &info;
- memset(content, 0, sizeof(*content));
- tst->ss->getinfo_super(tst, content);
- }
- if (dfd >= 0) close(dfd);
-
- if (tst && tst->sb && tst->ss->container_content
- && tst->loaded_container) {
- /* tmpdev is a container. We need to be either
- * looking for a member, or auto-assembling
- */
+ } else if (must_be_container(dfd)) {
if (st) {
/* already found some components, this cannot
* be another one.
if (report_missmatch)
fprintf(stderr, Name ": %s is a container, but we are looking for components\n",
devname);
- goto loop;
+ tmpdev->used = 2;
+ } if (!tst && (tst = super_by_fd(dfd, NULL)) == NULL) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": not a recognisable container: %s\n",
+ devname);
+ tmpdev->used = 2;
+ } else if (!tst->ss->load_container
+ || tst->ss->load_container(tst, dfd, NULL)) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": no correct container type: %s\n",
+ devname);
+ tmpdev->used = 2;
+ } else if (auto_assem &&
+ !conf_test_metadata(tst->ss->name, (pol = devnum_policy(stb.st_rdev)),
+ tst->ss->match_home(tst, homehost) == 1)) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": %s has metadata type %s for which "
+ "auto-assembly is disabled\n",
+ devname, tst->ss->name);
+ tmpdev->used = 2;
+ } else
+ found_container = 1;
+ } else {
+ if (!tst && (tst = guess_super(dfd)) == NULL) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": no recogniseable superblock on %s\n",
+ devname);
+ tmpdev->used = 2;
+ } else if (tst->ss->load_super(tst,dfd, NULL)) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": no RAID superblock on %s\n",
+ devname);
+ tmpdev->used = 2;
+ } else if (tst->ss->compare_super == NULL) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": Cannot assemble %s metadata on %s\n",
+ tst->ss->name, devname);
+ tmpdev->used = 2;
+ } else if (auto_assem && st == NULL &&
+ !conf_test_metadata(tst->ss->name, (pol = devnum_policy(stb.st_rdev)),
+ tst->ss->match_home(tst, homehost) == 1)) {
+ if (report_missmatch)
+ fprintf(stderr, Name ": %s has metadata type %s for which "
+ "auto-assembly is disabled\n",
+ devname, tst->ss->name);
+ tmpdev->used = 2;
}
+ }
+ if (dfd >= 0) close(dfd);
+ if (tmpdev->used == 2) {
+ if (auto_assem)
+ /* Ignore unrecognised devices during auto-assembly */
+ goto loop;
+ if (ident->uuid_set || ident->name[0] ||
+ ident->super_minor != UnSet)
+ /* Ignore unrecognised device if looking for
+ * specific array */
+ goto loop;
+
+
+ fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
+ devname);
+ if (st)
+ st->ss->free_super(st);
+ dev_policy_free(pol);
+ return 1;
+ }
+
+ if (found_container) {
+ /* tmpdev is a container. We need to be either
+ * looking for a member, or auto-assembling
+ */
if (ident->container) {
if (ident->container[0] == '/' &&
if (ident->container[0] != '/') {
/* we have a uuid */
int uuid[4];
+
+ content = &info;
+ memset(content, 0, sizeof(*content));
+ tst->ss->getinfo_super(tst, content, NULL);
+
if (!parse_uuid(ident->container, uuid) ||
!same_uuid(content->uuid, uuid, tst->ss->swapuuid)) {
if (report_missmatch)
if (verbose > 0)
fprintf(stderr, Name ": looking in container %s\n",
devname);
- next_member:
- if (tmpdev->content)
- content = tmpdev->content;
- else
- content = tst->ss->container_content(tst);
- if (!content)
- goto loop; /* empty container */
-
- tmpdev->content = content->next;
- if (tmpdev->content == NULL)
- tmpdev->used = 2;
-
- } else if (ident->container || ident->member) {
- /* No chance of this matching if we don't have
- * a container */
- if (report_missmatch)
- fprintf(stderr, Name "%s is not a container, and one is required.\n",
- devname);
- goto loop;
- }
-
- if (ident->uuid_set && (!update || strcmp(update, "uuid")!= 0) &&
- (!tst || !tst->sb ||
- same_uuid(content->uuid, ident->uuid, tst->ss->swapuuid)==0)) {
- if (report_missmatch)
- fprintf(stderr, Name ": %s has wrong uuid.\n",
- devname);
- goto loop;
- }
- if (ident->name[0] && (!update || strcmp(update, "name")!= 0) &&
- (!tst || !tst->sb ||
- name_matches(content->name, ident->name, homehost)==0)) {
- if (report_missmatch)
- fprintf(stderr, Name ": %s has wrong name.\n",
- devname);
- goto loop;
- }
- if (ident->super_minor != UnSet &&
- (!tst || !tst->sb ||
- ident->super_minor != content->array.md_minor)) {
- if (report_missmatch)
- fprintf(stderr, Name ": %s has wrong super-minor.\n",
- devname);
- goto loop;
- }
- if (ident->level != UnSet &&
- (!tst || !tst->sb ||
- ident->level != content->array.level)) {
- if (report_missmatch)
- fprintf(stderr, Name ": %s has wrong raid level.\n",
- devname);
- goto loop;
- }
- if (ident->raid_disks != UnSet &&
- (!tst || !tst->sb ||
- ident->raid_disks!= content->array.raid_disks)) {
- if (report_missmatch)
- fprintf(stderr, Name ": %s requires wrong number of drives.\n",
- devname);
- goto loop;
- }
- if (auto_assem) {
- if (tst == NULL || tst->sb == NULL)
- continue;
- }
- /* If we are this far, then we are nearly commited to this device.
- * If the super_block doesn't exist, or doesn't match others,
- * then we probably cannot continue
- * However if one of the arrays is for the homehost, and
- * the other isn't that can disambiguate.
- */
- if (!tst || !tst->sb) {
- fprintf(stderr, Name ": %s has no superblock - assembly aborted\n",
- devname);
- if (st)
- st->ss->free_super(st);
- return 1;
- }
+ for (content = tst->ss->container_content(tst, NULL);
+ content;
+ content = content->next) {
- if (tst && tst->sb && tst->ss->container_content
- && tst->loaded_container) {
- /* we have the one container we need, don't keep
- * looking. If the chosen member is active, skip.
- */
- if (is_member_busy(content->text_version)) {
- if (report_missmatch)
- fprintf(stderr, Name ": member %s in %s is already assembled\n",
- content->text_version,
- devname);
- skip:
- if (tmpdev->content)
- goto next_member;
- tst->ss->free_super(tst);
- tst = NULL;
- content = NULL;
- if (auto_assem)
- goto loop;
- return 1;
- }
- if (ident->member && ident->member[0]) {
- char *s = strchr(content->text_version+1, '/');
- if (s == NULL) {
- fprintf(stderr, Name ": badly formatted version: %s\n",
- content->text_version);
- goto skip;
- }
- if (strcmp(ident->member, s+1) != 0) {
+ if (!ident_matches(ident, content, tst,
+ homehost, update,
+ report_missmatch ? devname : NULL))
+ /* message already printed */;
+ else if (is_member_busy(content->text_version)) {
if (report_missmatch)
- fprintf(stderr,
- Name ": skipping wrong member %s\n",
- content->text_version);
- goto skip;
- }
+ fprintf(stderr, Name ": member %s in %s is already assembled\n",
+ content->text_version,
+ devname);
+ } else
+ break;
+ }
+ if (!content) {
+ tmpdev->used = 2;
+ goto loop; /* empty container */
}
+
st = tst; tst = NULL;
if (!auto_assem && inargv && tmpdev->next != NULL) {
fprintf(stderr, Name ": %s is a container, but is not "
"only device given: confused and aborting\n",
devname);
st->ss->free_super(st);
+ dev_policy_free(pol);
return 1;
}
if (verbose > 0)
fprintf(stderr, Name ": found match on member %s in %s\n",
content->text_version, devname);
- break;
- }
- if (st == NULL)
- st = dup_super(tst);
- if (st->minor_version == -1)
- st->minor_version = tst->minor_version;
- if (st->ss != tst->ss ||
- st->minor_version != tst->minor_version ||
- st->ss->compare_super(st, tst) != 0) {
- /* Some mismatch. If exactly one array matches this host,
- * we can resolve on that one.
- * Or, if we are auto assembling, we just ignore the second
- * for now.
- */
- if (auto_assem)
+
+ /* make sure we finished the loop */
+ tmpdev = NULL;
+ goto loop;
+ } else {
+
+ content = &info;
+ memset(content, 0, sizeof(*content));
+ tst->ss->getinfo_super(tst, content, NULL);
+
+ if (!ident_matches(ident, content, tst,
+ homehost, update,
+ report_missmatch ? devname : NULL))
goto loop;
- if (homehost) {
- int first = st->ss->match_home(st, homehost);
- int last = tst->ss->match_home(tst, homehost);
- if (first != last &&
- (first == 1 || last == 1)) {
- /* We can do something */
- if (first) {/* just ignore this one */
- if (report_missmatch)
- fprintf(stderr, Name ": %s misses out due to wrong homehost\n",
- devname);
- goto loop;
- } else { /* reject all those sofar */
- mddev_dev_t td;
- if (report_missmatch)
- fprintf(stderr, Name ": %s overrides previous devices due to good homehost\n",
- devname);
- for (td=devlist; td != tmpdev; td=td->next)
- if (td->used == 1)
- td->used = 0;
- tmpdev->used = 1;
- goto loop;
+
+ if (st == NULL)
+ st = dup_super(tst);
+ if (st->minor_version == -1)
+ st->minor_version = tst->minor_version;
+ if (st->ss != tst->ss ||
+ st->minor_version != tst->minor_version ||
+ st->ss->compare_super(st, tst) != 0) {
+ /* Some mismatch. If exactly one array matches this host,
+ * we can resolve on that one.
+ * Or, if we are auto assembling, we just ignore the second
+ * for now.
+ */
+ if (auto_assem)
+ goto loop;
+ if (homehost) {
+ int first = st->ss->match_home(st, homehost);
+ int last = tst->ss->match_home(tst, homehost);
+ if (first != last &&
+ (first == 1 || last == 1)) {
+ /* We can do something */
+ if (first) {/* just ignore this one */
+ if (report_missmatch)
+ fprintf(stderr, Name ": %s misses out due to wrong homehost\n",
+ devname);
+ goto loop;
+ } else { /* reject all those sofar */
+ struct mddev_dev *td;
+ if (report_missmatch)
+ fprintf(stderr, Name ": %s overrides previous devices due to good homehost\n",
+ devname);
+ for (td=devlist; td != tmpdev; td=td->next)
+ if (td->used == 1)
+ td->used = 0;
+ tmpdev->used = 1;
+ goto loop;
+ }
}
}
+ fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
+ devname);
+ tst->ss->free_super(tst);
+ st->ss->free_super(st);
+ dev_policy_free(pol);
+ return 1;
}
- fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n",
- devname);
- tst->ss->free_super(tst);
- st->ss->free_super(st);
- return 1;
+ tmpdev->used = 1;
}
-
- tmpdev->used = 1;
-
loop:
- if (tmpdev->content)
- goto next_member;
+ dev_policy_free(pol);
+ pol = NULL;
if (tst)
tst->ss->free_super(tst);
}
/* Now need to open the array device. Use create_mddev */
if (content == &info)
- st->ss->getinfo_super(st, content);
+ st->ss->getinfo_super(st, content, NULL);
trustworthy = FOREIGN;
name = content->name;
chosen_name);
if (mdfd < 0) {
st->ss->free_super(st);
- free(devices);
if (auto_assem)
goto try_again;
return 1;
close(mdfd);
mdfd = -3;
st->ss->free_super(st);
- free(devices);
if (auto_assem)
goto try_again;
return 1;
/* Ok, no bad inconsistancy, we can try updating etc */
bitmap_done = 0;
content->update_private = NULL;
+ devices = malloc(num_devs * sizeof(*devices));
+ devmap = calloc(num_devs * content->array.raid_disks, 1);
for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) {
char *devname = tmpdev->devname;
struct stat stb;
/* prepare useful information in info structures */
struct stat stb2;
struct supertype *tst;
+ int err;
fstat(mdfd, &stb2);
if (strcmp(update, "uuid")==0 &&
}
dfd = dev_open(devname, O_RDWR|O_EXCL);
- remove_partitions(dfd);
-
tst = dup_super(st);
if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) {
fprintf(stderr, Name ": cannot re-read metadata from %s - aborting\n",
if (dfd >= 0)
close(dfd);
close(mdfd);
+ free(devices);
+ free(devmap);
return 1;
}
- tst->ss->getinfo_super(tst, content);
+ tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks);
memcpy(content->uuid, ident->uuid, 16);
strcpy(content->name, ident->name);
content->array.md_minor = minor(stb2.st_rdev);
- tst->ss->update_super(tst, content, update,
- devname, verbose,
- ident->uuid_set, homehost);
+ if (strcmp(update, "byteorder") == 0)
+ err = 0;
+ else
+ err = tst->ss->update_super(tst, content, update,
+ devname, verbose,
+ ident->uuid_set,
+ homehost);
+ if (err < 0) {
+ fprintf(stderr,
+ Name ": --update=%s not understood"
+ " for %s metadata\n",
+ update, tst->ss->name);
+ tst->ss->free_super(tst);
+ free(tst);
+ close(mdfd);
+ close(dfd);
+ free(devices);
+ free(devmap);
+ return 1;
+ }
if (strcmp(update, "uuid")==0 &&
!ident->uuid_set) {
ident->uuid_set = 1;
memcpy(ident->uuid, content->uuid, 16);
}
- if (dfd < 0)
- fprintf(stderr, Name ": Cannot open %s for superblock update\n",
- devname);
- else if (tst->ss->store_super(tst, dfd))
+ if (tst->ss->store_super(tst, dfd))
fprintf(stderr, Name ": Could not re-write superblock on %s.\n",
devname);
- if (dfd >= 0)
- close(dfd);
+ close(dfd);
if (strcmp(update, "uuid")==0 &&
ident->bitmap_fd >= 0 && !bitmap_done) {
int dfd;
dfd = dev_open(devname, O_RDWR|O_EXCL);
- remove_partitions(dfd);
-
if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) {
fprintf(stderr, Name ": cannot re-read metadata from %s - aborting\n",
devname);
if (dfd >= 0)
close(dfd);
close(mdfd);
+ free(devices);
+ free(devmap);
return 1;
}
- tst->ss->getinfo_super(tst, content);
+ tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks);
tst->ss->free_super(tst);
close(dfd);
}
"the\n DEVICE list in mdadm.conf"
);
close(mdfd);
+ free(devices);
+ free(devmap);
return 1;
}
if (best[i] == -1
if (st)
st->ss->free_super(st);
close(mdfd);
+ free(devices);
+ free(devmap);
return 1;
}
if (update && strcmp(update, "byteorder")==0)
st->minor_version = 90;
- st->ss->getinfo_super(st, content);
+ st->ss->getinfo_super(st, content, NULL);
clean = content->array.state & 1;
/* now we have some devices that might be suitable.
sparecnt++;
continue;
}
+ /* If this device thinks that 'most_recent' has failed, then
+ * we must reject this device.
+ */
+ if (j != most_recent &&
+ content->array.raid_disks > 0 &&
+ devices[most_recent].i.disk.raid_disk >= 0 &&
+ devmap[j * content->array.raid_disks + devices[most_recent].i.disk.raid_disk] == 0) {
+ if (verbose > -1)
+ fprintf(stderr, Name ": ignoring %s as it reports %s as failed\n",
+ devices[j].devname, devices[most_recent].devname);
+ best[i] = -1;
+ continue;
+ }
if (devices[j].i.events+event_margin >=
devices[most_recent].i.events) {
devices[j].uptodate = 1;
if (i < content->array.raid_disks) {
- if (devices[j].i.recovery_start == MaxSector) {
+ if (devices[j].i.recovery_start == MaxSector ||
+ (content->reshape_active &&
+ j >= content->array.raid_disks - content->delta_disks)) {
okcnt++;
avail[i]=1;
} else
sparecnt++;
}
}
+ free(devmap);
while (force && !enough(content->array.level, content->array.raid_disks,
content->array.layout, 1,
avail, okcnt)) {
fprintf(stderr, Name ": Cannot open %s: %s\n",
devices[j].devname, strerror(errno));
close(mdfd);
+ free(devices);
return 1;
}
if (st->ss->load_super(st,fd, NULL)) {
fprintf(stderr, Name ": RAID superblock has disappeared from %s\n",
devices[j].devname);
close(mdfd);
+ free(devices);
return 1;
}
close(fd);
if (st->sb == NULL) {
fprintf(stderr, Name ": No suitable drives found for %s\n", mddev);
close(mdfd);
+ free(devices);
return 1;
}
- st->ss->getinfo_super(st, content);
+ st->ss->getinfo_super(st, content, NULL);
#ifndef MDASSEMBLE
sysfs_init(content, mdfd, 0);
#endif
fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
devices[chosen_drive].devname);
close(mdfd);
+ free(devices);
return 1;
}
if (st->ss->store_super(st, fd)) {
fprintf(stderr, Name ": Could not re-write superblock on %s\n",
devices[chosen_drive].devname);
close(mdfd);
+ free(devices);
return 1;
}
close(fd);
} else
fdlist[i] = -1;
}
- if (!err)
- err = Grow_restart(st, content, fdlist, bestcnt, backup_file, verbose > 0);
+ if (!err) {
+ err = Grow_restart(st, content, fdlist, bestcnt,
+ backup_file, verbose > 0);
+ if (err && invalid_backup) {
+ if (verbose > 0)
+ fprintf(stderr, Name ": continuing"
+ " without restoring backup\n");
+ err = 0;
+ }
+ }
while (i>0) {
i--;
if (fdlist[i]>=0) close(fdlist[i]);
if (backup_file == NULL)
fprintf(stderr," Possibly you needed to specify the --backup-file\n");
close(mdfd);
+ free(devices);
return err;
}
}
mddev, strerror(errno));
ioctl(mdfd, STOP_ARRAY, NULL);
close(mdfd);
+ free(devices);
return 1;
}
if (ident->bitmap_fd >= 0) {
fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n");
ioctl(mdfd, STOP_ARRAY, NULL);
close(mdfd);
+ free(devices);
return 1;
}
} else if (ident->bitmap_file) {
ident->bitmap_file);
ioctl(mdfd, STOP_ARRAY, NULL);
close(mdfd);
+ free(devices);
return 1;
}
if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
close(bmfd);
ioctl(mdfd, STOP_ARRAY, NULL);
close(mdfd);
+ free(devices);
return 1;
}
close(bmfd);
j = chosen_drive;
if (j >= 0 /* && devices[j].uptodate */) {
+ int dfd = dev_open(devices[j].devname,
+ O_RDWR|O_EXCL);
+ if (dfd >= 0) {
+ remove_partitions(dfd);
+ close(dfd);
+ }
rv = add_disk(mdfd, st, content, &devices[j].i);
if (rv) {
sysfs_uevent(content, "change");
wait_for(chosen_name, mdfd);
close(mdfd);
+ free(devices);
return 0;
}
(4 * content->array.chunk_size / 4096) + 1);
}
}
+ if (okcnt < (unsigned)content->array.raid_disks) {
+ /* If any devices did not get added
+ * because the kernel rejected them based
+ * on event count, try adding them
+ * again providing the action policy is
+ * 're-add' or greater. The bitmap
+ * might allow them to be included, or
+ * they will become spares.
+ */
+ for (i = 0; i <= bestcnt; i++) {
+ int j = best[i];
+ if (j >= 0 && !devices[j].uptodate) {
+ if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add))
+ continue;
+ rv = add_disk(mdfd, st, content,
+ &devices[j].i);
+ if (rv == 0 && verbose >= 0)
+ fprintf(stderr,
+ Name ": %s has been re-added.\n",
+ devices[j].devname);
+ }
+ }
+ }
wait_for(mddev, mdfd);
close(mdfd);
if (auto_assem) {
usecs <<= 1;
}
}
+ free(devices);
return 0;
}
fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n",
if (auto_assem)
ioctl(mdfd, STOP_ARRAY, NULL);
close(mdfd);
+ free(devices);
return 1;
}
if (runstop == -1) {
fprintf(stderr, " (out of %d)", content->array.raid_disks);
fprintf(stderr, ", but not started.\n");
close(mdfd);
+ free(devices);
return 0;
}
if (verbose >= -1) {
if (auto_assem)
ioctl(mdfd, STOP_ARRAY, NULL);
close(mdfd);
+ free(devices);
return 1;
} else {
/* The "chosen_drive" is a good choice, and if necessary, the superblock has
}
close(mdfd);
+ free(devices);
return 0;
}
#define STOP_MD _IO (MD_MAJOR, 3)
int Build(char *mddev, int chunk, int level, int layout,
- int raiddisks, mddev_dev_t devlist, int assume_clean,
+ int raiddisks, struct mddev_dev *devlist, int assume_clean,
char *bitmap_file, int bitmap_chunk, int write_behind,
int delay, int verbose, int autof, unsigned long long size)
{
int vers;
struct stat stb;
int subdevs = 0, missing_disks = 0;
- mddev_dev_t dv;
+ struct mddev_dev *dv;
int bitmap_fd;
unsigned long long bitmapsize;
int mdfd;
{
int layout = UnSet;
- if (st && st->ss->default_layout)
- layout = st->ss->default_layout(level);
+ if (st && st->ss->default_geometry)
+ st->ss->default_geometry(st, &level, &layout, NULL);
if (layout == UnSet)
switch(level) {
int Create(struct supertype *st, char *mddev,
- int chunk, int level, int layout, unsigned long long size, int raiddisks, int sparedisks,
+ int chunk, int level, int layout, unsigned long long size,
+ int raiddisks, int sparedisks,
char *name, char *homehost, int *uuid,
- int subdevs, mddev_dev_t devlist,
+ int subdevs, struct mddev_dev *devlist,
int runstop, int verbose, int force, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int write_behind, int delay, int autof)
+ char *bitmap_file, int bitmap_chunk, int write_behind,
+ int delay, int autof)
{
/*
* Create a new raid array.
char *mindisc = NULL;
char *maxdisc = NULL;
int dnum;
- mddev_dev_t dv;
+ struct mddev_dev *dv;
int fail=0, warn=0;
struct stat stb;
int first_missing = subdevs * 2;
int major_num = BITMAP_MAJOR_HI;
memset(&info, 0, sizeof(info));
-
- if (level == UnSet) {
- /* "ddf" and "imsm" metadata only supports one level - should possibly
- * push this into metadata handler??
- */
- if (st && (st->ss == &super_ddf || st->ss == &super_imsm))
- level = LEVEL_CONTAINER;
- }
-
+ if (level == UnSet && st && st->ss->default_geometry)
+ st->ss->default_geometry(st, &level, NULL, NULL);
if (level == UnSet) {
fprintf(stderr,
Name ": a RAID level is needed to create an array.\n");
inf.raid_disks == 0) {
/* yep, looks like a container */
if (st) {
- rv = st->ss->load_super(st, fd,
- devlist->devname);
+ rv = st->ss->load_container(st, fd,
+ devlist->devname);
if (rv == 0)
have_container = 1;
} else {
- st = guess_super(fd);
+ st = super_by_fd(fd, NULL);
if (st && !(rv = st->ss->
- load_super(st, fd,
- devlist->devname)))
+ load_container(st, fd,
+ devlist->devname)))
have_container = 1;
else
st = NULL;
case 6:
case 0:
if (chunk == 0) {
- if (st && st->ss->default_chunk)
- chunk = st->ss->default_chunk(st);
-
+ if (st && st->ss->default_geometry)
+ st->ss->default_geometry(st, NULL, NULL, &chunk);
chunk = chunk ? : 512;
-
if (verbose > 0)
fprintf(stderr, Name ": chunk size defaults to %dK\n", chunk);
}
close(fd);
}
}
+ if (raiddisks + sparedisks > st->max_devs) {
+ fprintf(stderr, Name ": Too many devices:"
+ " %s metadata only supports %d\n",
+ st->ss->name, st->max_devs);
+ return 1;
+ }
if (have_container)
info.array.working_disks = raiddisks;
if (fail) {
total_slots = info.array.nr_disks;
sysfs_init(&info, mdfd, 0);
- st->ss->getinfo_super(st, &info);
+ st->ss->getinfo_super(st, &info, NULL);
if (did_default && verbose >= 0) {
if (is_subarray(info.text_version)) {
sysfs_init(&info, mdfd, 0);
- if (st->ss->external && st->subarray[0]) {
+ if (st->ss->external && st->container_dev != NoMdDev) {
/* member */
/* When creating a member, we need to be careful
infos = malloc(sizeof(*infos) * total_slots);
for (pass=1; pass <=2 ; pass++) {
- mddev_dev_t moved_disk = NULL; /* the disk that was moved out of the insert point */
+ struct mddev_dev *moved_disk = NULL; /* the disk that was moved out of the insert point */
for (dnum=0, dv = devlist ; dv ;
dv=(dv->next)?(dv->next):moved_disk, dnum++) {
if (have_container)
fd = -1;
else {
- if (st->ss->external && st->subarray[0])
+ if (st->ss->external &&
+ st->container_dev != NoMdDev)
fd = open(dv->devname, O_RDWR);
else
fd = open(dv->devname, O_RDWR|O_EXCL);
ioctl(mdfd, STOP_ARRAY, NULL);
goto abort;
}
- st->ss->getinfo_super(st, inf);
+ st->ss->getinfo_super(st, inf, NULL);
safe_mode_delay = inf->safe_mode_delay;
if (have_container && verbose > 0)
* again returns container info.
*/
map_lock(&map);
- st->ss->getinfo_super(st, &info_new);
+ st->ss->getinfo_super(st, &info_new, NULL);
if (st->ss->external && level != LEVEL_CONTAINER &&
!same_uuid(info_new.uuid, info.uuid, 0)) {
map_update(&map, fd2devnum(mdfd),
if (me) {
char *path = strdup(me->path);
- st->ss->getinfo_super(st, &info_new);
+ st->ss->getinfo_super(st, &info_new, NULL);
map_update(&map, st->container_dev,
info_new.text_version,
info_new.uuid, path);
wait_for(chosen_name, mdfd);
} else if (runstop == 1 || subdevs >= raiddisks) {
if (st->ss->external) {
+ int err;
switch(level) {
case LEVEL_LINEAR:
case LEVEL_MULTIPATH:
case 0:
- sysfs_set_str(&info, NULL, "array_state",
- "active");
+ err = sysfs_set_str(&info, NULL, "array_state",
+ "active");
need_mdmon = 0;
break;
default:
- sysfs_set_str(&info, NULL, "array_state",
- "readonly");
+ err = sysfs_set_str(&info, NULL, "array_state",
+ "readonly");
break;
}
sysfs_set_safemode(&info, safe_mode_delay);
+ if (err) {
+ fprintf(stderr, Name ": failed to"
+ " activate array.\n");
+ ioctl(mdfd, STOP_ARRAY, NULL);
+ goto abort;
+ }
} else {
/* param is not actually used */
mdu_param_t param;
}
if (verbose >= 0)
fprintf(stderr, Name ": array %s started.\n", mddev);
- if (st->ss->external && st->subarray[0]) {
+ if (st->ss->external && st->container_dev != NoMdDev) {
if (need_mdmon)
start_mdmon(st->container_dev);
int is_rebuilding = 0;
int failed = 0;
struct supertype *st;
+ char *subarray = NULL;
int max_disks = MD_SB_DISKS; /* just a default */
- struct mdinfo info;
+ struct mdinfo *info = NULL;
struct mdinfo *sra;
char *member = NULL;
char *container = NULL;
return rv;
}
sra = sysfs_read(fd, 0, GET_VERSION);
- st = super_by_fd(fd);
+ st = super_by_fd(fd, &subarray);
if (fstat(fd, &stb) != 0 && !S_ISBLK(stb.st_mode))
stb.st_rdev = 0;
if (st)
max_disks = st->max_devs;
- if (sra && is_subarray(sra->text_version) &&
- strchr(sra->text_version+1, '/')) {
+ if (subarray) {
/* This is a subarray of some container.
* We want the name of the container, and the member
*/
- char *s = strchr(sra->text_version+1, '/');
- int dn;
- *s++ = '\0';
- member = s;
- dn = devname2devnum(sra->text_version+1);
+ int dn = st->container_dev;
+
+ member = subarray;
container = map_dev(dev2major(dn), dev2minor(dn), 1);
}
close(fd2);
if (err)
continue;
- st->ss->getinfo_super(st, &info);
+ if (info)
+ free(info);
+ if (subarray)
+ info = st->ss->container_content(st, subarray);
+ else {
+ info = malloc(sizeof(*info));
+ st->ss->getinfo_super(st, info, NULL);
+ }
+ if (!info)
+ continue;
if (array.raid_disks != 0 && /* container */
- (info.array.ctime != array.ctime ||
- info.array.level != array.level)) {
+ (info->array.ctime != array.ctime ||
+ info->array.level != array.level)) {
st->ss->free_super(st);
continue;
}
* ->load_super.
*/
if (memcmp(uuid_match_any,
- info.uuid,
+ info->uuid,
sizeof(uuid_match_any)) == 0) {
st->ss->free_super(st);
continue;
array.major_version, array.minor_version);
}
- if (st && st->sb) {
+ if (st && st->sb && info) {
char nbuf[64];
struct map_ent *mp, *map = NULL;
- fname_from_uuid(st, &info, nbuf, ':');
+ fname_from_uuid(st, info, nbuf, ':');
printf("MD_UUID=%s\n", nbuf+5);
- mp = map_by_uuid(&map, info.uuid);
+ mp = map_by_uuid(&map, info->uuid);
if (mp && mp->path &&
strncmp(mp->path, "/dev/md/", 8) == 0)
printf("MD_DEVNAME=%s\n", mp->path+8);
if (atime)
printf(" Update Time : %.24s\n", ctime(&atime));
if (array.raid_disks) {
+ static char *sync_action[] = {", recovering",", resyncing",", reshaping",", checking"};
char *st;
if (avail_disks == array.raid_disks)
st = "";
printf(" State : %s%s%s%s\n",
(array.state&(1<<MD_SB_CLEAN))?"clean":"active",
st,
- (!e || e->percent < 0) ? "" :
- (e->resync) ? ", resyncing": ", recovering",
+ (!e || e->percent < 0) ? "" : sync_action[e->resync],
larray_size ? "": ", Not Started");
}
if (array.raid_disks)
if (e && e->percent >= 0) {
printf(" Re%s Status : %d%% complete\n",
- (st && st->sb && info.reshape_active)?
+ (st && st->sb && info->reshape_active)?
"shape":"build",
e->percent);
is_rebuilding = 1;
}
free_mdstat(ms);
- if (st->sb && info.reshape_active) {
+ if (st->sb && info->reshape_active) {
#if 0
This is pretty boring
- printf(" Reshape pos'n : %llu%s\n", (unsigned long long) info.reshape_progress<<9,
- human_size((unsigned long long)info.reshape_progress<<9));
+ printf(" Reshape pos'n : %llu%s\n", (unsigned long long) info->reshape_progress<<9,
+ human_size((unsigned long long)info->reshape_progress<<9));
#endif
- if (info.delta_disks > 0)
+ if (info->delta_disks > 0)
printf(" Delta Devices : %d, (%d->%d)\n",
- info.delta_disks, array.raid_disks - info.delta_disks, array.raid_disks);
- if (info.delta_disks < 0)
+ info->delta_disks, array.raid_disks - info->delta_disks, array.raid_disks);
+ if (info->delta_disks < 0)
printf(" Delta Devices : %d, (%d->%d)\n",
- info.delta_disks, array.raid_disks, array.raid_disks + info.delta_disks);
- if (info.new_level != array.level) {
- char *c = map_num(pers, info.new_level);
+ info->delta_disks, array.raid_disks, array.raid_disks + info->delta_disks);
+ if (info->new_level != array.level) {
+ char *c = map_num(pers, info->new_level);
printf(" New Level : %s\n", c?c:"-unknown-");
}
- if (info.new_level != array.level ||
- info.new_layout != array.layout) {
- if (info.new_level == 5) {
- char *c = map_num(r5layout, info.new_layout);
+ if (info->new_level != array.level ||
+ info->new_layout != array.layout) {
+ if (info->new_level == 5) {
+ char *c = map_num(r5layout, info->new_layout);
printf(" New Layout : %s\n",
c?c:"-unknown-");
}
- if (info.new_level == 6) {
- char *c = map_num(r6layout, info.new_layout);
+ if (info->new_level == 6) {
+ char *c = map_num(r6layout, info->new_layout);
printf(" New Layout : %s\n",
c?c:"-unknown-");
}
- if (info.new_level == 10) {
+ if (info->new_level == 10) {
printf(" New Layout : near=%d, %s=%d\n",
- info.new_layout&255,
- (info.new_layout&0x10000)?"offset":"far",
- (info.new_layout>>8)&255);
+ info->new_layout&255,
+ (info->new_layout&0x10000)?"offset":"far",
+ (info->new_layout>>8)&255);
}
}
- if (info.new_chunk != array.chunk_size)
- printf(" New Chunksize : %dK\n", info.new_chunk/1024);
+ if (info->new_chunk != array.chunk_size)
+ printf(" New Chunksize : %dK\n", info->new_chunk/1024);
printf("\n");
} else if (e && e->percent >= 0)
printf("\n");
else
printf(" Number Major Minor RaidDevice\n");
}
+ free(info);
for (d= 0; d < max_disks; d++) {
char *dv;
free(disks);
out:
close(fd);
+ free(subarray);
return rv;
}
#endif
#include "md_u.h"
#include "md_p.h"
-int Examine(mddev_dev_t devlist, int brief, int export, int scan,
+int Examine(struct mddev_dev *devlist, int brief, int export, int scan,
int SparcAdjust, struct supertype *forcest,
char *homehost)
{
for (; devlist ; devlist=devlist->next) {
struct supertype *st;
+ int have_container = 0;
fd = dev_open(devlist->devname, O_RDONLY);
if (fd < 0) {
err = 1;
}
else {
+ int container = 0;
if (forcest)
st = dup_super(forcest);
- else
+ else if (must_be_container(fd)) {
+ /* might be a container */
+ st = super_by_fd(fd, NULL);
+ container = 1;
+ } else
st = guess_super(fd);
- if (st)
- err = st->ss->load_super(st, fd,
- (brief||scan) ? NULL
- :devlist->devname);
- else {
+ if (st) {
+ err = 1;
+ if (!container)
+ err = st->ss->load_super(st, fd,
+ (brief||scan) ? NULL
+ :devlist->devname);
+ if (err && st->ss->load_container) {
+ err = st->ss->load_container(st, fd,
+ (brief||scan) ? NULL
+ :devlist->devname);
+ if (!err)
+ have_container = 1;
+ }
+ } else {
if (!brief) {
fprintf(stderr, Name ": No md superblock detected on %s.\n", devlist->devname);
rv = 1;
devlist->devname, 0, 0, NULL);
/* Ok, its good enough to try, though the checksum could be wrong */
- if (brief) {
+ if (brief && st->ss->brief_examine_super == NULL) {
+ if (!scan)
+ fprintf(stderr, Name ": No brief listing for %s on %s\n",
+ st->ss->name, devlist->devname);
+ } else if (brief) {
struct array *ap;
char *d;
for (ap=arrays; ap; ap=ap->next) {
ap->spares = 0;
ap->st = st;
arrays = ap;
- st->ss->getinfo_super(st, &ap->info);
+ st->ss->getinfo_super(st, &ap->info, NULL);
} else
- st->ss->getinfo_super(st, &ap->info);
- if (!st->loaded_container &&
+ st->ss->getinfo_super(st, &ap->info, NULL);
+ if (!have_container &&
!(ap->info.disk.state & (1<<MD_DISK_SYNC)))
ap->spares++;
d = dl_strdup(devlist->devname);
int nfd, fd2;
int d, nd;
struct supertype *st = NULL;
-
+ char *subarray = NULL;
if (ioctl(fd, GET_ARRAY_INFO, &info.array) < 0) {
fprintf(stderr, Name ": cannot get array info for %s\n", devname);
return 1;
}
- st = super_by_fd(fd);
+ if (info.array.level != -1) {
+ fprintf(stderr, Name ": can only add devices to linear arrays\n");
+ return 1;
+ }
+
+ st = super_by_fd(fd, &subarray);
if (!st) {
fprintf(stderr, Name ": cannot handle arrays with superblock version %d\n", info.array.major_version);
return 1;
}
- if (info.array.level != -1) {
- fprintf(stderr, Name ": can only add devices to linear arrays\n");
- return 1;
+ if (subarray) {
+ fprintf(stderr, Name ": Cannot grow linear sub-arrays yet\n");
+ free(subarray);
+ free(st);
}
nfd = open(newdev, O_RDWR|O_EXCL|O_DIRECT);
if (nfd < 0) {
fprintf(stderr, Name ": cannot open %s\n", newdev);
+ free(st);
return 1;
}
fstat(nfd, &stb);
if ((stb.st_mode & S_IFMT) != S_IFBLK) {
fprintf(stderr, Name ": %s is not a block device!\n", newdev);
close(nfd);
+ free(st);
return 1;
}
/* now check out all the devices and make sure we can read the superblock */
mdu_disk_info_t disk;
char *dv;
+ st->ss->free_super(st);
+
disk.number = d;
if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
fprintf(stderr, Name ": cannot get device detail for device %d\n",
d);
+ close(nfd);
+ free(st);
return 1;
}
dv = map_dev(disk.major, disk.minor, 1);
if (!dv) {
fprintf(stderr, Name ": cannot find device file for device %d\n",
d);
+ close(nfd);
+ free(st);
return 1;
}
fd2 = dev_open(dv, O_RDWR);
if (!fd2) {
fprintf(stderr, Name ": cannot open device file %s\n", dv);
+ close(nfd);
+ free(st);
return 1;
}
- st->ss->free_super(st);
if (st->ss->load_super(st, fd2, NULL)) {
fprintf(stderr, Name ": cannot find super block on %s\n", dv);
+ close(nfd);
close(fd2);
+ free(st);
return 1;
}
close(fd2);
mdu_bitmap_file_t bmf;
mdu_array_info_t array;
struct supertype *st;
+ char *subarray = NULL;
int major = BITMAP_MAJOR_HI;
int vers = md_get_version(fd);
unsigned long long bitmapsize, array_size;
if (vers < 9003) {
major = BITMAP_MAJOR_HOSTENDIAN;
-#ifdef __BIG_ENDIAN
- fprintf(stderr, Name ": Warning - bitmaps created on this kernel are not portable\n"
- " between different architectured. Consider upgrading the Linux kernel.\n");
-#endif
+ fprintf(stderr, Name ": Warning - bitmaps created on this kernel"
+ " are not portable\n"
+ " between different architectures. Consider upgrading"
+ " the Linux kernel.\n");
}
if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
devname);
return 1;
}
+
+ if (strcmp(file, "none") == 0) {
+ fprintf(stderr, Name ": no bitmap found on %s\n", devname);
+ return 1;
+ }
if (array.level <= 0) {
fprintf(stderr, Name ": Bitmaps not meaningful with level %s\n",
map_num(pers, array.level)?:"of this array");
bitmapsize = bitmapsize * array.raid_disks / ncopies;
}
- st = super_by_fd(fd);
+ st = super_by_fd(fd, &subarray);
if (!st) {
fprintf(stderr, Name ": Cannot understand version %d.%d\n",
array.major_version, array.minor_version);
return 1;
}
- if (strcmp(file, "none") == 0) {
- fprintf(stderr, Name ": no bitmap found on %s\n", devname);
+ if (subarray) {
+ fprintf(stderr, Name ": Cannot add bitmaps to sub-arrays yet\n");
+ free(subarray);
+ free(st);
return 1;
- } else if (strcmp(file, "internal") == 0) {
+ }
+ if (strcmp(file, "internal") == 0) {
int d;
if (st->ss->add_internal_bitmap == NULL) {
fprintf(stderr, Name ": Internal bitmaps not supported "
}
array.state |= (1<<MD_SB_BITMAP_PRESENT);
if (ioctl(fd, SET_ARRAY_INFO, &array)!= 0) {
+ if (errno == EBUSY)
+ fprintf(stderr, Name
+ ": Cannot add bitmap while array is"
+ " resyncing or reshaping etc.\n");
fprintf(stderr, Name ": failed to set internal bitmap.\n");
return 1;
}
return 1;
}
if (ioctl(fd, SET_BITMAP_FILE, bitmap_fd) < 0) {
+ int err = errno;
+ if (errno == EBUSY)
+ fprintf(stderr, Name
+ ": Cannot add bitmap while array is"
+ " resyncing or reshaping etc.\n");
fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
- devname, strerror(errno));
+ devname, strerror(err));
return 1;
}
}
__u8 pad[512-68-32];
} __attribute__((aligned(512))) bsb, bsb2;
-__u32 bsb_csum(char *buf, int len)
+static __u32 bsb_csum(char *buf, int len)
{
int i;
int csum = 0;
int disks, int chunk, int level, int layout, int data,
int dests, int *destfd, unsigned long long *destoffsets);
-int freeze_array(struct mdinfo *sra)
+/* Freeze the container 'st' lives in by blocking its monitor.
+ * The container is st->container_dev, or st->devnum when container_dev
+ * is NoMdDev.
+ * Returns 1 on success and -2 on failure (a reason is printed).
+ */
+static int freeze_container(struct supertype *st)
+{
+	int container_dev = (st->container_dev != NoMdDev
+			     ? st->container_dev : st->devnum);
+	char *container = devnum2devname(container_dev);
+	int rv = 1;
+
+	if (!container) {
+		fprintf(stderr, Name
+			": could not determine container name, freeze aborted\n");
+		return -2;
+	}
+
+	if (block_monitor(container, 1)) {
+		fprintf(stderr, Name ": failed to freeze container\n");
+		rv = -2;
+	}
+
+	/* the name is owned by us (other callers of devnum2devname()
+	 * free() it), so release it on every path
+	 */
+	free(container);
+	return rv;
+}
+
+/* Undo freeze_container(): unblock the monitor of the container that
+ * 'st' lives in (st->container_dev, or st->devnum when container_dev
+ * is NoMdDev).  Failure to work out the container name is reported
+ * but otherwise ignored.
+ */
+static void unfreeze_container(struct supertype *st)
+{
+	int container_dev = (st->container_dev != NoMdDev
+			     ? st->container_dev : st->devnum);
+	char *container = devnum2devname(container_dev);
+
+	if (!container) {
+		fprintf(stderr, Name
+			": could not determine container name, unfreeze aborted\n");
+		return;
+	}
+
+	unblock_monitor(container, 1);
+	/* the name is owned by us (other callers of devnum2devname()
+	 * free() it), so do not leak it
+	 */
+	free(container);
+}
+
+static int freeze(struct supertype *st)
{
- /* Try to freeze resync on this array.
+ /* Try to freeze resync/rebuild on this array/container.
* Return -1 if the array is busy,
+ * return -2 container cannot be frozen,
* return 0 if this kernel doesn't support 'frozen'
* return 1 if it worked.
+ *
+ * External metadata is delegated to freeze_container(); otherwise
+ * the result of sysfs_freeze_array() is passed straight through.
+ * NOTE(review): -1 is also returned when sysfs_read() fails, so the
+ * caller cannot tell "busy" from "could not read sysfs".
*/
- char buf[20];
- if (sysfs_get_str(sra, NULL, "sync_action", buf, 20) <= 0)
- return 0;
- if (strcmp(buf, "idle\n") != 0 &&
- strcmp(buf, "frozen\n") != 0)
- return -1;
- if (sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0)
- return 0;
- return 1;
+ if (st->ss->external)
+ return freeze_container(st);
+ else {
+ /* native metadata: look the array up by devnum and freeze it */
+ struct mdinfo *sra = sysfs_read(-1, st->devnum, GET_VERSION);
+ int err;
+
+ if (!sra)
+ return -1;
+ err = sysfs_freeze_array(sra);
+ sysfs_free(sra);
+ return err;
+ }
}
-void unfreeze_array(struct mdinfo *sra, int frozen)
+static void unfreeze(struct supertype *st, int frozen)
{
/* If 'frozen' is 1, unfreeze the array */
+ /* 'frozen' is the value returned by freeze(); anything <= 0 means
+ * nothing was frozen and there is nothing to undo.
+ * External metadata is delegated to unfreeze_container(); otherwise
+ * sync_action is set back to "idle" through sysfs.
+ */
- if (frozen > 0)
- sysfs_set_str(sra, NULL, "sync_action", "idle");
+ if (frozen <= 0)
+ return;
+
+ if (st->ss->external) {
+ /* unfreeze_container() returns void, so call it and return
+ * separately rather than 'return expr;' in a void function
+ */
+ unfreeze_container(st);
+ return;
+ } else {
+ struct mdinfo *sra = sysfs_read(-1, st->devnum, GET_VERSION);
+
+ if (sra)
+ sysfs_set_str(sra, NULL, "sync_action", "idle");
+ else
+ fprintf(stderr, Name ": failed to unfreeze array\n");
+ sysfs_free(sra);
+ }
}
-void wait_reshape(struct mdinfo *sra)
+static void wait_reshape(struct mdinfo *sra)
{
+ /* Block until the array's "sync_action" attribute no longer starts
+ * with "reshape".  Returns immediately if the attribute cannot be
+ * opened, and stops as soon as it cannot be read.
+ */
int fd = sysfs_get_fd(sra, NULL, "sync_action");
char action[20];
- do {
+ if (fd < 0)
+ return;
+
+ while (sysfs_fd_get_str(fd, action, 20) > 0 &&
+ strncmp(action, "reshape", 7) == 0) {
fd_set rfds;
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
+ /* fd is passed as the 'exceptional conditions' set, with no
+ * timeout - presumably this is how a change of the sysfs
+ * attribute is signalled; confirm against kernel docs
+ */
select(fd+1, NULL, NULL, &rfds, NULL);
-
- if (sysfs_fd_get_str(fd, action, 20) < 0) {
- close(fd);
- return;
+ }
+ close(fd);
+}
+
+/* Ask the metadata handler whether the requested reshape is acceptable.
+ * Native (non-external) metadata has nothing to check and always
+ * succeeds.  External metadata must provide both ->reshape_super and
+ * ->manage_reshape; the verdict of ->reshape_super is returned.
+ * Returns 0 when the reshape may go ahead, non-zero otherwise.
+ */
+static int reshape_super(struct supertype *st, long long size, int level,
+			 int layout, int chunksize, int raid_disks,
+			 char *backup_file, char *dev, int verbose)
+{
+	int handler_complete;
+
+	if (!st->ss->external)
+		return 0;
+
+	handler_complete = st->ss->reshape_super && st->ss->manage_reshape;
+	if (handler_complete)
+		return st->ss->reshape_super(st, size, level, layout,
+					     chunksize, raid_disks,
+					     backup_file, dev, verbose);
+
+	fprintf(stderr, Name ": %s metadata does not support reshape\n",
+		st->ss->name);
+	return 1;
+}
+
+/* Push pending metadata changes out for external metadata: queued
+ * updates are flushed when an update list is in use (st->update_tail
+ * set), otherwise the handler's ->sync_metadata is called directly.
+ * Does nothing for native metadata.
+ */
+static void sync_metadata(struct supertype *st)
+{
+	if (!st->ss->external)
+		return;
+
+	if (!st->update_tail) {
+		st->ss->sync_metadata(st);
+		return;
+	}
+	flush_metadata_updates(st);
+}
+
+/* Set the numeric sysfs attribute 'name' of subarray 'sra' to 'n'.
+ * Returns the result of the sysfs write, or -1 if safe_mode_delay
+ * could not be read.  On failure errno is that of the failed write
+ * of 'name', so callers may report it.
+ */
+static int subarray_set_num(char *container, struct mdinfo *sra, char *name, int n)
+{
+	/* when dealing with external metadata subarrays we need to be
+	 * prepared to handle EAGAIN.  The kernel may need to wait for
+	 * mdmon to mark the array active so the kernel can handle
+	 * allocations/writeback when preparing the reshape action
+	 * (md_allow_write()).  We temporarily disable safe_mode_delay
+	 * to close a race with the array_state going clean before the
+	 * next write to raid_disks / stripe_cache_size
+	 */
+	char safe[50];
+	int rc;
+	int saved_errno;
+
+	/* only 'raid_disks' and 'stripe_cache_size' trigger md_allow_write */
+	if (strcmp(name, "raid_disks") != 0 &&
+	    strcmp(name, "stripe_cache_size") != 0)
+		return sysfs_set_num(sra, NULL, name, n);
+
+	rc = sysfs_get_str(sra, NULL, "safe_mode_delay", safe, sizeof(safe));
+	if (rc <= 0)
+		return -1;
+	sysfs_set_num(sra, NULL, "safe_mode_delay", 0);
+	rc = sysfs_set_num(sra, NULL, name, n);
+	if (rc < 0 && errno == EAGAIN) {
+		ping_monitor(container);
+		/* if we get EAGAIN here then the monitor is not active
+		 * so stop trying
+		 */
+		rc = sysfs_set_num(sra, NULL, name, n);
+	}
+	/* callers read errno right after a failure; restoring
+	 * safe_mode_delay must not overwrite it
+	 */
+	saved_errno = errno;
+	sysfs_set_str(sra, NULL, "safe_mode_delay", safe);
+	errno = saved_errno;
+	return rc;
+}
+
+static int reshape_container_raid_disks(char *container, int raid_disks)
+{
+ /* for each subarray switch to a raid level that can
+ * support the reshape, and set raid disks
+ *
+ * Returns the number of subarrays whose raid_disks was changed,
+ * or a negative value (after printing a message) on failure.
+ */
+ struct mdstat_ent *ent, *e;
+ int changed = 0, rv = 0, err = 0;
+
+ ent = mdstat_read(1, 0);
+ if (!ent) {
+ fprintf(stderr, Name ": unable to read /proc/mdstat\n");
+ return -1;
+ }
+
+ changed = 0;
+ for (e = ent; e; e = e->next) {
+ struct mdinfo *sub;
+ unsigned int cache;
+ int level, takeover_delta = 0;
+
+ if (!is_container_member(e, container))
+ continue;
+
+ level = map_name(pers, e->level);
+ if (level == 0) {
+ sub = sysfs_read(-1, e->devnum, GET_VERSION);
+ if (!sub)
+ break;
+ /* metadata records 'orig_level' */
+ rv = sysfs_set_num(sub, NULL, "level", 4);
+ if (rv < 0) {
+ err = errno;
+ /* leaving the loop skips the sysfs_free() below,
+ * so release 'sub' here
+ */
+ sysfs_free(sub);
+ break;
+ }
+ /* we want spares to be used for capacity
+ * expansion, not rebuild
+ */
+ takeover_delta = 1;
+
+ sysfs_free(sub);
+ level = 4;
}
- } while (strncmp(action, "reshape", 7) == 0);
+
+ sub = NULL;
+ switch (level) {
+ default:
+ rv = -1;
+ break;
+ case 4:
+ case 5:
+ case 6:
+ sub = sysfs_read(-1, e->devnum, GET_CHUNK|GET_CACHE);
+ if (!sub)
+ break;
+ cache = (sub->array.chunk_size / 4096) * 4;
+ if (cache > sub->cache_size)
+ rv = subarray_set_num(container, sub,
+ "stripe_cache_size", cache);
+ if (rv) {
+ err = errno;
+ break;
+ }
+ /* fall through */
+ case 1:
+ if (!sub)
+ sub = sysfs_read(-1, e->devnum, GET_VERSION);
+ if (!sub)
+ break;
+
+ rv = subarray_set_num(container, sub, "raid_disks",
+ raid_disks + takeover_delta);
+ if (rv)
+ err = errno;
+ else
+ changed++;
+ break;
+ }
+ sysfs_free(sub);
+ if (rv)
+ break;
+ }
+ free_mdstat(ent);
+ if (rv) {
+ fprintf(stderr, Name
+ ": failed to initiate container reshape%s%s\n",
+ err ? ": " : "", err ? strerror(err) : "");
+ return rv;
+ }
+
+ return changed;
}
-
-
+
+/* Undo a partially applied container reshape: for every member array
+ * of 'container' listed in /proc/mdstat, put the level and raid_disks
+ * recorded in the metadata (loaded from 'fd') back into sysfs.
+ * Failures are reported per array; nothing is returned.
+ */
+static void revert_container_raid_disks(struct supertype *st, int fd, char *container)
+{
+	/* we failed to prepare all subarrays in the container for
+	 * reshape, so cancel the changes and restore the nominal raid
+	 * level
+	 */
+	struct mdstat_ent *ent, *e;
+
+	ent = mdstat_read(0, 0);
+	if (!ent) {
+		fprintf(stderr, Name
+			": failed to read /proc/mdstat while aborting reshape\n");
+		return;
+	}
+
+	if (st->ss->load_container(st, fd, NULL)) {
+		fprintf(stderr, Name
+			": failed read metadata while aborting reshape\n");
+		/* don't leak the mdstat list on this early exit */
+		free_mdstat(ent);
+		return;
+	}
+
+	for (e = ent; e; e = e->next) {
+		int level_fixed = 0, disks_fixed = 0;
+		struct mdinfo *sub, *prev;
+		char *subarray;
+
+		if (!is_container_member(e, container))
+			continue;
+
+		subarray = to_subarray(e, container);
+		prev = st->ss->container_content(st, subarray);
+		if (!prev) {
+			/* nothing to restore from; report and move on
+			 * rather than dereferencing NULL below
+			 */
+			fprintf(stderr, Name
+				": failed to find metadata for %s while aborting reshape\n",
+				e->dev);
+			continue;
+		}
+
+		/* changing level might change raid_disks so we do it
+		 * first and then check if raid_disks still needs fixing
+		 */
+		if (map_name(pers, e->level) != prev->array.level) {
+			sub = sysfs_read(-1, e->devnum, GET_VERSION);
+			if (sub &&
+			    !sysfs_set_num(sub, NULL, "level", prev->array.level))
+				level_fixed = 1;
+			sysfs_free(sub);
+		} else
+			level_fixed = 1;
+
+		sub = sysfs_read(-1, e->devnum, GET_DISKS);
+		if (sub && sub->array.raid_disks != prev->array.raid_disks) {
+			if (!subarray_set_num(container, sub, "raid_disks",
+					      prev->array.raid_disks))
+				disks_fixed = 1;
+		} else if (sub)
+			disks_fixed = 1;
+		sysfs_free(sub);
+
+		if (!disks_fixed || !level_fixed)
+			fprintf(stderr, Name
+				": failed to restore %s to a %d-disk %s array\n",
+				e->dev, prev->array.raid_disks,
+				map_num(pers, prev->array.level));
+		free(prev);
+	}
+	st->ss->free_super(st);
+	free_mdstat(ent);
+}
+
+/* Prepare a raid10 array for takeover to raid0 by keeping exactly one
+ * working device out of every group of 'near copies' and removing the
+ * rest from the array.
+ *
+ * 'layout' is the raid10 layout; its low byte is used as the number
+ * of copies per group.  'st' is not used.
+ * sra->devs is reordered: on success the kept devices and the removed
+ * ones (flagged MD_DISK_REMOVED, MD_DISK_SYNC cleared) are all still
+ * on the list.
+ *
+ * Returns 0 on success, 1 if some group has no usable device (the
+ * array is left untouched in that case) or the layout is unusable.
+ */
+int remove_disks_on_raid10_to_raid0_takeover(struct supertype *st,
+					      struct mdinfo *sra,
+					      int layout)
+{
+	int nr_of_copies;
+	struct mdinfo *remaining;
+	int slot;
+
+	nr_of_copies = layout & 0xff;
+	/* a zero copy count would never advance 'slot' below */
+	if (nr_of_copies <= 0)
+		return 1;
+
+	remaining = sra->devs;
+	sra->devs = NULL;
+	/* for each 'copy', select one device and remove from the list. */
+	for (slot = 0; slot < sra->array.raid_disks; slot += nr_of_copies) {
+		struct mdinfo **diskp;
+		int found = 0;
+
+		/* Find a working device to keep */
+		for (diskp = &remaining; *diskp ; diskp = &(*diskp)->next) {
+			struct mdinfo *disk = *diskp;
+
+			if (disk->disk.raid_disk < slot)
+				continue;
+			if (disk->disk.raid_disk >= slot + nr_of_copies)
+				continue;
+			if (disk->disk.state & (1<<MD_DISK_REMOVED))
+				continue;
+			if (disk->disk.state & (1<<MD_DISK_FAULTY))
+				continue;
+			if (!(disk->disk.state & (1<<MD_DISK_SYNC)))
+				continue;
+
+			/* We have found a good disk to use! */
+			*diskp = disk->next;
+			disk->next = sra->devs;
+			sra->devs = disk;
+			found = 1;
+			break;
+		}
+		if (!found)
+			break;
+	}
+
+	if (slot < sra->array.raid_disks) {
+		/* didn't find all slots: splice the devices selected so
+		 * far back behind the remaining ones and give up
+		 */
+		struct mdinfo **e;
+		e = &remaining;
+		while (*e)
+			e = &(*e)->next;
+		*e = sra->devs;
+		sra->devs = remaining;
+		return 1;
+	}
+
+	/* Remove all 'remaining' devices from the array */
+	while (remaining) {
+		struct mdinfo *sd = remaining;
+		remaining = sd->next;
+
+		sysfs_set_str(sra, sd, "state", "faulty");
+		sysfs_set_str(sra, sd, "slot", "none");
+		sysfs_set_str(sra, sd, "state", "remove");
+		sd->disk.state |= (1<<MD_DISK_REMOVED);
+		sd->disk.state &= ~(1<<MD_DISK_SYNC);
+		sd->next = sra->devs;
+		sra->devs = sd;
+	}
+	return 0;
+}
+
+/* Release the descriptor table used for a reshape: close every entry
+ * of 'fdlist' (of length 'size') that holds a descriptor (>= 0), then
+ * free both 'fdlist' and 'offsets'.
+ */
+void reshape_free_fdlist(int *fdlist,
+			 unsigned long long *offsets,
+			 int size)
+{
+	int *cur = fdlist;
+	int left = size;
+
+	while (left-- > 0) {
+		if (*cur >= 0)
+			close(*cur);
+		cur++;
+	}
+
+	free(offsets);
+	free(fdlist);
+}
+
+/* Open the component devices needed for a reshape.
+ *
+ * Slots 0..nrdisks of 'fdlist' are first set to -1.  Every non-faulty
+ * in-sync device of 'sra' is opened read-only into the slot given by
+ * its raid_disk number.  When no backup file is in use, every other
+ * non-faulty device is treated as a spare, opened read-write and
+ * placed in consecutive slots starting at 'raid_disks'; its offset
+ * points 'blocks' + 8 sectors before the end of the component.
+ * 'offsets' receives byte offsets matching 'fdlist'.
+ *
+ * Returns the index of the first unused slot after the spares, or -1
+ * if a device could not be opened (descriptors already opened are
+ * left in 'fdlist').
+ */
+int reshape_prepare_fdlist(char *devname,
+			   struct mdinfo *sra,
+			   int raid_disks,
+			   int nrdisks,
+			   unsigned long blocks,
+			   char *backup_file,
+			   int *fdlist,
+			   unsigned long long *offsets)
+{
+	struct mdinfo *dev;
+	int next_free = raid_disks;
+	int i;
+
+	i = 0;
+	while (i <= nrdisks)
+		fdlist[i++] = -1;
+
+	for (dev = sra->devs; dev; dev = dev->next) {
+		char *path;
+
+		if (dev->disk.state & (1<<MD_DISK_FAULTY))
+			continue;
+
+		if (dev->disk.state & (1<<MD_DISK_SYNC)) {
+			/* active member: goes in its own raid_disk slot */
+			path = map_dev(dev->disk.major, dev->disk.minor, 1);
+			fdlist[dev->disk.raid_disk] = dev_open(path, O_RDONLY);
+			offsets[dev->disk.raid_disk] = dev->data_offset*512;
+			if (fdlist[dev->disk.raid_disk] < 0) {
+				fprintf(stderr,
+					Name ": %s: cannot open component %s\n",
+					devname, path ? path : "-unknown-");
+				return -1;
+			}
+			continue;
+		}
+
+		if (backup_file != NULL)
+			continue;
+
+		/* spare */
+		path = map_dev(dev->disk.major, dev->disk.minor, 1);
+		fdlist[next_free] = dev_open(path, O_RDWR);
+		offsets[next_free] = (dev->data_offset + sra->component_size - blocks - 8)*512;
+		if (fdlist[next_free] < 0) {
+			fprintf(stderr, Name ": %s: cannot open component %s\n",
+				devname, path ? path : "-unknown-");
+			return -1;
+		}
+		next_free++;
+	}
+	return next_free;
+}
+
+/* Create the backup file for a reshape and make sure it is big enough.
+ *
+ * 'backup_file' is created exclusively (it must not exist yet), its
+ * descriptor is stored in *fdlist and *offsets is set to 8 sectors.
+ * 'blocks' + 1 zeroed 512-byte sectors are then written and synced.
+ * 'fd' is the array being reshaped, used only to reject a backup file
+ * that lives on that array; 'devname' is used in messages.
+ *
+ * Return 1 on success, 0 on any form of failure.  On failure the
+ * descriptor is closed and *fdlist is set to -1; the file itself is
+ * not removed.
+ */
+int reshape_open_backup_file(char *backup_file,
+			     int fd,
+			     char *devname,
+			     long blocks,
+			     int *fdlist,
+			     unsigned long long *offsets)
+{
+	/* need to check backup file is large enough */
+	char buf[512];
+	struct stat stb;
+	dev_t backup_dev;
+	long i;
+
+	*fdlist = open(backup_file, O_RDWR|O_CREAT|O_EXCL,
+		       S_IRUSR | S_IWUSR);
+	*offsets = 8 * 512;
+	if (*fdlist < 0) {
+		fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
+			devname, backup_file, strerror(errno));
+		return 0;
+	}
+	/* Guard against backup file being on array device.
+	 * If array is partitioned or if LVM etc is in the
+	 * way this will not notice, but it is better than
+	 * nothing.
+	 * The check is best-effort: it is skipped if either fstat fails.
+	 * The device number is kept as dev_t so it is not truncated
+	 * before being compared with st_rdev.
+	 */
+	if (fstat(*fdlist, &stb) == 0) {
+		backup_dev = stb.st_dev;
+		if (fstat(fd, &stb) == 0 && stb.st_rdev == backup_dev) {
+			fprintf(stderr, Name ": backup file must NOT be"
+				" on the array being reshaped.\n");
+			goto fail;
+		}
+	}
+
+	memset(buf, 0, 512);
+	for (i = 0; i < blocks + 1 ; i++) {
+		if (write(*fdlist, buf, 512) != 512) {
+			fprintf(stderr, Name ": %s: cannot create"
+				" backup file %s: %s\n",
+				devname, backup_file, strerror(errno));
+			goto fail;
+		}
+	}
+	if (fsync(*fdlist) != 0) {
+		fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
+			devname, backup_file, strerror(errno));
+		goto fail;
+	}
+
+	return 1;
+
+fail:
+	/* never hand a stale or leaked descriptor back to the caller */
+	close(*fdlist);
+	*fdlist = -1;
+	return 0;
+}
+
+/* Work out how many 512-byte sectors must be backed up at a time.
+ *
+ * We need an amount of data which is both a whole number of old
+ * stripes and a whole number of new stripes, i.e. the LCM of
+ * (chunk sectors * data disks) for the old and the new geometry.
+ *
+ * 'nchunk'/'ochunk' are the new/old chunk sizes in bytes,
+ * 'ndata'/'odata' the new/old number of data disks.
+ * Returns the LCM, or 0 if either stripe size is 0 (chunk smaller
+ * than one sector, or no data disks).
+ */
+unsigned long compute_backup_blocks(int nchunk, int ochunk,
+				    unsigned int ndata, unsigned int odata)
+{
+	/* widen before multiplying so the products are not computed
+	 * in (possibly narrower) unsigned int arithmetic
+	 */
+	unsigned long old_stripe = (unsigned long)(ochunk/512) * odata;
+	unsigned long new_stripe = (unsigned long)(nchunk/512) * ndata;
+	unsigned long a = old_stripe;
+	unsigned long b = new_stripe;
+
+	/* a zero operand has no meaningful LCM and must not reach the
+	 * division below
+	 */
+	if (a == 0 || b == 0)
+		return 0;
+
+	/* Find GCD (Euclid) */
+	while (b != 0) {
+		unsigned long t = a % b;
+		a = b;
+		b = t;
+	}
+
+	/* LCM == product / GCD; divide first to keep the value small */
+	return old_stripe / a * new_stripe;
+}
+
+
int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
long long size,
- int level, char *layout_str, int chunksize, int raid_disks)
+ int level, char *layout_str, int chunksize, int raid_disks,
+ int force)
{
/* Make some changes in the shape of an array.
* The kernel must support the change.
char *c;
int rv = 0;
struct supertype *st;
+ char *subarray = NULL;
int nchunk, ochunk;
int nlayout, olayout;
char alt_layout[40];
int *fdlist;
unsigned long long *offsets;
- int d, i;
+ int d;
int nrdisks;
int err;
int frozen;
- unsigned long a,b, blocks, stripes;
+ unsigned long blocks, stripes;
unsigned long cache;
unsigned long long array_size;
int changed = 0;
+ char *container = NULL;
+ int cfd = -1;
int done;
struct mdinfo *sra;
" Please use a newer kernel\n");
return 1;
}
- sra = sysfs_read(fd, 0, GET_LEVEL);
- if (sra)
- frozen = freeze_array(sra);
- else {
+
+ st = super_by_fd(fd, &subarray);
+ if (!st) {
+ fprintf(stderr, Name ": Unable to determine metadata format for %s\n", devname);
+ return 1;
+ }
+ if (raid_disks > st->max_devs) {
+ fprintf(stderr, Name ": Cannot increase raid-disks on this array"
+ " beyond %d\n", st->max_devs);
+ return 1;
+ }
+
+ /* in the external case we need to check that the requested reshape is
+ * supported, and perform an initial check that the container holds the
+ * pre-requisite spare devices (mdmon owns final validation)
+ */
+ if (st->ss->external) {
+ int container_dev;
+ int rv;
+
+ if (subarray) {
+ container_dev = st->container_dev;
+ cfd = open_dev_excl(st->container_dev);
+ } else if (size >= 0 || layout_str != NULL || chunksize != 0 ||
+ level != UnSet) {
+ fprintf(stderr,
+ Name ": %s is a container, only 'raid-devices' can be changed\n",
+ devname);
+ return 1;
+ } else {
+ container_dev = st->devnum;
+ close(fd);
+ cfd = open_dev_excl(st->devnum);
+ fd = cfd;
+ }
+ if (cfd < 0) {
+ fprintf(stderr, Name ": Unable to open container for %s\n",
+ devname);
+ free(subarray);
+ return 1;
+ }
+
+ container = devnum2devname(st->devnum);
+ if (!container) {
+ fprintf(stderr, Name ": Could not determine container name\n");
+ free(subarray);
+ return 1;
+ }
+
+ if (subarray)
+ rv = st->ss->load_container(st, cfd, NULL);
+ else
+ rv = st->ss->load_super(st, cfd, NULL);
+ if (rv) {
+ fprintf(stderr, Name ": Cannot read superblock for %s\n",
+ devname);
+ free(subarray);
+ return 1;
+ }
+
+ if (mdmon_running(container_dev))
+ st->update_tail = &st->updates;
+ }
+
+ if (raid_disks > array.raid_disks &&
+ array.spare_disks < (raid_disks - array.raid_disks) &&
+ !force) {
+ fprintf(stderr,
+ Name ": Need %d spare%s to avoid degraded array,"
+ " and only have %d.\n"
+ " Use --force to over-ride this check.\n",
+ raid_disks - array.raid_disks,
+ raid_disks - array.raid_disks == 1 ? "" : "s",
+ array.spare_disks);
+ return 1;
+ }
+
+ sra = sysfs_read(fd, 0, GET_LEVEL | GET_DISKS | GET_DEVS | GET_STATE);
+ if (sra) {
+ if (st->ss->external && subarray == NULL) {
+ array.level = LEVEL_CONTAINER;
+ sra->array.level = LEVEL_CONTAINER;
+ }
+ } else {
fprintf(stderr, Name ": failed to read sysfs parameters for %s\n",
devname);
return 1;
}
- if (frozen < 0) {
+ frozen = freeze(st);
+ if (frozen < -1) {
+ /* freeze() already spewed the reason */
+ return 1;
+ } else if (frozen < 0) {
fprintf(stderr, Name ": %s is performing resync/recovery and cannot"
" be reshaped\n", devname);
return 1;
/* ========= set size =============== */
if (size >= 0 && (size == 0 || size != array.size)) {
+ long long orig_size = array.size;
+
+ if (reshape_super(st, size, UnSet, UnSet, 0, 0, NULL, devname, !quiet)) {
+ rv = 1;
+ goto release;
+ }
+ sync_metadata(st);
array.size = size;
if (array.size != size) {
/* got truncated to 32bit, write to
rv = ioctl(fd, SET_ARRAY_INFO, &array);
if (rv != 0) {
int err = errno;
+
+ /* restore metadata */
+ if (reshape_super(st, orig_size, UnSet, UnSet, 0, 0,
+ NULL, devname, !quiet) == 0)
+ sync_metadata(st);
fprintf(stderr, Name ": Cannot set device size for %s: %s\n",
devname, strerror(err));
if (err == EBUSY &&
fprintf(stderr, Name ": component size of %s has been set to %lluK\n",
devname, size);
changed = 1;
- } else {
+ } else if (array.level != LEVEL_CONTAINER) {
size = get_component_size(fd)/2;
if (size == 0)
size = array.size;
}
+ /* ========= check for Raid10 -> Raid0 conversion ===============
+ * current implementation assumes that the following conditions must be met:
+ * - far_copies == 1
+ * - near_copies == 2
+ */
+ if (level == 0 && array.level == 10 &&
+ array.layout == ((1 << 8) + 2) && !(array.raid_disks & 1)) {
+ int err;
+ err = remove_disks_on_raid10_to_raid0_takeover(st, sra, array.layout);
+ if (err) {
+ dprintf(Name": Array cannot be reshaped\n");
+ if (container)
+ free(container);
+ if (cfd > -1)
+ close(cfd);
+ return 1;
+ }
+ }
+
/* ======= set level =========== */
if (level != UnSet && level != array.level) {
/* Trying to change the level.
} else
layout_str = "parity-last";
} else {
+ /* Level change is a simple takeover. In the external
+ * case we don't check with the metadata handler until
+ * we establish what the final layout will be. If the
+ * level change is disallowed we will revert to
+ * orig_level without disturbing the metadata, otherwise
+ * we will send an update.
+ */
c = map_num(pers, level);
if (c == NULL) {
rv = 1;/* not possible */
goto release;
}
+ if (!force) {
+ /* Need to check there are enough spares */
+ int spares_needed = 0;
+ switch (array.level * 16 + level) {
+ case 0x05:
+ spares_needed = 1; break;
+ case 0x06:
+ spares_needed = 2; break;
+ case 0x15:
+ spares_needed = 1; break;
+ case 0x16:
+ spares_needed = 2; break;
+ case 0x56:
+ spares_needed = 1; break;
+ }
+ if (raid_disks > array.raid_disks)
+ spares_needed += raid_disks-array.raid_disks;
+ if (spares_needed > array.spare_disks) {
+ fprintf(stderr,
+ Name ": Need %d spare%s to avoid"
+ " degraded array, and only have %d.\n"
+ " Use --force to over-ride"
+ " this check.\n",
+ spares_needed,
+ spares_needed == 1 ? "" : "s",
+ array.spare_disks);
+ rv = 1;
+ goto release;
+ }
+ }
err = sysfs_set_str(sra, NULL, "level", c);
if (err) {
err = errno;
/* ========= set shape (chunk_size / layout / ndisks) ============== */
/* Check if layout change is a no-op */
- if (layout_str) switch(array.level) {
+ switch (array.level) {
case 5:
- if (array.layout == map_name(r5layout, layout_str))
+ if (layout_str && array.layout == map_name(r5layout, layout_str))
layout_str = NULL;
break;
case 6:
rv = 1;
goto release;
}
- if (strcmp(layout_str, "normalise") == 0 ||
- strcmp(layout_str, "normalize") == 0) {
+ if (layout_str &&
+ (strcmp(layout_str, "normalise") == 0 ||
+ strcmp(layout_str, "normalize") == 0)) {
char *hyphen;
strcpy(alt_layout, map_num(r6layout, array.layout));
hyphen = strrchr(alt_layout, '-');
}
}
- if (array.layout == map_name(r6layout, layout_str))
+ if (layout_str && array.layout == map_name(r6layout, layout_str))
layout_str = NULL;
if (layout_str && strcmp(layout_str, "preserve") == 0)
layout_str = NULL;
if (layout_str == NULL
&& (chunksize == 0 || chunksize*1024 == array.chunk_size)
&& (raid_disks == 0 || raid_disks == array.raid_disks)) {
+ if (reshape_super(st, -1, level, UnSet, 0, 0, NULL, devname, !quiet)) {
+ rv = 1;
+ goto release;
+ }
+ sync_metadata(st);
rv = 0;
if (level != UnSet && level != array.level) {
/* Looks like this level change doesn't need
} else if (!changed && !quiet)
fprintf(stderr, Name ": %s: no change requested\n",
devname);
+
+ if (st->ss->external && !mdmon_running(st->container_dev) &&
+ level > 0) {
+ start_mdmon(st->container_dev);
+ ping_monitor(container);
+ }
goto release;
}
c = map_num(pers, array.level);
if (c == NULL) c = "-unknown-";
- switch(array.level) {
+ switch (array.level) {
default: /* raid0, linear, multipath cannot be reconfigured */
fprintf(stderr, Name ": %s array %s cannot be reshaped.\n",
c, devname);
+ /* TODO raid0 raiddisks can be reshaped via raid4 */
rv = 1;
break;
+ case LEVEL_CONTAINER: {
+ int count;
+ /* double check that we are not changing anything but raid_disks */
+ if (size >= 0 || layout_str != NULL || chunksize != 0 || level != UnSet) {
+ fprintf(stderr,
+ Name ": %s is a container, only 'raid-devices' can be changed\n",
+ devname);
+ rv = 1;
+ goto release;
+ }
+
+ st->update_tail = &st->updates;
+ if (reshape_super(st, -1, UnSet, UnSet, 0, raid_disks,
+ backup_file, devname, !quiet)) {
+ rv = 1;
+ goto release;
+ }
+
+ count = reshape_container_raid_disks(container, raid_disks);
+ if (count < 0) {
+ revert_container_raid_disks(st, fd, container);
+ rv = 1;
+ goto release;
+ } else if (count == 0) {
+ if (!quiet)
+ fprintf(stderr, Name
+ ": no active subarrays to reshape\n");
+ goto release;
+ }
+
+ if (!mdmon_running(st->devnum)) {
+ start_mdmon(st->devnum);
+ ping_monitor(container);
+ }
+ sync_metadata(st);
+
+ /* give mdmon a chance to allocate spares */
+ ping_manager(container);
+
+ /* manage_reshape takes care of releasing the array(s) */
+ st->ss->manage_reshape(st, backup_file);
+ frozen = 0;
+ goto release;
+ }
case LEVEL_FAULTY: /* only 'layout' change is permitted */
if (chunksize || raid_disks) {
break;
}
if (raid_disks > 0) {
+ if (reshape_super(st, -1, UnSet, UnSet, 0, raid_disks,
+ NULL, devname, !quiet)) {
+ rv = 1;
+ goto release;
+ }
+ sync_metadata(st);
array.raid_disks = raid_disks;
if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
fprintf(stderr, Name ": Cannot set raid-devices for %s: %s\n",
* layout/chunksize/raid_disks can be changed
* though the kernel may not support it all.
*/
- st = super_by_fd(fd);
+ if (subarray) {
+ fprintf(stderr, Name ": Cannot reshape subarrays yet\n");
+ break;
+ }
/*
* There are three possibilities.
break;
}
- /* So how much do we need to backup.
- * We need an amount of data which is both a whole number of
- * old stripes and a whole number of new stripes.
- * So LCM for (chunksize*datadisks).
- */
- a = (ochunk/512) * odata;
- b = (nchunk/512) * ndata;
- /* Find GCD */
- while (a != b) {
- if (a < b)
- b -= a;
- if (b < a)
- a -= b;
- }
- /* LCM == product / GCD */
- blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a;
+ blocks = compute_backup_blocks(nchunk, ochunk, ndata, odata);
sysfs_free(sra);
sra = sysfs_read(fd, 0,
rv = 1;
break;
}
- for (d=0; d <= nrdisks; d++)
- fdlist[d] = -1;
- d = array.raid_disks;
- for (sd = sra->devs; sd; sd=sd->next) {
- if (sd->disk.state & (1<<MD_DISK_FAULTY))
- continue;
- if (sd->disk.state & (1<<MD_DISK_SYNC)) {
- char *dn = map_dev(sd->disk.major,
- sd->disk.minor, 1);
- fdlist[sd->disk.raid_disk]
- = dev_open(dn, O_RDONLY);
- offsets[sd->disk.raid_disk] = sd->data_offset*512;
- if (fdlist[sd->disk.raid_disk] < 0) {
- fprintf(stderr, Name ": %s: cannot open component %s\n",
- devname, dn?dn:"-unknown-");
- rv = 1;
- goto release;
- }
- } else if (backup_file == NULL) {
- /* spare */
- char *dn = map_dev(sd->disk.major,
- sd->disk.minor, 1);
- fdlist[d] = dev_open(dn, O_RDWR);
- offsets[d] = (sd->data_offset + sra->component_size - blocks - 8)*512;
- if (fdlist[d]<0) {
- fprintf(stderr, Name ": %s: cannot open component %s\n",
- devname, dn?dn:"-unknown");
- rv = 1;
- goto release;
- }
- d++;
- }
+
+ d = reshape_prepare_fdlist(devname, sra, array.raid_disks,
+ nrdisks, blocks, backup_file,
+ fdlist, offsets);
+ if (d < 0) {
+ rv = 1;
+ goto release;
}
if (backup_file == NULL) {
+ if (st->ss->external && !st->ss->manage_reshape) {
+ fprintf(stderr, Name ": %s Grow operation not supported by %s metadata\n",
+ devname, st->ss->name);
+ rv = 1;
+ break;
+ }
if (ndata <= odata) {
fprintf(stderr, Name ": %s: Cannot grow - need backup-file\n",
devname);
break;
}
} else {
- /* need to check backup file is large enough */
- char buf[512];
- fdlist[d] = open(backup_file, O_RDWR|O_CREAT|O_EXCL,
- S_IRUSR | S_IWUSR);
- offsets[d] = 8 * 512;
- if (fdlist[d] < 0) {
- fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
- devname, backup_file, strerror(errno));
- rv = 1;
- break;
- }
- memset(buf, 0, 512);
- for (i=0; i < (signed)blocks + 1 ; i++) {
- if (write(fdlist[d], buf, 512) != 512) {
- fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
- devname, backup_file, strerror(errno));
- rv = 1;
- break;
- }
- }
- if (fsync(fdlist[d]) != 0) {
- fprintf(stderr, Name ": %s: cannot create backup file %s: %s\n",
- devname, backup_file, strerror(errno));
+ if (!reshape_open_backup_file(backup_file, fd, devname,
+ (signed)blocks,
+ fdlist+d, offsets+d)) {
rv = 1;
break;
}
d++;
}
+ /* check that the operation is supported by the metadata */
+ if (reshape_super(st, -1, level, nlayout, nchunk, ndisks,
+ backup_file, devname, !quiet)) {
+ rv = 1;
+ break;
+ }
+
/* lastly, check that the internal stripe cache is
* large enough, or it won't work.
*/
* If only changing raid_disks, use ioctl, else use
* sysfs.
*/
+ sync_metadata(st);
if (ochunk == nchunk && olayout == nlayout) {
array.raid_disks = ndisks;
if (ioctl(fd, SET_ARRAY_INFO, &array) != 0) {
break;
}
+ if (st->ss->external) {
+ /* metadata handler takes it from here */
+ ping_manager(container);
+ st->ss->manage_reshape(st, backup_file);
+ frozen = 0;
+ break;
+ }
+
/* set up the backup-super-block. This requires the
* uuid from the array.
*/
if (c && sysfs_set_str(sra, NULL, "level", c) == 0)
fprintf(stderr, Name ": aborting level change\n");
}
- if (sra)
- unfreeze_array(sra, frozen);
+ unfreeze(st, frozen);
return rv;
}
*/
/* FIXME return status is never checked */
-int grow_backup(struct mdinfo *sra,
+static int grow_backup(struct mdinfo *sra,
unsigned long long offset, /* per device */
unsigned long stripes, /* per device */
int *sources, unsigned long long *offsets,
* every works.
*/
/* FIXME return value is often ignored */
-int wait_backup(struct mdinfo *sra,
+static int wait_backup(struct mdinfo *sra,
unsigned long long offset, /* per device */
unsigned long long blocks, /* per device */
unsigned long long blocks2, /* per device - hack */
sysfs_set_num(sra, NULL, "sync_max", offset + blocks + blocks2);
if (offset == 0)
sysfs_set_str(sra, NULL, "sync_action", "reshape");
- do {
+
+ if (sysfs_fd_get_ll(fd, &completed) < 0) {
+ close(fd);
+ return -1;
+ }
+ while (completed < offset + blocks) {
char action[20];
fd_set rfds;
FD_ZERO(&rfds);
action, 20) > 0 &&
strncmp(action, "reshape", 7) != 0)
break;
- } while (completed < offset + blocks);
+ }
close(fd);
if (part) {
if (st->ss->load_super(st, fd, NULL))
continue;
- st->ss->getinfo_super(st, &dinfo);
+ st->ss->getinfo_super(st, &dinfo, NULL);
st->ss->free_super(st);
if (lseek64(fd,
if (st->ss->load_super(st, fdlist[j], NULL))
/* FIXME should be this be an error */
continue;
- st->ss->getinfo_super(st, &dinfo);
+ st->ss->getinfo_super(st, &dinfo, NULL);
st->ss->free_super(st);
offsets[j] = dinfo.data_offset * 512;
}
if (fdlist[j] < 0) continue;
if (st->ss->load_super(st, fdlist[j], NULL))
continue;
- st->ss->getinfo_super(st, &dinfo);
+ st->ss->getinfo_super(st, &dinfo, NULL);
dinfo.reshape_progress = info->reshape_progress;
st->ss->update_super(st, &dinfo,
"_reshape_progress",
bsb.devstart2 = blocks;
backup_fd = open(backup_file, O_RDWR|O_CREAT, S_IRUSR | S_IWUSR);
+ if (backup_fd < 0) {
+ fprintf(stderr, Name ": Cannot open backup file %s\n",
+ backup_file ?: "- no backup-file given");
+ return 1;
+ }
backup_list[0] = backup_fd;
backup_offsets[0] = 8 * 512;
fds = malloc(odisks * sizeof(fds[0]));
*/
#include "mdadm.h"
+#include <dirent.h>
+#include <ctype.h>
-static int count_active(struct supertype *st, int mdfd, char **availp,
+static int count_active(struct supertype *st, struct mdinfo *sra,
+ int mdfd, char **availp,
struct mdinfo *info);
static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
int number, __u64 events, int verbose,
char *array_name);
+static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+ struct map_ent *target,
+ struct supertype *st, int verbose);
+
+static int Incremental_container(struct supertype *st, char *devname,
+ char *homehost,
+ int verbose, int runstop, int autof);
+
+static struct mddev_ident *search_mdstat(struct supertype *st,
+ struct mdinfo *info,
+ char *devname,
+ int verbose, int *rvp);
int Incremental(char *devname, int verbose, int runstop,
struct supertype *st, char *homehost, int require_homehost,
* start the array (auto-readonly).
*/
struct stat stb;
- struct mdinfo info;
- struct mddev_ident_s *array_list, *match;
+ struct mdinfo info, dinfo;
+ struct mdinfo *sra = NULL, *d;
+ struct mddev_ident *match;
char chosen_name[1024];
- int rv;
+ int rv = 1;
struct map_ent *mp, *map = NULL;
- int dfd, mdfd;
- char *avail;
+ int dfd = -1, mdfd = -1;
+ char *avail = NULL;
int active_disks;
- int trustworthy = FOREIGN;
+ int trustworthy;
char *name_to_use;
mdu_array_info_t ainf;
+ struct dev_policy *policy = NULL;
+ struct map_ent target_array;
+ int have_target;
struct createinfo *ci = conf_get_create_info();
+ if (stat(devname, &stb) < 0) {
+ if (verbose >= 0)
+ fprintf(stderr, Name ": stat failed for %s: %s.\n",
+ devname, strerror(errno));
+ return rv;
+ }
+ if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+ if (verbose >= 0)
+ fprintf(stderr, Name ": %s is not a block device.\n",
+ devname);
+ return rv;
+ }
+ dfd = dev_open(devname, O_RDONLY|O_EXCL);
+ if (dfd < 0) {
+ if (verbose >= 0)
+ fprintf(stderr, Name ": cannot open %s: %s.\n",
+ devname, strerror(errno));
+ return rv;
+ }
+ /* If the device is a container, we do something very different */
+ if (must_be_container(dfd)) {
+ if (!st)
+ st = super_by_fd(dfd, NULL);
+ if (st && st->ss->load_container)
+ rv = st->ss->load_container(st, dfd, NULL);
+
+ close(dfd);
+ if (!rv && st->ss->container_content)
+ return Incremental_container(st, devname, homehost,
+ verbose, runstop, autof);
+
+ fprintf(stderr, Name ": %s is not part of an md array.\n",
+ devname);
+ return rv;
+ }
/* 1/ Check if device is permitted by mdadm.conf */
fprintf(stderr, Name
": %s not permitted by mdadm.conf.\n",
devname);
- return 1;
+ goto out;
}
/* 2/ Find metadata, reject if none appropriate (check
* version/name from args) */
- dfd = dev_open(devname, O_RDONLY|O_EXCL);
- if (dfd < 0) {
- if (verbose >= 0)
- fprintf(stderr, Name ": cannot open %s: %s.\n",
- devname, strerror(errno));
- return 1;
- }
if (fstat(dfd, &stb) < 0) {
if (verbose >= 0)
fprintf(stderr, Name ": fstat failed for %s: %s.\n",
devname, strerror(errno));
- close(dfd);
- return 1;
+ goto out;
}
if ((stb.st_mode & S_IFMT) != S_IFBLK) {
if (verbose >= 0)
fprintf(stderr, Name ": %s is not a block device.\n",
devname);
- close(dfd);
- return 1;
+ goto out;
}
+ dinfo.disk.major = major(stb.st_rdev);
+ dinfo.disk.minor = minor(stb.st_rdev);
+
+ policy = disk_policy(&dinfo);
+ have_target = policy_check_path(&dinfo, &target_array);
+
if (st == NULL && (st = guess_super(dfd)) == NULL) {
if (verbose >= 0)
fprintf(stderr, Name
": no recognisable superblock on %s.\n",
devname);
- close(dfd);
- return 1;
+ rv = try_spare(devname, &dfd, policy,
+ have_target ? &target_array : NULL,
+ st, verbose);
+ goto out;
}
- if (st->ss->load_super(st, dfd, NULL)) {
+ if (st->ss->compare_super == NULL ||
+ st->ss->load_super(st, dfd, NULL)) {
if (verbose >= 0)
fprintf(stderr, Name ": no RAID superblock on %s.\n",
devname);
- close(dfd);
- return 1;
+ rv = try_spare(devname, &dfd, policy,
+ have_target ? &target_array : NULL,
+ st, verbose);
+ free(st);
+ goto out;
}
- close (dfd);
+ close (dfd); dfd = -1;
memset(&info, 0, sizeof(info));
- st->ss->getinfo_super(st, &info);
- /* 3/ Check if there is a match in mdadm.conf */
-
- array_list = conf_get_ident(NULL);
- match = NULL;
- for (; array_list; array_list = array_list->next) {
- if (array_list->uuid_set &&
- same_uuid(array_list->uuid, info.uuid, st->ss->swapuuid)
- == 0) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": UUID differs from %s.\n",
- array_list->devname);
- continue;
- }
- if (array_list->name[0] &&
- strcasecmp(array_list->name, info.name) != 0) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": Name differs from %s.\n",
- array_list->devname);
- continue;
- }
- if (array_list->devices &&
- !match_oneof(array_list->devices, devname)) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": Not a listed device for %s.\n",
- array_list->devname);
- continue;
- }
- if (array_list->super_minor != UnSet &&
- array_list->super_minor != info.array.md_minor) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": Different super-minor to %s.\n",
- array_list->devname);
- continue;
- }
- if (!array_list->uuid_set &&
- !array_list->name[0] &&
- !array_list->devices &&
- array_list->super_minor == UnSet) {
- if (verbose >= 2 && array_list->devname)
- fprintf(stderr, Name
- ": %s doesn't have any identifying information.\n",
- array_list->devname);
- continue;
- }
- /* FIXME, should I check raid_disks and level too?? */
+ st->ss->getinfo_super(st, &info, NULL);
- if (match) {
- if (verbose >= 0) {
- if (match->devname && array_list->devname)
- fprintf(stderr, Name
- ": we match both %s and %s - cannot decide which to use.\n",
- match->devname, array_list->devname);
- else
- fprintf(stderr, Name
- ": multiple lines in mdadm.conf match\n");
- }
- return 2;
- }
- match = array_list;
- }
+ /* 3/ Check if there is a match in mdadm.conf */
+ match = search_mdstat(st, &info, devname, verbose, &rv);
+ if (!match && rv == 2)
+ goto out;
if (match && match->devname
&& strcasecmp(match->devname, "<ignore>") == 0) {
fprintf(stderr, Name ": array containing %s is explicitly"
" ignored by mdadm.conf\n",
devname);
- return 1;
+ goto out;
}
/* 3a/ if not, check for homehost match. If no match, continue
trustworthy = FOREIGN;
- if (!match && !conf_test_metadata(st->ss->name,
+ if (!match && !conf_test_metadata(st->ss->name, policy,
(trustworthy == LOCAL))) {
if (verbose >= 1)
fprintf(stderr, Name
": %s has metadata type %s for which "
"auto-assembly is disabled\n",
devname, st->ss->name);
- return 1;
+ goto out;
}
if (trustworthy == LOCAL_ANY)
trustworthy = LOCAL;
if (autof == 0)
autof = ci->autof;
- if (st->ss->container_content && st->loaded_container) {
- if ((runstop > 0 && info.container_enough >= 0) ||
- info.container_enough > 0)
- /* pass */;
- else {
- if (verbose)
- fprintf(stderr, Name ": not enough devices to start the container\n");
- return 0;
- }
-
- /* This is a pre-built container array, so we do something
- * rather different.
- */
- return Incremental_container(st, devname, verbose, runstop,
- autof, trustworthy);
- }
-
name_to_use = info.name;
if (name_to_use[0] == 0 &&
info.array.level == LEVEL_CONTAINER &&
mdfd = -1;
if (mdfd < 0) {
- struct mdinfo *sra;
- struct mdinfo dinfo;
/* Couldn't find an existing array, maybe make a new one */
mdfd = create_mddev(match ? match->devname : NULL,
name_to_use, autof, trustworthy, chosen_name);
if (mdfd < 0)
- return 1;
+ goto out;
sysfs_init(&info, mdfd, 0);
if (set_array_info(mdfd, st, &info) != 0) {
fprintf(stderr, Name ": failed to set array info for %s: %s\n",
chosen_name, strerror(errno));
- close(mdfd);
- return 2;
+ rv = 2;
+ goto out;
}
dinfo = info;
fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
devname, chosen_name, strerror(errno));
ioctl(mdfd, STOP_ARRAY, 0);
- close(mdfd);
- return 2;
+ rv = 2;
+ goto out;
}
- sra = sysfs_read(mdfd, fd2devnum(mdfd), GET_DEVS);
+ sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+
if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
/* It really should be 'none' - must be old buggy
* kernel, and mdadm -I may not be able to complete.
fprintf(stderr, Name
": You have an old buggy kernel which cannot support\n"
" --incremental reliably. Aborting.\n");
- close(mdfd);
sysfs_free(sra);
- return 2;
+ rv = 2;
+ goto out;
}
info.array.working_disks = 1;
- sysfs_free(sra);
/* 6/ Make sure /var/run/mdadm.map contains this array. */
map_update(&map, fd2devnum(mdfd),
info.text_version,
char dn[20];
int dfd2;
int err;
- struct mdinfo *sra;
struct supertype *st2;
struct mdinfo info2, *d;
+ sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+
if (mp->path)
strcpy(chosen_name, mp->path);
else
/* It is generally not OK to add non-spare drives to a
* running array as they are probably missing because
* they failed. However if runstop is 1, then the
- * array was possibly started early and our best be is
- * to add this anyway. It would probably be good to
- * allow explicit policy statement about this.
+ * array was possibly started early and our best bet is
+ * to add this anyway.
+ * Also if action policy is re-add or better we allow
+ * re-add
*/
if ((info.disk.state & (1<<MD_DISK_SYNC)) != 0
+ && ! policy_action_allows(policy, st->ss->name,
+ act_re_add)
&& runstop < 1) {
int active = 0;
fprintf(stderr, Name
": not adding %s to active array (without --run) %s\n",
devname, chosen_name);
- close(mdfd);
- return 2;
+ rv = 2;
+ goto out;
}
}
- sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE));
- if (!sra)
- return 2;
-
+ if (!sra) {
+ rv = 2;
+ goto out;
+ }
if (sra->devs) {
sprintf(dn, "%d:%d", sra->devs->disk.major,
sra->devs->disk.minor);
": metadata mismatch between %s and "
"chosen array %s\n",
devname, chosen_name);
- close(mdfd);
close(dfd2);
- return 2;
+ rv = 2;
+ goto out;
}
close(dfd2);
memset(&info2, 0, sizeof(info2));
- st2->ss->getinfo_super(st2, &info2);
+ st2->ss->getinfo_super(st2, &info2, NULL);
st2->ss->free_super(st2);
if (info.array.level != info2.array.level ||
memcmp(info.uuid, info2.uuid, 16) != 0 ||
fprintf(stderr, Name
": unexpected difference between %s and %s.\n",
chosen_name, devname);
- close(mdfd);
- return 2;
+ rv = 2;
+ goto out;
}
}
info2.disk.major = major(stb.st_rdev);
if (err < 0) {
fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
devname, chosen_name, strerror(errno));
- close(mdfd);
- return 2;
+ rv = 2;
+ goto out;
}
info.array.working_disks = 0;
for (d = sra->devs; d; d=d->next)
chosen_name, info.array.working_disks);
wait_for(chosen_name, mdfd);
close(mdfd);
+ sysfs_free(sra);
rv = Incremental(chosen_name, verbose, runstop,
NULL, homehost, require_homehost, autof);
if (rv == 1)
rv = 0;
return rv;
}
- avail = NULL;
- active_disks = count_active(st, mdfd, &avail, &info);
+
+ /* We have added something to the array, so need to re-read the
+ * state. Eventually this state should be kept up-to-date as
+ * things change.
+ */
+ sysfs_free(sra);
+ sra = sysfs_read(mdfd, -1, (GET_DEVS | GET_STATE |
+ GET_OFFSET | GET_SIZE));
+ active_disks = count_active(st, sra, mdfd, &avail, &info);
if (enough(info.array.level, info.array.raid_disks,
info.array.layout, info.array.state & 1,
avail, active_disks) == 0) {
- free(avail);
if (verbose >= 0)
fprintf(stderr, Name
": %s attached to %s, not enough to start (%d).\n",
devname, chosen_name, active_disks);
map_unlock(&map);
- close(mdfd);
- return 0;
+ rv = 0;
+ goto out;
}
- free(avail);
/* 7b/ if yes, */
/* - if number of OK devices match expected, or -R and there */
fprintf(stderr, Name
": %s attached to %s which is already active.\n",
devname, chosen_name);
- close(mdfd);
map_unlock(&map);
- return 0;
+ rv = 0;
+ goto out;
}
map_unlock(&map);
if (runstop > 0 || active_disks >= info.array.working_disks) {
- struct mdinfo *sra;
+ struct mdinfo *dsk;
/* Let's try to start it */
if (match && match->bitmap_file) {
int bmfd = open(match->bitmap_file, O_RDWR);
fprintf(stderr, Name
": Could not open bitmap file %s.\n",
match->bitmap_file);
- close(mdfd);
- return 1;
+ goto out;
}
if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
close(bmfd);
fprintf(stderr, Name
": Failed to set bitmapfile for %s.\n",
chosen_name);
- close(mdfd);
- return 1;
+ goto out;
}
close(bmfd);
}
- sra = sysfs_read(mdfd, fd2devnum(mdfd), 0);
+ /* Need to remove from the array any devices which
+ * 'count_active' discerned were too old or inappropriate
+ */
+ for (d = sra ? sra->devs : NULL ; d ; d = d->next)
+ if (d->disk.state & (1<<MD_DISK_REMOVED))
+ remove_disk(mdfd, st, sra, d);
+
if ((sra == NULL || active_disks >= info.array.working_disks)
&& trustworthy != FOREIGN)
rv = ioctl(mdfd, RUN_ARRAY, NULL);
if (rv == 0) {
if (verbose >= 0)
fprintf(stderr, Name
- ": %s attached to %s, which has been started.\n",
+ ": %s attached to %s, which has been started.\n",
devname, chosen_name);
rv = 0;
wait_for(chosen_name, mdfd);
+ /* We just started the array, so some devices
+ * might have been evicted from the array
+ * because their event counts were too old.
+ * If the action=re-add policy is in-force for
+ * those devices we should re-add them now.
+ */
+ for (dsk = sra->devs; dsk ; dsk = dsk->next) {
+ if (disk_action_allows(dsk, st->ss->name, act_re_add) &&
+ add_disk(mdfd, st, sra, dsk) == 0)
+ fprintf(stderr, Name
+ ": %s re-added to %s\n",
+ dsk->sys_name, chosen_name);
+ }
} else {
fprintf(stderr, Name
": %s attached to %s, but failed to start: %s.\n",
devname, chosen_name);
rv = 0;
}
- close(mdfd);
+out:
+ free(avail);
+ if (dfd >= 0)
+ close(dfd);
+ if (mdfd >= 0)
+ close(mdfd);
+ if (policy)
+ dev_policy_free(policy);
+ if (sra)
+ sysfs_free(sra);
return rv;
}
+/*
+ * Walk the identity list returned by conf_get_ident(NULL) looking for
+ * the one entry that describes the array which 'info' (read from
+ * 'devname') belongs to.
+ *
+ * An entry is skipped when any of its uuid, name, device list or
+ * super-minor is set and disagrees with 'info'/'devname', and also when
+ * it sets none of those four fields.  With verbose >= 2 the reason is
+ * reported for entries that have a devname.
+ *
+ * Returns the single matching entry, or NULL if there is none.  If a
+ * second entry matches as well, *rvp is set to 2 and NULL is returned;
+ * *rvp is not written in any other case.
+ */
+static struct mddev_ident *search_mdstat(struct supertype *st,
+					 struct mdinfo *info,
+					 char *devname,
+					 int verbose, int *rvp)
+{
+	struct mddev_ident *found = NULL;
+	struct mddev_ident *walk = conf_get_ident(NULL);
+
+	while (walk) {
+		struct mddev_ident *cand = walk;
+		/* per-entry diagnostics need -vv and a named entry */
+		int chatty = (verbose >= 2 && cand->devname);
+
+		/* step on now so every 'continue' below moves forward */
+		walk = walk->next;
+
+		if (cand->uuid_set &&
+		    !same_uuid(cand->uuid, info->uuid, st->ss->swapuuid)) {
+			if (chatty)
+				fprintf(stderr, Name
+					": UUID differs from %s.\n",
+					cand->devname);
+			continue;
+		}
+		if (cand->name[0] &&
+		    strcasecmp(cand->name, info->name) != 0) {
+			if (chatty)
+				fprintf(stderr, Name
+					": Name differs from %s.\n",
+					cand->devname);
+			continue;
+		}
+		if (cand->devices &&
+		    !match_oneof(cand->devices, devname)) {
+			if (chatty)
+				fprintf(stderr, Name
+					": Not a listed device for %s.\n",
+					cand->devname);
+			continue;
+		}
+		if (cand->super_minor != UnSet &&
+		    cand->super_minor != info->array.md_minor) {
+			if (chatty)
+				fprintf(stderr, Name
+					": Different super-minor to %s.\n",
+					cand->devname);
+			continue;
+		}
+		if (!(cand->uuid_set ||
+		      cand->name[0] ||
+		      cand->devices ||
+		      cand->super_minor != UnSet)) {
+			/* nothing to identify the array by */
+			if (chatty)
+				fprintf(stderr, Name
+					": %s doesn't have any identifying information.\n",
+					cand->devname);
+			continue;
+		}
+		/* FIXME, should I check raid_disks and level too?? */
+
+		if (!found) {
+			/* first entry to survive every test */
+			found = cand;
+			continue;
+		}
+
+		/* A second entry matches too: ambiguous, give up. */
+		if (verbose >= 0) {
+			if (found->devname && cand->devname)
+				fprintf(stderr, Name
+					": we match both %s and %s - cannot decide which to use.\n",
+					found->devname, cand->devname);
+			else
+				fprintf(stderr, Name
+					": multiple lines in mdadm.conf match\n");
+		}
+		*rvp = 2;
+		return NULL;
+	}
+	return found;
+}
+
static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
int number, __u64 events, int verbose,
char *array_name)
close(dfd);
continue;
}
- st->ss->getinfo_super(st, &info);
+ st->ss->getinfo_super(st, &info, NULL);
st->ss->free_super(st);
close(dfd);
}
}
-static int count_active(struct supertype *st, int mdfd, char **availp,
+static int count_active(struct supertype *st, struct mdinfo *sra,
+ int mdfd, char **availp,
struct mdinfo *bestinfo)
{
/* count how many devices in sra think they are active */
struct mdinfo *d;
- int cnt = 0, cnt1 = 0;
+ int cnt = 0;
__u64 max_events = 0;
- struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
char *avail = NULL;
+ int *best;
+ char *devmap = NULL;
+ int numdevs = 0;
+ int devnum;
+ int b, i;
+ int raid_disks = 0;
if (!sra)
return 0;
- for (d = sra->devs ; d ; d = d->next) {
+ for (d = sra->devs ; d ; d = d->next)
+ numdevs++;
+ for (d = sra->devs, devnum=0 ; d ; d = d->next, devnum++) {
char dn[30];
int dfd;
int ok;
close(dfd);
if (ok != 0)
continue;
- st->ss->getinfo_super(st, &info);
+ info.array.raid_disks = raid_disks;
+ st->ss->getinfo_super(st, &info, devmap + raid_disks * devnum);
if (!avail) {
- avail = malloc(info.array.raid_disks);
+ raid_disks = info.array.raid_disks;
+ avail = calloc(raid_disks, 1);
if (!avail) {
fprintf(stderr, Name ": out of memory.\n");
exit(1);
}
- memset(avail, 0, info.array.raid_disks);
*availp = avail;
+
+ best = calloc(raid_disks, sizeof(int));
+ devmap = calloc(raid_disks * numdevs, 1);
+
+ st->ss->getinfo_super(st, &info, devmap);
}
if (info.disk.state & (1<<MD_DISK_SYNC))
cnt++;
max_events = info.events;
avail[info.disk.raid_disk] = 2;
- st->ss->getinfo_super(st, bestinfo);
+ best[info.disk.raid_disk] = devnum;
+ st->ss->getinfo_super(st, bestinfo, NULL);
} else if (info.events == max_events) {
- cnt++;
avail[info.disk.raid_disk] = 2;
+ best[info.disk.raid_disk] = devnum;
} else if (info.events == max_events-1) {
- cnt1++;
- avail[info.disk.raid_disk] = 1;
+ if (avail[info.disk.raid_disk] == 0) {
+ avail[info.disk.raid_disk] = 1;
+ best[info.disk.raid_disk] = devnum;
+ }
} else if (info.events < max_events - 1)
;
else if (info.events == max_events+1) {
int i;
- cnt1 = cnt;
- cnt = 1;
max_events = info.events;
- for (i=0; i<info.array.raid_disks; i++)
+ for (i=0; i < raid_disks; i++)
if (avail[i])
avail[i]--;
avail[info.disk.raid_disk] = 2;
- st->ss->getinfo_super(st, bestinfo);
+ best[info.disk.raid_disk] = devnum;
+ st->ss->getinfo_super(st, bestinfo, NULL);
} else { /* info.events much bigger */
- cnt = 1; cnt1 = 0;
memset(avail, 0, info.disk.raid_disk);
max_events = info.events;
- st->ss->getinfo_super(st, bestinfo);
+ avail[info.disk.raid_disk] = 2;
+ st->ss->getinfo_super(st, bestinfo, NULL);
}
}
st->ss->free_super(st);
}
- return cnt + cnt1;
+ if (!avail)
+ return 0;
+ /* We need to reject any device that thinks the best device is
+ * failed or missing */
+ for (b = 0; b < raid_disks; b++)
+ if (avail[b] == 2)
+ break;
+ cnt = 0;
+ for (i = 0 ; i < raid_disks ; i++) {
+ if (i != b && avail[i])
+ if (devmap[raid_disks * best[i] + b] == 0) {
+ /* This device thinks 'b' is failed -
+ * don't use it */
+ devnum = best[i];
+ for (d=sra->devs ; devnum; d = d->next)
+ devnum--;
+ d->disk.state |= (1 << MD_DISK_REMOVED);
+ avail[i] = 0;
+ }
+ if (avail[i])
+ cnt++;
+ }
+ free(best);
+ free(devmap);
+ return cnt;
+}
+
+static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+			   struct map_ent *target, int bare,
+			   struct supertype *st, int verbose)
+{
+	/* This device doesn't have any md metadata
+	 * The device policy allows 'spare' and if !bare, it allows spare-same-slot.
+	 * If 'st' is not set, then we only know that some metadata allows this,
+	 * others possibly don't.
+	 * So look for a container or array to attach the device to.
+	 * Prefer 'target' if that is set and the array is found.
+	 *
+	 * If st is set, then only arrays of that type are considered
+	 *
+	 * On the path where the device is handed to Manage_subdevs(),
+	 * *dfdp is closed and set to -1 first.
+	 *
+	 * Return 0 on success, or some exit code on failure, probably 1.
+	 * "No suitable array found" and "could not open the chosen array"
+	 * are failures and yield 1; otherwise the result of
+	 * Manage_subdevs() is returned unchanged.
+	 */
+	int rv = 1;
+	struct stat stb;
+	struct map_ent *mp, *map = NULL;
+	struct mdinfo *chosen = NULL;
+	int dfd = *dfdp;
+
+	if (fstat(dfd, &stb) != 0)
+		return 1;
+
+	/*
+	 * Now we need to find a suitable array to add this to.
+	 * We only accept arrays that:
+	 *  - match 'st'
+	 *  - are in the same domains as the device
+	 *  - are of a size for which the device will be useful
+	 * and we choose the one that is the most degraded
+	 */
+
+	if (map_lock(&map)) {
+		fprintf(stderr, Name ": failed to get exclusive lock on "
+			"mapfile\n");
+		return 1;
+	}
+	for (mp = map ; mp ; mp = mp->next) {
+		struct supertype *st2;
+		struct domainlist *dl = NULL;
+		struct mdinfo *sra;
+		unsigned long long devsize;
+
+		if (is_subarray(mp->metadata))
+			continue;
+		if (st) {
+			/* st2 is only a probe here; it is freed either way */
+			st2 = st->ss->match_metadata_desc(mp->metadata);
+			if (!st2 ||
+			    (st->minor_version >= 0 &&
+			     st->minor_version != st2->minor_version)) {
+				if (verbose > 1)
+					fprintf(stderr, Name ": not adding %s to %s as metadata type doesn't match\n",
+						devname, mp->path);
+				free(st2);
+				continue;
+			}
+			free(st2);
+		}
+		sra = sysfs_read(-1, mp->devnum,
+				 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+				 GET_DEGRADED|GET_COMPONENT|GET_VERSION);
+		if (!sra) {
+			/* Probably a container - no degraded info */
+			sra = sysfs_read(-1, mp->devnum,
+					 GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+					 GET_COMPONENT|GET_VERSION);
+			if (sra)
+				sra->array.failed_disks = 0;
+		}
+		if (!sra)
+			continue;
+		if (st == NULL) {
+			int i;
+			st2 = NULL;
+			for(i=0; !st2 && superlist[i]; i++)
+				st2 = superlist[i]->match_metadata_desc(
+					sra->text_version);
+			if (!st2) {
+				if (verbose > 1)
+					fprintf(stderr, Name ": not adding %s to %s"
+						" as metadata not recognised.\n",
+						devname, mp->path);
+				goto next;
+			}
+			/* Need to double check the 'act_spare' permissions applies
+			 * to this metadata.
+			 */
+			if (!policy_action_allows(pol, st2->ss->name, act_spare))
+				goto next;
+			if (!bare && !policy_action_allows(pol, st2->ss->name,
+							   act_spare_same_slot))
+				goto next;
+		} else
+			st2 = st;
+		/* A failed size query leaves devsize unset; skip this array
+		 * rather than compare against an indeterminate value.
+		 * (get_dev_size() is treated as returning 0 on failure, as
+		 * partition_try_spare() does.)
+		 */
+		if (!get_dev_size(dfd, NULL, &devsize))
+			goto next;
+		if (st2->ss->avail_size(st2, devsize) < sra->component_size) {
+			if (verbose > 1)
+				fprintf(stderr, Name ": not adding %s to %s as it is too small\n",
+					devname, mp->path);
+			goto next;
+		}
+		dl = domain_from_array(sra, st2->ss->name);
+		if (!domain_test(dl, pol, st2->ss->name)) {
+			/* domain test fails */
+			if (verbose > 1)
+				fprintf(stderr, Name ": not adding %s to %s as it is not in a compatible domain\n",
+					devname, mp->path);
+
+			goto next;
+		}
+		/* test against target.
+		 * If 'target' is set and 'bare' is false, we only accept
+		 * arrays/containers that match 'target'.
+		 * If 'target' is set and 'bare' is true, we prefer the
+		 * array which matches 'target'.
+		 */
+		if (target) {
+			if (strcmp(target->metadata, mp->metadata) == 0 &&
+			    memcmp(target->uuid, mp->uuid,
+				   sizeof(target->uuid)) == 0) {
+				/* This is our target!! */
+				if (chosen)
+					sysfs_free(chosen);
+				chosen = sra;
+				sra = NULL;
+				/* skip to end so we don't check any more */
+				while (mp->next)
+					mp = mp->next;
+				goto next;
+			}
+			/* not our target */
+			if (!bare)
+				goto next;
+		}
+
+		/* all tests passed, OK to add to this array */
+		if (!chosen) {
+			chosen = sra;
+			sra = NULL;
+		} else if (chosen->array.failed_disks < sra->array.failed_disks) {
+			/* more degraded than the current choice - prefer it */
+			sysfs_free(chosen);
+			chosen = sra;
+			sra = NULL;
+		}
+	next:
+		if (sra)
+			sysfs_free(sra);
+		/* st2 is our own allocation only when no 'st' was given */
+		if (st != st2)
+			free(st2);
+		if (dl)
+			domain_free(dl);
+	}
+	if (chosen) {
+		/* add current device to chosen array as a spare */
+		int mdfd = open_dev(devname2devnum(chosen->sys_name));
+		if (mdfd >= 0) {
+			struct mddev_dev devlist;
+			/* "major:minor" name handed to Manage_subdevs;
+			 * shadows the 'devname' parameter inside this block
+			 */
+			char devname[20];
+			devlist.next = NULL;
+			devlist.used = 0;
+			devlist.re_add = 0;
+			devlist.writemostly = 0;
+			devlist.devname = devname;
+			sprintf(devname, "%d:%d", major(stb.st_rdev),
+				minor(stb.st_rdev));
+			devlist.disposition = 'a';
+			close(dfd);
+			*dfdp = -1;
+			rv = Manage_subdevs(chosen->sys_name, mdfd, &devlist,
+					    -1, 0, NULL);
+			close(mdfd);
+		}
+		if (verbose > 0) {
+			if (rv == 0)
+				fprintf(stderr, Name ": added %s as spare for %s\n",
+					devname, chosen->sys_name);
+			else
+				fprintf(stderr, Name ": failed to add %s as spare for %s\n",
+					devname, chosen->sys_name);
+		}
+		sysfs_free(chosen);
+	}
+	/* release the lock taken by map_lock() above */
+	map_unlock(&map);
+	return rv;
+}
+
+static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+			       struct supertype *st, int verbose)
+{
+	/* we know that at least one partition virtual-metadata is
+	 * allowed to incorporate spares like this device.  We need to
+	 * find a suitable device to copy partition information from.
+	 *
+	 * Getting a list of all disk (not partition) devices is
+	 * slightly non-trivial.  We could look at /sys/block, but
+	 * that is theoretically due to be removed.  Maybe best to use
+	 * /dev/disk/by-path/?* and ignore names ending '-partNN' as
+	 * we depend on this directory of 'path' info.  But that fails
+	 * to find loop devices and probably others.  Maybe don't
+	 * worry about that, they aren't the real target.
+	 *
+	 * So: check things in /dev/disk/by-path to see if they are in
+	 * a compatible domain, then load the partition table and see
+	 * if it is OK for the new device, and choose the largest
+	 * partition table that fits.
+	 *
+	 * Returns 1 if /dev/disk/by-path cannot be opened or no usable
+	 * donor device is found, 0 once a donor has been chosen.
+	 * 'dfdp' and 'verbose' are not used here.
+	 */
+	DIR *dir;
+	struct dirent *de;
+	char *chosen = NULL;
+	unsigned long long chosen_size = 0;
+	struct supertype *chosen_st = NULL;
+	int fd;
+
+	dir = opendir("/dev/disk/by-path");
+	if (!dir)
+		return 1;
+	while ((de = readdir(dir)) != NULL) {
+		char *ep;
+		struct dev_policy *pol2 = NULL;
+		struct domainlist *domlist = NULL;
+		int cand_fd = -1;
+		struct mdinfo info;
+		struct supertype *st2 = NULL;
+		char *cand_name = NULL;
+		unsigned long long devsectors;
+
+		if (de->d_ino == 0 ||
+		    de->d_name[0] == '.' ||
+		    (de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
+			goto next;
+
+		/* Strip trailing digits, then look for "-part" just
+		 * before them.  isdigit() needs an unsigned char value.
+		 */
+		ep = de->d_name + strlen(de->d_name);
+		while (ep > de->d_name &&
+		       isdigit((unsigned char)ep[-1]))
+			ep--;
+		if (ep > de->d_name + 5 &&
+		    strncmp(ep-5, "-part", 5) == 0)
+			/* This is a partition - skip it */
+			goto next;
+
+		pol2 = path_policy(de->d_name, type_disk);
+
+		domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
+		if (domain_test(domlist, pol, st ? st->ss->name : NULL) == 0)
+			/* new device is incompatible with this device. */
+			goto next;
+
+		domain_free(domlist);
+		domlist = NULL;
+
+		if (asprintf(&cand_name, "/dev/disk/by-path/%s",
+			     de->d_name) < 0) {
+			/* pointer contents are undefined after failure */
+			cand_name = NULL;
+			goto next;
+		}
+		cand_fd = open(cand_name, O_RDONLY);
+		if (cand_fd < 0)
+			goto next;
+		if (get_dev_size(cand_fd, cand_name, &devsectors) == 0)
+			goto next;
+		devsectors >>= 9;
+
+		if (st)
+			st2 = dup_super(st);
+		else
+			st2 = guess_super_type(cand_fd, guess_partitions);
+		if (st2 == NULL ||
+		    st2->ss->load_super(st2, cand_fd, NULL) < 0)
+			goto next;
+
+		if (!st) {
+			/* Check domain policy again, this time referring to metadata */
+			domain_merge(&domlist, pol2, st2->ss->name);
+			if (domain_test(domlist, pol, st2->ss->name) == 0)
+				/* Incompatible devices for this metadata type */
+				goto next;
+			if (!policy_action_allows(pol, st2->ss->name, act_spare))
+				/* Some partition types allow sparing, but not
+				 * this one.
+				 */
+				goto next;
+		}
+
+		st2->ss->getinfo_super(st2, &info, NULL);
+		if (info.component_size > devsectors)
+			/* This partitioning doesn't fit in the device */
+			goto next;
+
+		/* This is an acceptable device to copy partition
+		 * metadata from.  We could just stop here, but I
+		 * think I want to keep looking in case a larger
+		 * metadata which makes better use of the device can
+		 * be found.
+		 */
+		if (chosen == NULL ||
+		    chosen_size < info.component_size) {
+			chosen_size = info.component_size;
+			free(chosen);
+			/* ownership of the name and of st2 moves to
+			 * chosen/chosen_st; clear the locals so the
+			 * cleanup below leaves them alone
+			 */
+			chosen = cand_name;
+			cand_name = NULL;
+			if (chosen_st) {
+				chosen_st->ss->free_super(chosen_st);
+				free(chosen_st);
+			}
+			chosen_st = st2;
+			st2 = NULL;
+		}
+
+	next:
+		free(cand_name);
+		domain_free(domlist);
+		dev_policy_free(pol2);
+		if (st2)
+			st2->ss->free_super(st2);
+		free(st2);
+
+		if (cand_fd >= 0)
+			close(cand_fd);
+	}
+	/* done scanning; release the directory stream on every path */
+	closedir(dir);
+
+	if (!chosen)
+		return 1;
+
+	/* 'chosen' is the best device we can find.  Let's write its
+	 * metadata to devname.  dfd is read-only so don't use that.
+	 */
+	fd = open(devname, O_RDWR);
+	if (fd >= 0) {
+		chosen_st->ss->store_super(chosen_st, fd);
+		close(fd);
+	}
+	free(chosen);
+	chosen_st->ss->free_super(chosen_st);
+	free(chosen_st);
+	return 0;
+}
+
+/* Return 1 when the 4096 bytes at 'blk' all hold the same value and
+ * that value is 0x00, 0x5a or 0xff; return 0 otherwise.
+ */
+static int is_bare_block(const char *blk)
+{
+	char fill = blk[0];
+
+	if (!(fill == '\0' || fill == '\x5a' || fill == '\xff'))
+		return 0;
+	/* comparing the block with itself shifted by one byte shows
+	 * whether every byte equals the first
+	 */
+	return memcmp(blk, blk+1, 4095) == 0;
+}
+
+/* Decide whether the device open on 'dfd' looks unused: both its
+ * first 4K and the 4K ending at the size reported by get_dev_size()
+ * must be filled with a single 0x00, 0x5a or 0xff byte.
+ * Returns 1 if so, 0 if not or if a seek or read fails.
+ */
+static int is_bare(int dfd)
+{
+	unsigned long long total = 0;
+	char raw[4096 + 4096];
+	/* round up into 'raw' to get a 4K-aligned 4K window */
+	char *page = (char*)(((long)raw + 4096) & ~4095);
+
+	if (lseek(dfd, 0, SEEK_SET) != 0)
+		return 0;
+	if (read(dfd, page, 4096) != 4096)
+		return 0;
+	if (!is_bare_block(page))
+		return 0;
+
+	/* OK, first 4K appear blank, try the end. */
+	get_dev_size(dfd, NULL, &total);
+	if (lseek(dfd, total-4096, SEEK_SET) < 0)
+		return 0;
+	if (read(dfd, page, 4096) != 4096)
+		return 0;
+
+	return is_bare_block(page);
+}
+
+/* Attaching a spare to a regular array and attaching one to a
+ * set-of-partitions virtual array are quite different jobs.
+ * This function works out which of the two is worth attempting and
+ * dispatches to array_try_spare() and/or partition_try_spare().
+ * Arrays are tried before partitions.
+ * Returns 1 when policy rules the device out, otherwise whatever the
+ * last helper tried returned.
+ */
+static int try_spare(char *devname, int *dfdp, struct dev_policy *pol,
+		     struct map_ent *target,
+		     struct supertype *st, int verbose)
+{
+	int idx;
+	int result;
+	int arrays_ok = 0;
+	int partitions_ok = 0;
+	int bare;
+
+	/* Can only add a spare if device has at least one domain */
+	if (pol_find(pol, pol_domain) == NULL)
+		return 1;
+	/* And only if some action allows spares */
+	if (!policy_action_allows(pol, st?st->ss->name:NULL, act_spare))
+		return 1;
+
+	/* Now check if the device is bare.
+	 * A bare device can always be added as a spare.
+	 * A non-bare device can only be added if spare-same-slot is
+	 * permitted and this device is replacing a previous device - in
+	 * which case 'target' will be set.
+	 * Later - may allow force_spare without target.
+	 */
+	bare = is_bare(*dfdp) ? 1 : 0;
+	if (!bare &&
+	    (!target ||
+	     !policy_action_allows(pol, st?st->ss->name:NULL,
+				   act_spare_same_slot))) {
+		if (verbose > 1)
+			fprintf(stderr, Name ": %s is not bare, so not "
+				"considering as a spare\n",
+				devname);
+		return 1;
+	}
+
+	/* It might be OK to add this device to an array - need to see
+	 * what arrays might be candidates.
+	 */
+	if (st)
+		/* metadata is known: it selects 'array' or 'partition' */
+		return st->ss->add_to_super
+			? array_try_spare(devname, dfdp, pol, target, bare,
+					  st, verbose)
+			: partition_try_spare(devname, dfdp, pol,
+					      st, verbose);
+
+	/* No metadata was specified or found so options are open.
+	 * Check whether any array metadata, or any partition metadata,
+	 * might allow adding the spare.  This only serves to avoid a
+	 * more costly scan of all arrays when that is certain to fail.
+	 */
+	idx = 0;
+	while ((!arrays_ok || !partitions_ok) && superlist[idx]) {
+		if (superlist[idx]->add_to_super) {
+			/* array-style metadata */
+			if (!arrays_ok &&
+			    policy_action_allows(pol, superlist[idx]->name,
+						 act_spare))
+				arrays_ok = 1;
+		} else {
+			/* partition-style metadata */
+			if (!partitions_ok &&
+			    policy_action_allows(pol, superlist[idx]->name,
+						 act_spare))
+				partitions_ok = 1;
+		}
+		idx++;
+	}
+
+	result = 1;
+	if (arrays_ok)
+		result = array_try_spare(devname, dfdp, pol, target, bare,
+					 st, verbose);
+	if (result != 0 && partitions_ok)
+		result = partition_try_spare(devname, dfdp, pol, st, verbose);
+	return result;
}
int IncrementalScan(int verbose)
*/
struct map_ent *mapl = NULL;
struct map_ent *me;
- mddev_ident_t devs, mddev;
+ struct mddev_ident *devs, *mddev;
int rv = 0;
map_read(&mapl);
return mdname;
}
-int Incremental_container(struct supertype *st, char *devname, int verbose,
- int runstop, int autof, int trustworthy)
+static int Incremental_container(struct supertype *st, char *devname,
+ char *homehost, int verbose,
+ int runstop, int autof)
{
/* Collect the contents of this container and for each
* array, choose a device name and assemble the array.
*/
- struct mdinfo *list = st->ss->container_content(st);
+ struct mdinfo *list;
struct mdinfo *ra;
struct map_ent *map = NULL;
+ struct mdinfo info;
+ int trustworthy;
+ struct mddev_ident *match;
+ int rv = 0;
+ memset(&info, 0, sizeof(info));
+ st->ss->getinfo_super(st, &info, NULL);
+
+ if ((runstop > 0 && info.container_enough >= 0) ||
+ info.container_enough > 0)
+ /* pass */;
+ else {
+ if (verbose)
+ fprintf(stderr, Name ": not enough devices to start the container\n");
+ return 0;
+ }
+
+ match = search_mdstat(st, &info, devname, verbose, &rv);
+ if (match == NULL && rv == 2)
+ return rv;
+
+ /* Need to compute 'trustworthy' */
+ if (match)
+ trustworthy = LOCAL;
+ else if (st->ss->match_home(st, homehost) == 1)
+ trustworthy = LOCAL;
+ else if (st->ss->match_home(st, "any") == 1)
+ trustworthy = LOCAL;
+ else
+ trustworthy = FOREIGN;
+
+ list = st->ss->container_content(st, NULL);
if (map_lock(&map))
fprintf(stderr, Name ": failed to get exclusive lock on "
"mapfile\n");
int mdfd;
char chosen_name[1024];
struct map_ent *mp;
- struct mddev_ident_s *match = NULL;
+ struct mddev_ident *match = NULL;
mp = map_by_uuid(&map, ra->uuid);
* member == ra->text_version after second slash.
*/
char *sub = strchr(ra->text_version+1, '/');
- struct mddev_ident_s *array_list;
+ struct mddev_ident *array_list;
if (sub) {
sub++;
array_list = conf_get_ident(NULL);
* raid arrays, and if so first fail (if needed) and then remove the device.
*
* @devname - The device we want to remove
+ * @id_path - name as found in /dev/disk/by-path for this device
*
* Note: the device name must be a kernel name like "sda", so
* that we can find it in /proc/mdstat
*/
-int IncrementalRemove(char *devname, int verbose)
+int IncrementalRemove(char *devname, char *id_path, int verbose)
{
int mdfd;
int rv;
struct mdstat_ent *ent;
- struct mddev_dev_s devlist;
+ struct mddev_dev devlist;
+
+ if (!id_path)
+ dprintf(Name ": incremental removal without --path <id_path> "
+ "lacks the possibility to re-add new device in this "
+ "port\n");
if (strchr(devname, '/')) {
fprintf(stderr, Name ": incremental removal requires a "
mdfd = open_dev(ent->devnum);
if (mdfd < 0) {
fprintf(stderr, Name ": Cannot open array %s!!\n", ent->dev);
+ free_mdstat(ent);
return 1;
}
+
+ if (id_path) {
+ struct map_ent *map = NULL, *me;
+ me = map_by_devnum(&map, ent->devnum);
+ if (me)
+ policy_save_path(id_path, me);
+ map_free(map);
+ }
+
memset(&devlist, 0, sizeof(devlist));
devlist.devname = devname;
devlist.disposition = 'f';
- Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
+ /* for a container, we must fail each member array */
+ if (ent->metadata_version &&
+ strncmp(ent->metadata_version, "external:", 9) == 0) {
+ struct mdstat_ent *mdstat = mdstat_read(0, 0);
+ struct mdstat_ent *memb;
+ for (memb = mdstat ; memb ; memb = memb->next)
+ if (is_container_member(memb, ent->dev)) {
+ int subfd = open_dev(memb->devnum);
+ if (subfd >= 0) {
+ Manage_subdevs(memb->dev, subfd,
+ &devlist, verbose, 0,
+ NULL);
+ close(subfd);
+ }
+ }
+ free_mdstat(mdstat);
+ } else
+ Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
devlist.disposition = 'r';
- rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0);
+ rv = Manage_subdevs(ent->dev, mdfd, &devlist, verbose, 0, NULL);
close(mdfd);
+ free_mdstat(ent);
return rv;
}
}
if (st == NULL)
st = guess_super(fd);
- if (st == NULL) {
+ if (st == NULL || st->ss->init_super == NULL) {
if (!quiet)
fprintf(stderr, Name ": Unrecognised md component device - %s\n", dev);
close(fd);
memset(st, 0, sizeof(*st));
- if (snprintf(st->subarray, sizeof(st->subarray), "%s", subarray) >=
- (int)sizeof(st->subarray)) {
- if (!quiet)
- fprintf(stderr,
- Name ": Input overflow for subarray '%s' > %zu bytes\n",
- subarray, sizeof(st->subarray) - 1);
- return 2;
- }
-
- fd = open_subarray(dev, st, quiet);
+ fd = open_subarray(dev, subarray, st, quiet);
if (fd < 0)
return 2;
MAP_DIR=/dev/.mdadm
MAP_FILE = map
MDMON_DIR = /dev/.mdadm
+# place for autoreplace cookies
+FAILED_SLOTS_DIR = /dev/.mdadm/failed-slots
DIRFLAGS = -DMAP_DIR=\"$(MAP_DIR)\" -DMAP_FILE=\"$(MAP_FILE)\"
DIRFLAGS += -DMDMON_DIR=\"$(MDMON_DIR)\"
+DIRFLAGS += -DFAILED_SLOTS_DIR=\"$(FAILED_SLOTS_DIR)\"
CFLAGS = $(CWFLAGS) $(CXFLAGS) -DSendmail=\""$(MAILCMD)"\" $(CONFFILEFLAGS) $(DIRFLAGS)
# The glibc TLS ABI requires applications that call clone(2) to set up
MAN5DIR = $(MANDIR)/man5
MAN8DIR = $(MANDIR)/man8
-OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
+OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
Incremental.o \
mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
+ super-mbr.o super-gpt.o \
restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \
platform-intel.o probe_roms.o
-SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
+SRCS = mdadm.c config.c policy.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
Incremental.c \
mdopen.c super0.c super1.c super-ddf.c super-intel.c bitmap.c \
+ super-mbr.c super-gpt.c \
restripe.c sysfs.c sha1.c mapfile.c crc32.c sg_io.c msg.c \
platform-intel.c probe_roms.c
-MON_OBJS = mdmon.o monitor.o managemon.o util.o mdstat.o sysfs.o config.o \
+INCL = mdadm.h part.h bitmap.h
+
+MON_OBJS = mdmon.o monitor.o managemon.o util.o mdstat.o sysfs.o config.o policy.o \
Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
+ super-mbr.o super-gpt.o \
super-ddf.o sha1.o crc32.o msg.o bitmap.o \
platform-intel.o probe_roms.o
-MON_SRCS = mdmon.c monitor.c managemon.c util.c mdstat.c sysfs.c config.c \
+MON_SRCS = mdmon.c monitor.c managemon.c util.c mdstat.c sysfs.c config.c policy.c \
Kill.c sg_io.c dlink.c ReadMe.c super0.c super1.c super-intel.c \
+ super-mbr.c super-gpt.c \
super-ddf.c sha1.c crc32.c msg.c bitmap.c \
platform-intel.c probe_roms.c
STATICSRC = pwgr.c
STATICOBJS = pwgr.o
-ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c dlink.c util.c \
+ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
- platform-intel.c probe_roms.c sysfs.c
+ platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c
ASSEMBLE_AUTO_SRCS := mdopen.c
ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
ifdef MDASSEMBLE_AUTO
mdadm.static : $(OBJS) $(STATICOBJS)
$(CC) $(LDFLAGS) -static -o mdadm.static $(OBJS) $(STATICOBJS)
-mdadm.tcc : $(SRCS) mdadm.h
+mdadm.tcc : $(SRCS) $(INCL)
$(TCC) -o mdadm.tcc $(SRCS)
-mdadm.klibc : $(SRCS) mdadm.h
+mdadm.klibc : $(SRCS) $(INCL)
rm -f $(OBJS)
$(CC) -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS)
-mdadm.Os : $(SRCS) mdadm.h
+mdadm.Os : $(SRCS) $(INCL)
$(CC) -o mdadm.Os $(CFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -Os $(SRCS)
-mdadm.O2 : $(SRCS) mdadm.h mdmon.O2
+mdadm.O2 : $(SRCS) $(INCL) mdmon.O2
$(CC) -o mdadm.O2 $(CFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(SRCS)
-mdmon.O2 : $(MON_SRCS) mdadm.h mdmon.h
+mdmon.O2 : $(MON_SRCS) $(INCL) mdmon.h
$(CC) -o mdmon.O2 $(CFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(MON_SRCS)
# use '-z now' to guarantee no dynamic linker interactions with the monitor thread
test_stripe : restripe.c mdadm.h
$(CC) $(CXFLAGS) $(LDFLAGS) -o test_stripe -DMAIN restripe.c
-mdassemble : $(ASSEMBLE_SRCS) mdadm.h
+mdassemble : $(ASSEMBLE_SRCS) $(INCL)
rm -f $(OBJS)
$(DIET_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS) $(STATICSRC)
-mdassemble.static : $(ASSEMBLE_SRCS) mdadm.h
+mdassemble.static : $(ASSEMBLE_SRCS) $(INCL)
rm -f $(OBJS)
$(CC) $(LDFLAGS) $(ASSEMBLE_FLAGS) -static -DHAVE_STDINT_H -o mdassemble.static $(ASSEMBLE_SRCS) $(STATICSRC)
-mdassemble.auto : $(ASSEMBLE_SRCS) mdadm.h $(ASSEMBLE_AUTO_SRCS)
+mdassemble.auto : $(ASSEMBLE_SRCS) $(INCL) $(ASSEMBLE_AUTO_SRCS)
rm -f mdassemble.static
$(MAKE) MDASSEMBLE_AUTO=1 mdassemble.static
mv mdassemble.static mdassemble.auto
-mdassemble.uclibc : $(ASSEMBLE_SRCS) mdadm.h
+mdassemble.uclibc : $(ASSEMBLE_SRCS) $(INCL)
rm -f $(OJS)
$(UCLIBC_GCC) $(ASSEMBLE_FLAGS) -DUCLIBC -DHAVE_STDINT_H -static -o mdassemble.uclibc $(ASSEMBLE_SRCS) $(STATICSRC)
# This doesn't work
-mdassemble.klibc : $(ASSEMBLE_SRCS) mdadm.h
+mdassemble.klibc : $(ASSEMBLE_SRCS) $(INCL)
rm -f $(OBJS)
$(KLIBC_GCC) $(ASSEMBLE_FLAGS) -o mdassemble $(ASSEMBLE_SRCS)
mdassemble.man : mdassemble.8
nroff -man mdassemble.8 > mdassemble.man
-$(OBJS) : mdadm.h mdmon.h bitmap.h
-$(MON_OBJS) : mdadm.h mdmon.h bitmap.h
+$(OBJS) : $(INCL) mdmon.h
+$(MON_OBJS) : $(INCL) mdmon.h
sha1.o : sha1.c sha1.h md5.h
$(CC) $(CFLAGS) -DHAVE_STDINT_H -o sha1.o -c sha1.c
mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
if (mdi &&
mdi->array.major_version == -1 &&
- mdi->array.level > 0 &&
is_subarray(mdi->text_version)) {
char vers[64];
strcpy(vers, "external:");
if (*cp)
*cp = 0;
ping_monitor(vers+10);
+ if (mdi->array.level <= 0)
+ sysfs_set_str(mdi, NULL, "array_state", "active");
}
return 0;
}
}
int Manage_subdevs(char *devname, int fd,
- mddev_dev_t devlist, int verbose, int test)
+ struct mddev_dev *devlist, int verbose, int test,
+ char *update)
{
/* do something to each dev.
* devmode can be
* For 'f' and 'r', the device can also be a kernel-internal
* name such as 'sdb'.
*/
- mddev_dev_t add_devlist = NULL;
+ struct mddev_dev *add_devlist = NULL;
mdu_array_info_t array;
mdu_disk_info_t disc;
unsigned long long array_size;
- mddev_dev_t dv, next = NULL;
+ struct mddev_dev *dv, *next = NULL;
struct stat stb;
int j, jnext = 0;
int tfd = -1;
struct supertype *st, *tst;
+ char *subarray = NULL;
int duuid[4];
int ouuid[4];
int lfd = -1;
if (array_size <= 0)
array_size = array.size * 2;
- tst = super_by_fd(fd);
+ tst = super_by_fd(fd, &subarray);
if (!tst) {
fprintf(stderr, Name ": unsupport array - version %d.%d\n",
array.major_version, array.minor_version);
char *dnprintable = dv->devname;
char *add_dev = dv->devname;
int err;
+ int re_add_failed = 0;
next = dv->next;
jnext = 0;
return 1;
case 'a':
/* add the device */
- if (tst->subarray[0]) {
+ if (subarray) {
fprintf(stderr, Name ": Cannot add disks to a"
" \'member\' array, perform this"
" operation on the parent container\n");
if (tst->sb)
/* already loaded */;
else if (tst->ss->external) {
- tst->ss->load_super(tst, fd, NULL);
+ tst->ss->load_container(tst, fd, NULL);
} else for (j = 0; j < tst->max_devs; j++) {
char *dev;
int dfd;
get_linux_version() <= 2006018)
;
else if (st->sb) {
+ struct mdinfo mdi;
+ st->ss->getinfo_super(st, &mdi, NULL);
st->ss->uuid_from_super(st, ouuid);
- if (memcmp(duuid, ouuid, sizeof(ouuid))==0) {
- /* looks close enough for now. Kernel
- * will worry about whether a bitmap
- * based reconstruction is possible.
+ if ((mdi.disk.state & (1<<MD_DISK_ACTIVE)) &&
+ !(mdi.disk.state & (1<<MD_DISK_FAULTY)) &&
+ memcmp(duuid, ouuid, sizeof(ouuid))==0) {
+ /* looks like it is worth a try. Need to
+ * make sure kernel will accept it though.
*/
- struct mdinfo mdi;
- st->ss->getinfo_super(st, &mdi);
+ disc.number = mdi.disk.number;
+ if (ioctl(fd, GET_DISK_INFO, &disc) != 0
+ || disc.major != 0 || disc.minor != 0
+ || !enough_fd(fd))
+ goto skip_re_add;
disc.major = major(stb.st_rdev);
disc.minor = minor(stb.st_rdev);
disc.number = mdi.disk.number;
remove_partitions(tfd);
close(tfd);
tfd = -1;
+ if (update) {
+ int rv = -1;
+ tfd = dev_open(dv->devname, O_RDWR);
+
+ if (tfd >= 0)
+ rv = st->ss->update_super(
+ st, NULL, update,
+ devname, verbose, 0, NULL);
+ if (rv == 0)
+ rv = tst->ss->store_super(st, tfd);
+ close(tfd);
+ tfd = -1;
+ if (rv != 0) {
+ fprintf(stderr, Name ": failed to update"
+ " superblock during re-add\n");
+ return 1;
+ }
+ }
/* don't even try if disk is marked as faulty */
errno = 0;
- if ((disc.state & 1) == 0 &&
- ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
+ if (ioctl(fd, ADD_NEW_DISK, &disc) == 0) {
if (verbose >= 0)
fprintf(stderr, Name ": re-added %s\n", add_dev);
count++;
continue;
return 1;
}
- /* fall back on normal-add */
+ skip_re_add:
+ re_add_failed = 1;
}
}
if (add_dev != dv->devname) {
dv->devname, devname);
return 1;
}
+ if (re_add_failed) {
+ fprintf(stderr, Name ": %s reports being an active member for %s, but a --re-add fails.\n",
+ dv->devname, devname);
+ fprintf(stderr, Name ": not performing --add as that would convert %s in to a spare.\n",
+ dv->devname);
+ fprintf(stderr, Name ": To make this a spare, use \"mdadm --zero-superblock %s\" first.\n",
+ dv->devname);
+ if (tfd >= 0)
+ close(tfd);
+ return 1;
+ }
} else {
/* non-persistent. Must ensure that new drive
* is at least array.size big.
}
sra->array.level = LEVEL_CONTAINER;
/* Need to set data_offset and component_size */
- tst->ss->getinfo_super(tst, &new_mdi);
+ tst->ss->getinfo_super(tst, &new_mdi, NULL);
new_mdi.disk.major = disc.major;
new_mdi.disk.minor = disc.minor;
new_mdi.recovery_start = 0;
case 'r':
/* hot remove */
- if (tst->subarray[0]) {
+ if (subarray) {
fprintf(stderr, Name ": Cannot remove disks from a"
" \'member\' array, perform this"
" operation on the parent container\n");
return rv;
}
-int Update_subarray(char *dev, char *subarray, char *update, mddev_ident_t ident, int quiet)
+int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet)
{
struct supertype supertype, *st = &supertype;
int fd, rv = 2;
memset(st, 0, sizeof(*st));
- if (snprintf(st->subarray, sizeof(st->subarray), "%s", subarray) >=
- (signed)sizeof(st->subarray)) {
- if (!quiet)
- fprintf(stderr,
- Name ": Input overflow for subarray '%s' > %zu bytes\n",
- subarray, sizeof(st->subarray) - 1);
- return 2;
- }
- fd = open_subarray(dev, st, quiet);
+ fd = open_subarray(dev, subarray, st, quiet);
if (fd < 0)
return 2;
if (mdmon_running(st->devnum))
st->update_tail = &st->updates;
- rv = st->ss->update_subarray(st, update, ident);
+ rv = st->ss->update_subarray(st, subarray, update, ident);
if (rv) {
if (!quiet)
#include <limits.h>
#include <syslog.h>
-static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom,
- char *cmd, int dosyslog);
-
/* The largest number of disks current arrays can manage is 384
* This really should be dynamically, but that will have to wait
* At least it isn't MD_SB_DISKS.
*/
#define MaxDisks 384
-int Monitor(mddev_dev_t devlist,
+struct state { /* What the monitor remembers about one array between polls. */
+ char *devname; /* path that is open()ed to poll the array; also named in alerts */
+ int devnum; /* to sync with mdstat info */
+ long utime; /* presumably the array's update time at the last poll - confirm in check_array */
+ int err; /* set once a failure to open/query the array has been noted, so
+ * that "DeviceDisappeared" is only alerted when it was clear before */
+ char *spare_group; /* copy of the config file's spare-group, NULL if none was given */
+ int active, working, failed, spare, raid; /* disk counts (presumably from GET_ARRAY_INFO - confirm) */
+ int expected_spares; /* spare_disks from the config entry, -1 when there is no entry */
+ int devstate[MaxDisks]; /* presumably last seen state of each slot - confirm */
+ dev_t devid[MaxDisks]; /* presumably device number last seen in each slot - confirm */
+ int percent; /* initialised to -2 when the state is created */
+ int parent_dev; /* For subarray, devnum of parent.
+ * For others, NoMdDev
+ */
+ struct supertype *metadata;
+ struct state *subarray;/* for a container it is a link to first subarray
+ * for a subarray it is a link to next subarray
+ * in the same container */
+ struct state *parent; /* for a subarray it is a link to its container
+ */
+ struct state *next; /* next entry in the monitor's list of arrays */
+};
+
+struct alert_info { /* Where alert() should deliver events; filled in once by Monitor(). */
+ char *mailaddr; /* address used for the "To:" header of alert mail; NULL for no mail */
+ char *mailfrom; /* "From:" header for alert mail; NULL selects the built-in default */
+ char *alert_cmd; /* program exec'd with (event, dev, disc) for each alert; NULL for none */
+ int dosyslog; /* non-zero: also report each event through syslog() */
+};
+static int make_daemon(char *pidfile);
+static int check_one_sharer(int scan);
+static void alert(char *event, char *dev, char *disc, struct alert_info *info);
+static int check_array(struct state *st, struct mdstat_ent *mdstat,
+ int test, struct alert_info *info,
+ int increments);
+static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
+ int test, struct alert_info *info);
+static void try_spare_migration(struct state *statelist, struct alert_info *info);
+static void link_containers_with_subarrays(struct state *list);
+
+int Monitor(struct mddev_dev *devlist,
char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot,
- int dosyslog, int test, char* pidfile, int increments)
+ int dosyslog, int test, char *pidfile, int increments,
+ int share)
{
/*
* Every few seconds, scan every md device looking for changes
* that appears in /proc/mdstat
*/
- struct state {
- char *devname;
- int devnum; /* to sync with mdstat info */
- long utime;
- int err;
- char *spare_group;
- int active, working, failed, spare, raid;
- int expected_spares;
- int devstate[MaxDisks];
- unsigned devid[MaxDisks];
- int percent;
- struct state *next;
- } *statelist = NULL;
+ struct state *statelist = NULL;
int finished = 0;
struct mdstat_ent *mdstat = NULL;
char *mailfrom = NULL;
+ struct alert_info info;
if (!mailaddr) {
mailaddr = conf_get_mailaddr();
fprintf(stderr, Name ": No mail address or alert command - not monitoring.\n");
return 1;
}
+ info.alert_cmd = alert_cmd;
+ info.mailaddr = mailaddr;
+ info.mailfrom = mailfrom;
+ info.dosyslog = dosyslog;
- if (daemonise) {
- int pid = fork();
- if (pid > 0) {
- if (!pidfile)
- printf("%d\n", pid);
- else {
- FILE *pid_file;
- pid_file=fopen(pidfile, "w");
- if (!pid_file)
- perror("cannot create pid file");
- else {
- fprintf(pid_file,"%d\n", pid);
- fclose(pid_file);
- }
- }
- return 0;
- }
- if (pid < 0) {
- perror("daemonise");
+ if (daemonise)
+ if (make_daemon(pidfile))
+ return 1;
+
+ if (share)
+ if (check_one_sharer(scan))
return 1;
- }
- close(0);
- open("/dev/null", O_RDWR);
- dup2(0,1);
- dup2(0,2);
- setsid();
- }
if (devlist == NULL) {
- mddev_ident_t mdlist = conf_get_ident(NULL);
+ struct mddev_ident *mdlist = conf_get_ident(NULL);
for (; mdlist; mdlist=mdlist->next) {
struct state *st;
if (mdlist->devname == NULL)
continue;
if (strcasecmp(mdlist->devname, "<ignore>") == 0)
continue;
- st = malloc(sizeof *st);
+ st = calloc(1, sizeof *st);
if (st == NULL)
continue;
if (mdlist->devname[0] == '/')
strcpy(strcpy(st->devname, "/dev/md/"),
mdlist->devname);
}
- st->utime = 0;
st->next = statelist;
- st->err = 0;
st->devnum = INT_MAX;
st->percent = -2;
st->expected_spares = mdlist->spare_disks;
if (mdlist->spare_group)
st->spare_group = strdup(mdlist->spare_group);
- else
- st->spare_group = NULL;
statelist = st;
}
} else {
- mddev_dev_t dv;
+ struct mddev_dev *dv;
for (dv=devlist ; dv; dv=dv->next) {
- mddev_ident_t mdlist = conf_get_ident(dv->devname);
- struct state *st = malloc(sizeof *st);
+ struct mddev_ident *mdlist = conf_get_ident(dv->devname);
+ struct state *st = calloc(1, sizeof *st);
if (st == NULL)
continue;
st->devname = strdup(dv->devname);
- st->utime = 0;
st->next = statelist;
- st->err = 0;
st->devnum = INT_MAX;
st->percent = -2;
st->expected_spares = -1;
- st->spare_group = NULL;
if (mdlist) {
st->expected_spares = mdlist->spare_disks;
if (mdlist->spare_group)
while (! finished) {
int new_found = 0;
struct state *st;
+ int anydegraded = 0;
if (mdstat)
free_mdstat(mdstat);
mdstat = mdstat_read(oneshot?0:1, 0);
- for (st=statelist; st; st=st->next) {
- struct { int state, major, minor; } info[MaxDisks];
- mdu_array_info_t array;
- struct mdstat_ent *mse = NULL, *mse2;
- char *dev = st->devname;
- int fd;
- int i;
-
- if (test)
- alert("TestMessage", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
- fd = open(dev, O_RDONLY);
- if (fd < 0) {
- if (!st->err)
- alert("DeviceDisappeared", dev, NULL,
- mailaddr, mailfrom, alert_cmd, dosyslog);
-/* fprintf(stderr, Name ": cannot open %s: %s\n",
- dev, strerror(errno));
-*/ st->err=1;
- continue;
- }
- fcntl(fd, F_SETFD, FD_CLOEXEC);
- if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
- if (!st->err)
- alert("DeviceDisappeared", dev, NULL,
- mailaddr, mailfrom, alert_cmd, dosyslog);
-/* fprintf(stderr, Name ": cannot get array info for %s: %s\n",
- dev, strerror(errno));
-*/ st->err=1;
- close(fd);
- continue;
- }
- /* It's much easier to list what array levels can't
- * have a device disappear than all of them that can
- */
- if (array.level == 0 || array.level == -1) {
- if (!st->err)
- alert("DeviceDisappeared", dev, "Wrong-Level",
- mailaddr, mailfrom, alert_cmd, dosyslog);
- st->err = 1;
- close(fd);
- continue;
- }
- if (st->devnum == INT_MAX) {
- struct stat stb;
- if (fstat(fd, &stb) == 0 &&
- (S_IFMT&stb.st_mode)==S_IFBLK) {
- if (major(stb.st_rdev) == MD_MAJOR)
- st->devnum = minor(stb.st_rdev);
- else
- st->devnum = -1- (minor(stb.st_rdev)>>6);
- }
- }
-
- for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
- if (mse2->devnum == st->devnum) {
- mse2->devnum = INT_MAX; /* flag it as "used" */
- mse = mse2;
- }
-
- if (array.utime == 0)
- /* external arrays don't update utime */
- array.utime = time(0);
-
- if (st->utime == array.utime &&
- st->failed == array.failed_disks &&
- st->working == array.working_disks &&
- st->spare == array.spare_disks &&
- (mse == NULL || (
- mse->percent == st->percent
- ))) {
- close(fd);
- st->err = 0;
- continue;
- }
- if (st->utime == 0 && /* new array */
- mse && /* is in /proc/mdstat */
- mse->pattern && strchr(mse->pattern, '_') /* degraded */
- )
- alert("DegradedArray", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
-
- if (st->utime == 0 && /* new array */
- st->expected_spares > 0 &&
- array.spare_disks < st->expected_spares)
- alert("SparesMissing", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
- if (mse &&
- st->percent == -1 &&
- mse->percent >= 0)
- alert("RebuildStarted", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
- if (mse &&
- st->percent >= 0 &&
- mse->percent >= 0 &&
- (mse->percent / increments) > (st->percent / increments)) {
- char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
-
- if((mse->percent / increments) == 0)
- snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
- else
- snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);
-
- alert(percentalert,
- dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
- }
-
- if (mse &&
- mse->percent == -1 &&
- st->percent >= 0) {
- /* Rebuild/sync/whatever just finished.
- * If there is a number in /mismatch_cnt,
- * we should report that.
- */
- struct mdinfo *sra =
- sysfs_read(-1, st->devnum, GET_MISMATCH);
- if (sra && sra->mismatch_cnt > 0) {
- char cnt[40];
- sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt);
- alert("RebuildFinished", dev, cnt, mailaddr, mailfrom, alert_cmd, dosyslog);
- } else
- alert("RebuildFinished", dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
- if (sra)
- free(sra);
- }
-
- if (mse)
- st->percent = mse->percent;
-
-
- for (i=0; i<MaxDisks && i <= array.raid_disks + array.nr_disks;
- i++) {
- mdu_disk_info_t disc;
- disc.number = i;
- if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
- info[i].state = disc.state;
- info[i].major = disc.major;
- info[i].minor = disc.minor;
- } else
- info[i].major = info[i].minor = 0;
- }
- close(fd);
-
- for (i=0; i<MaxDisks; i++) {
- mdu_disk_info_t disc = {0,0,0,0,0};
- int newstate=0;
- int change;
- char *dv = NULL;
- disc.number = i;
- if (i > array.raid_disks + array.nr_disks) {
- newstate = 0;
- disc.major = disc.minor = 0;
- } else if (info[i].major || info[i].minor) {
- newstate = info[i].state;
- dv = map_dev(info[i].major, info[i].minor, 1);
- disc.state = newstate;
- disc.major = info[i].major;
- disc.minor = info[i].minor;
- } else if (mse && mse->pattern && i < (int)strlen(mse->pattern)) {
- switch(mse->pattern[i]) {
- case 'U': newstate = 6 /* ACTIVE/SYNC */; break;
- case '_': newstate = 0; break;
- }
- disc.major = disc.minor = 0;
- }
- if (dv == NULL && st->devid[i])
- dv = map_dev(major(st->devid[i]),
- minor(st->devid[i]), 1);
- change = newstate ^ st->devstate[i];
- if (st->utime && change && !st->err) {
- if (i < array.raid_disks &&
- (((newstate&change)&(1<<MD_DISK_FAULTY)) ||
- ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
- ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
- )
- alert("Fail", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
- else if (i >= array.raid_disks &&
- (disc.major || disc.minor) &&
- st->devid[i] == makedev(disc.major, disc.minor) &&
- ((newstate&change)&(1<<MD_DISK_FAULTY))
- )
- alert("FailSpare", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
- else if (i < array.raid_disks &&
- ! (newstate & (1<<MD_DISK_REMOVED)) &&
- (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
- ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
- ((newstate&change)&(1<<MD_DISK_SYNC)))
- )
- alert("SpareActive", dev, dv, mailaddr, mailfrom, alert_cmd, dosyslog);
- }
- st->devstate[i] = newstate;
- st->devid[i] = makedev(disc.major, disc.minor);
- }
- st->active = array.active_disks;
- st->working = array.working_disks;
- st->spare = array.spare_disks;
- st->failed = array.failed_disks;
- st->utime = array.utime;
- st->raid = array.raid_disks;
- st->err = 0;
- }
+ for (st=statelist; st; st=st->next)
+ if (check_array(st, mdstat, test, &info, increments))
+ anydegraded = 1;
+
/* now check if there are any new devices found in mdstat */
- if (scan) {
- struct mdstat_ent *mse;
- for (mse=mdstat; mse; mse=mse->next)
- if (mse->devnum != INT_MAX &&
- mse->level &&
- (strcmp(mse->level, "raid0")!=0 &&
- strcmp(mse->level, "linear")!=0)
- ) {
- struct state *st = malloc(sizeof *st);
- mdu_array_info_t array;
- int fd;
- if (st == NULL)
- continue;
- st->devname = strdup(get_md_name(mse->devnum));
- if ((fd = open(st->devname, O_RDONLY)) < 0 ||
- ioctl(fd, GET_ARRAY_INFO, &array)< 0) {
- /* no such array */
- if (fd >=0) close(fd);
- put_md_name(st->devname);
- free(st->devname);
- free(st);
- continue;
- }
- close(fd);
- st->utime = 0;
- st->next = statelist;
- st->err = 1;
- st->devnum = mse->devnum;
- st->percent = -2;
- st->spare_group = NULL;
- st->expected_spares = -1;
- statelist = st;
- if (test)
- alert("TestMessage", st->devname, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
- alert("NewArray", st->devname, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
- new_found = 1;
- }
- }
+ if (scan)
+ new_found = add_new_arrays(mdstat, &statelist, test,
+ &info);
+
/* If an array has active < raid && spare == 0 && spare_group != NULL
* Look for another array with spare > 0 and active == raid and same spare_group
* if found, choose a device and hotremove/hotadd
*/
- for (st = statelist; st; st=st->next)
- if (st->active < st->raid &&
- st->spare == 0 &&
- st->spare_group != NULL) {
- struct state *st2;
- for (st2=statelist ; st2 ; st2=st2->next)
- if (st2 != st &&
- st2->spare > 0 &&
- st2->active == st2->raid &&
- st2->spare_group != NULL &&
- strcmp(st->spare_group, st2->spare_group) == 0) {
- /* try to remove and add */
- int fd1 = open(st->devname, O_RDONLY);
- int fd2 = open(st2->devname, O_RDONLY);
- int dev = -1;
- int d;
- if (fd1 < 0 || fd2 < 0) {
- if (fd1>=0) close(fd1);
- if (fd2>=0) close(fd2);
- continue;
- }
- for (d=st2->raid; d < MaxDisks; d++) {
- if (st2->devid[d] > 0 &&
- st2->devstate[d] == 0) {
- dev = st2->devid[d];
- break;
- }
- }
- if (dev > 0) {
- struct mddev_dev_s devlist;
- char devname[20];
- devlist.next = NULL;
- devlist.used = 0;
- devlist.re_add = 0;
- devlist.writemostly = 0;
- devlist.devname = devname;
- sprintf(devname, "%d:%d", major(dev), minor(dev));
-
- devlist.disposition = 'r';
- if (Manage_subdevs(st2->devname, fd2, &devlist, -1, 0) == 0) {
- devlist.disposition = 'a';
- if (Manage_subdevs(st->devname, fd1, &devlist, -1, 0) == 0) {
- alert("MoveSpare", st->devname, st2->devname, mailaddr, mailfrom, alert_cmd, dosyslog);
- close(fd1);
- close(fd2);
- break;
- }
- else Manage_subdevs(st2->devname, fd2, &devlist, -1, 0);
- }
- }
- close(fd1);
- close(fd2);
- }
- }
+ if (share && anydegraded)
+ try_spare_migration(statelist, &info);
if (!new_found) {
if (oneshot)
break;
return 0;
}
+/* Fork the monitor into the background.
+ *
+ * Parent: report the child's pid (on stdout, or in 'pidfile' when one is
+ * given) and exit(0).  The parent must not return: Monitor() treats a
+ * return of 0 as "carry on monitoring", so returning here would leave a
+ * second monitor running in the foreground next to the daemon.
+ * Child: point stdin, stdout and stderr at /dev/null, start a new
+ * session and return 0.
+ *
+ * Returns 1, with nothing forked, if fork() fails.
+ */
+static int make_daemon(char *pidfile)
+{
+	int pid = fork();
+
+	if (pid > 0) {
+		if (!pidfile)
+			printf("%d\n", pid);
+		else {
+			FILE *pid_file;
+			pid_file=fopen(pidfile, "w");
+			if (!pid_file)
+				perror("cannot create pid file");
+			else {
+				fprintf(pid_file,"%d\n", pid);
+				fclose(pid_file);
+			}
+		}
+		/* The daemon has been started; the foreground process is
+		 * finished.  exit() also flushes the pid printed above.
+		 */
+		exit(0);
+	}
+	if (pid < 0) {
+		perror("daemonise");
+		return 1;
+	}
+	/* Child: fd 0 is reopened on /dev/null and then duplicated on to
+	 * stdout and stderr so the daemon holds no terminal descriptors.
+	 */
+	close(0);
+	open("/dev/null", O_RDWR);
+	dup2(0,1);
+	dup2(0,2);
+	setsid();
+	return 0;
+}
-static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom, char *cmd,
- int dosyslog)
+/* Guard against two spare-sharing monitors running at once, using
+ * /var/run/mdadm/autorebuild.pid as the record of the current one.
+ *
+ * If that file names a pid which has a /proc entry, another monitor is
+ * taken to be running: with 'scan' set this is fatal (returns 1),
+ * otherwise only a warning is printed.
+ * With 'scan' set and no live owner found, our own pid is written to the
+ * file; a failure to do so is reported but is not fatal.
+ *
+ * Returns 0 when monitoring may proceed, 1 when it must be abandoned.
+ */
+static int check_one_sharer(int scan)
+{
+	int pid, rv;
+	FILE *fp;
+	char dir[20];
+	struct stat buf;
+
+	fp = fopen("/var/run/mdadm/autorebuild.pid", "r");
+	if (fp) {
+		/* An empty or corrupt file yields no pid at all; use a value
+		 * that can never match a /proc entry rather than reading an
+		 * uninitialised variable.
+		 */
+		if (fscanf(fp, "%d", &pid) != 1)
+			pid = -1;
+		fclose(fp);
+		snprintf(dir, sizeof(dir), "/proc/%d", pid);
+		rv = stat(dir, &buf);
+		if (rv != -1) {
+			if (scan) {
+				fprintf(stderr, Name ": Only one "
+					"autorebuild process allowed"
+					" in scan mode, aborting\n");
+				return 1;
+			} else {
+				fprintf(stderr, Name ": Warning: One"
+					" autorebuild process already"
+					" running.\n");
+			}
+		}
+	}
+	if (scan) {
+		if (mkdir("/var/run/mdadm", S_IRWXU) < 0 &&
+		    errno != EEXIST) {
+			fprintf(stderr, Name ": Can't create "
+				"autorebuild.pid file\n");
+		} else {
+			fp = fopen("/var/run/mdadm/autorebuild.pid", "w");
+			if (!fp)
+				fprintf(stderr, Name ": Cannot create"
+					" autorebuild.pid"
+					" file\n");
+			else {
+				pid = getpid();
+				fprintf(fp, "%d\n", pid);
+				fclose(fp);
+			}
+		}
+	}
+	return 0;
+}
+
+static void alert(char *event, char *dev, char *disc, struct alert_info *info)
{
int priority;
- if (!cmd && !mailaddr) {
+ if (!info->alert_cmd && !info->mailaddr) {
time_t now = time(0);
printf("%1.15s: %s on %s %s\n", ctime(&now)+4, event, dev, disc?disc:"unknown device");
}
- if (cmd) {
+ if (info->alert_cmd) {
int pid = fork();
switch(pid) {
default:
case -1:
break;
case 0:
- execl(cmd, cmd, event, dev, disc, NULL);
+ execl(info->alert_cmd, info->alert_cmd,
+ event, dev, disc, NULL);
exit(2);
}
}
- if (mailaddr &&
+ if (info->mailaddr &&
(strncmp(event, "Fail", 4)==0 ||
strncmp(event, "Test", 4)==0 ||
strncmp(event, "Spares", 6)==0 ||
char hname[256];
gethostname(hname, sizeof(hname));
signal(SIGPIPE, SIG_IGN);
- if (mailfrom)
- fprintf(mp, "From: %s\n", mailfrom);
+ if (info->mailfrom)
+ fprintf(mp, "From: %s\n", info->mailfrom);
else
fprintf(mp, "From: " Name " monitoring <root>\n");
- fprintf(mp, "To: %s\n", mailaddr);
- fprintf(mp, "Subject: %s event on %s:%s\n\n", event, dev, hname);
+ fprintf(mp, "To: %s\n", info->mailaddr);
+ fprintf(mp, "Subject: %s event on %s:%s\n\n",
+ event, dev, hname);
- fprintf(mp, "This is an automatically generated mail message from " Name "\n");
+ fprintf(mp,
+ "This is an automatically generated"
+ " mail message from " Name "\n");
fprintf(mp, "running on %s\n\n", hname);
- fprintf(mp, "A %s event had been detected on md device %s.\n\n", event, dev);
+ fprintf(mp,
+ "A %s event had been detected on"
+ " md device %s.\n\n", event, dev);
if (disc && disc[0] != ' ')
- fprintf(mp, "It could be related to component device %s.\n\n", disc);
+ fprintf(mp,
+ "It could be related to"
+ " component device %s.\n\n", disc);
if (disc && disc[0] == ' ')
fprintf(mp, "Extra information:%s.\n\n", disc);
if (mdstat) {
char buf[8192];
int n;
- fprintf(mp, "\nP.S. The /proc/mdstat file currently contains the following:\n\n");
+ fprintf(mp,
+ "\nP.S. The /proc/mdstat file"
+ " currently contains the following:\n\n");
while ( (n=fread(buf, 1, sizeof(buf), mdstat)) > 0)
- n=fwrite(buf, 1, n, mp); /* yes, i don't care about the result */
+ n=fwrite(buf, 1, n, mp);
fclose(mdstat);
}
pclose(mp);
}
-
}
/* log the event to syslog maybe */
- if (dosyslog) {
+ if (info->dosyslog) {
/* Log at a different severity depending on the event.
*
* These are the critical events: */
priority = LOG_INFO;
if (disc)
- syslog(priority, "%s event detected on md device %s, component device %s", event, dev, disc);
+ syslog(priority,
+ "%s event detected on md device %s,"
+ " component device %s", event, dev, disc);
+ else
+ syslog(priority,
+ "%s event detected on md device %s",
+ event, dev);
+ }
+}
+
+static int check_array(struct state *st, struct mdstat_ent *mdstat,
+ int test, struct alert_info *ainfo,
+ int increments)
+{
+ /* Update the state 'st' to reflect any changes shown in mdstat,
+ * or found by directly examining the array, and return
+ * '1' if the array is degraded, or '0' if it is optimal (or dead).
+ */
+ struct { int state, major, minor; } info[MaxDisks];
+ mdu_array_info_t array;
+ struct mdstat_ent *mse = NULL, *mse2;
+ char *dev = st->devname;
+ int fd;
+ int i;
+
+ if (test)
+ alert("TestMessage", dev, NULL, ainfo);
+ fd = open(dev, O_RDONLY);
+ if (fd < 0) {
+ if (!st->err)
+ alert("DeviceDisappeared", dev, NULL, ainfo);
+ st->err=1;
+ return 0;
+ }
+ fcntl(fd, F_SETFD, FD_CLOEXEC);
+ if (ioctl(fd, GET_ARRAY_INFO, &array)<0) {
+ if (!st->err)
+ alert("DeviceDisappeared", dev, NULL, ainfo);
+ st->err=1;
+ close(fd);
+ return 0;
+ }
+ /* It's much easier to list what array levels can't
+ * have a device disappear than all of them that can
+ */
+ if (array.level == 0 || array.level == -1) {
+ if (!st->err)
+ alert("DeviceDisappeared", dev, "Wrong-Level", ainfo);
+ st->err = 1;
+ close(fd);
+ return 0;
+ }
+ if (st->devnum == INT_MAX) {
+ struct stat stb;
+ if (fstat(fd, &stb) == 0 &&
+ (S_IFMT&stb.st_mode)==S_IFBLK) {
+ if (major(stb.st_rdev) == MD_MAJOR)
+ st->devnum = minor(stb.st_rdev);
+ else
+ st->devnum = -1- (minor(stb.st_rdev)>>6);
+ }
+ }
+
+ for (mse2 = mdstat ; mse2 ; mse2=mse2->next)
+ if (mse2->devnum == st->devnum) {
+ mse2->devnum = INT_MAX; /* flag it as "used" */
+ mse = mse2;
+ }
+
+ if (!mse) {
+ /* duplicated array in statelist
+ * or re-created after reading mdstat*/
+ st->err = 1;
+ close(fd);
+ return 0;
+ }
+ /* this array is in /proc/mdstat */
+ if (array.utime == 0)
+ /* external arrays don't update utime, so
+ * just make sure it is always different. */
+ array.utime = st->utime + 1;;
+
+ if (st->utime == array.utime &&
+ st->failed == array.failed_disks &&
+ st->working == array.working_disks &&
+ st->spare == array.spare_disks &&
+ (mse == NULL || (
+ mse->percent == st->percent
+ ))) {
+ close(fd);
+ st->err = 0;
+ if ((st->active < st->raid) && st->spare == 0)
+ return 1;
+ else
+ return 0;
+ }
+ if (st->utime == 0 && /* new array */
+ mse->pattern && strchr(mse->pattern, '_') /* degraded */
+ )
+ alert("DegradedArray", dev, NULL, ainfo);
+
+ if (st->utime == 0 && /* new array */
+ st->expected_spares > 0 &&
+ array.spare_disks < st->expected_spares)
+ alert("SparesMissing", dev, NULL, ainfo);
+ if (st->percent == -1 &&
+ mse->percent >= 0)
+ alert("RebuildStarted", dev, NULL, ainfo);
+ if (st->percent >= 0 &&
+ mse->percent >= 0 &&
+ (mse->percent / increments) > (st->percent / increments)) {
+ char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
+
+ if((mse->percent / increments) == 0)
+ snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
else
- syslog(priority, "%s event detected on md device %s", event, dev);
+ snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);
+
+ alert(percentalert, dev, NULL, ainfo);
+ }
+
+ if (mse->percent == -1 &&
+ st->percent >= 0) {
+ /* Rebuild/sync/whatever just finished.
+ * If there is a number in /mismatch_cnt,
+ * we should report that.
+ */
+ struct mdinfo *sra =
+ sysfs_read(-1, st->devnum, GET_MISMATCH);
+ if (sra && sra->mismatch_cnt > 0) {
+ char cnt[40];
+ sprintf(cnt, " mismatches found: %d", sra->mismatch_cnt);
+ alert("RebuildFinished", dev, cnt, ainfo);
+ } else
+ alert("RebuildFinished", dev, NULL, ainfo);
+ if (sra)
+ free(sra);
+ }
+ st->percent = mse->percent;
+
+ for (i=0; i<MaxDisks && i <= array.raid_disks + array.nr_disks;
+ i++) {
+ mdu_disk_info_t disc;
+ disc.number = i;
+ if (ioctl(fd, GET_DISK_INFO, &disc) >= 0) {
+ info[i].state = disc.state;
+ info[i].major = disc.major;
+ info[i].minor = disc.minor;
+ } else
+ info[i].major = info[i].minor = 0;
+ }
+
+ if (strncmp(mse->metadata_version, "external:", 9) == 0 &&
+ is_subarray(mse->metadata_version+9))
+ st->parent_dev =
+ devname2devnum(mse->metadata_version+10);
+ else
+ st->parent_dev = NoMdDev;
+ if (st->metadata == NULL &&
+ st->parent_dev == NoMdDev)
+ st->metadata = super_by_fd(fd, NULL);
+
+ close(fd);
+
+ for (i=0; i<MaxDisks; i++) {
+ mdu_disk_info_t disc = {0,0,0,0,0};
+ int newstate=0;
+ int change;
+ char *dv = NULL;
+ disc.number = i;
+ if (i > array.raid_disks + array.nr_disks) {
+ newstate = 0;
+ disc.major = disc.minor = 0;
+ } else if (info[i].major || info[i].minor) {
+ newstate = info[i].state;
+ dv = map_dev(info[i].major, info[i].minor, 1);
+ disc.state = newstate;
+ disc.major = info[i].major;
+ disc.minor = info[i].minor;
+ } else if (mse && mse->pattern && i < (int)strlen(mse->pattern)) {
+ switch(mse->pattern[i]) {
+ case 'U': newstate = 6 /* ACTIVE/SYNC */; break;
+ case '_': newstate = 0; break;
+ }
+ disc.major = disc.minor = 0;
+ }
+ if (dv == NULL && st->devid[i])
+ dv = map_dev(major(st->devid[i]),
+ minor(st->devid[i]), 1);
+ change = newstate ^ st->devstate[i];
+ if (st->utime && change && !st->err) {
+ if (i < array.raid_disks &&
+ (((newstate&change)&(1<<MD_DISK_FAULTY)) ||
+ ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) ||
+ ((st->devstate[i]&change)&(1<<MD_DISK_SYNC)))
+ )
+ alert("Fail", dev, dv, ainfo);
+ else if (i >= array.raid_disks &&
+ (disc.major || disc.minor) &&
+ st->devid[i] == makedev(disc.major, disc.minor) &&
+ ((newstate&change)&(1<<MD_DISK_FAULTY))
+ )
+ alert("FailSpare", dev, dv, ainfo);
+ else if (i < array.raid_disks &&
+ ! (newstate & (1<<MD_DISK_REMOVED)) &&
+ (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) ||
+ ((newstate&change)&(1<<MD_DISK_ACTIVE)) ||
+ ((newstate&change)&(1<<MD_DISK_SYNC)))
+ )
+ alert("SpareActive", dev, dv, ainfo);
+ }
+ st->devstate[i] = newstate;
+ st->devid[i] = makedev(disc.major, disc.minor);
+ }
+ st->active = array.active_disks;
+ st->working = array.working_disks;
+ st->spare = array.spare_disks;
+ st->failed = array.failed_disks;
+ st->utime = array.utime;
+ st->raid = array.raid_disks;
+ st->err = 0;
+ if ((st->active < st->raid) && st->spare == 0)
+ return 1;
+ return 0;
+}
+
+/*
+ * Start tracking arrays that are listed in 'mdstat' but are not yet on
+ * '*statelist'.
+ *
+ * Entries whose devnum is INT_MAX are skipped (check_array() stores that
+ * value in entries it has already matched), as are raid0 and linear
+ * arrays.  Entries without a level are kept so containers are picked up.
+ * Every remaining entry that can be opened and answers GET_ARRAY_INFO
+ * gets a zeroed 'struct state' pushed on the front of '*statelist', and
+ * a "NewArray" alert (preceded by "TestMessage" when 'test' is set) is
+ * raised for it through 'info'.
+ *
+ * Returns 1 if at least one array was added, 0 otherwise.
+ */
+static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist,
+			  int test, struct alert_info *info)
+{
+	struct mdstat_ent *mse;
+	int new_found = 0;
+
+	for (mse = mdstat; mse; mse = mse->next)
+		if (mse->devnum != INT_MAX &&
+		    (!mse->level || /* retrieve containers */
+		     (strcmp(mse->level, "raid0") != 0 &&
+		      strcmp(mse->level, "linear") != 0))
+			) {
+			struct state *st = calloc(1, sizeof *st);
+			mdu_array_info_t array;
+			char *name;
+			int fd;
+
+			if (st == NULL)
+				continue;
+			/* NOTE(review): assumes get_md_name() reports failure
+			 * by returning NULL - confirm against its definition.
+			 * Passing NULL to strdup() would be undefined.
+			 */
+			name = get_md_name(mse->devnum);
+			if (name == NULL) {
+				free(st);
+				continue;
+			}
+			st->devname = strdup(name);
+			if (st->devname == NULL) {
+				/* same clean-up as the "no such array" path */
+				put_md_name(name);
+				free(st);
+				continue;
+			}
+			if ((fd = open(st->devname, O_RDONLY)) < 0 ||
+			    ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
+				/* no such array */
+				if (fd >= 0)
+					close(fd);
+				put_md_name(st->devname);
+				free(st->devname);
+				if (st->metadata) {
+					st->metadata->ss->free_super(st->metadata);
+					free(st->metadata);
+				}
+				free(st);
+				continue;
+			}
+			close(fd);
+			st->next = *statelist;
+			/* start in the error state; check_array() clears it */
+			st->err = 1;
+			st->devnum = mse->devnum;
+			st->percent = -2;
+			st->expected_spares = -1;
+			/* metadata_version may be absent, so test it before
+			 * handing it to strncmp()
+			 */
+			if (mse->metadata_version &&
+			    strncmp(mse->metadata_version, "external:", 9) == 0 &&
+			    is_subarray(mse->metadata_version+9))
+				st->parent_dev =
+					devname2devnum(mse->metadata_version+10);
+			else
+				st->parent_dev = NoMdDev;
+			*statelist = st;
+			if (test)
+				alert("TestMessage", st->devname, NULL, info);
+			alert("NewArray", st->devname, NULL, info);
+			new_found = 1;
+		}
+	return new_found;
+}
+
+/*
+ * Ask the metadata handler of 'st' for the smallest spare it accepts.
+ *
+ * Returns 0 when no limit can be determined: 'st' has no metadata
+ * handler, the handler has no min_acceptable_spare_size method, the
+ * device cannot be opened, or the superblock cannot be loaded.
+ */
+unsigned long long min_spare_size_required(struct state *st)
+{
+	unsigned long long rv = 0;
+	int err;
+	int fd;
+
+	if (!st->metadata ||
+	    !st->metadata->ss->min_acceptable_spare_size)
+		return rv;
+
+	fd = open(st->devname, O_RDONLY);
+	if (fd < 0)
+		return 0;
+	err = st->metadata->ss->load_super(st->metadata, fd, st->devname);
+	close(fd);
+	/* load_super() is treated as successful only when it returns 0,
+	 * as elsewhere in this code; do not query a superblock that was
+	 * not loaded.
+	 */
+	if (err == 0)
+		rv = st->metadata->ss->min_acceptable_spare_size(st->metadata);
+	st->metadata->ss->free_super(st->metadata);
+
+	return rv;
+}
+
+/*
+ * Move the device 'devid' from array 'from' to array 'to'.
+ *
+ * The device is removed from 'from' and then added to 'to', both through
+ * Manage_subdevs().  If the add fails, Manage_subdevs() is called on
+ * 'from' once more with the same (add) request to put the device back.
+ * On success a "MoveSpare" alert is raised and both arrays are pinged.
+ *
+ * Returns 1 if the spare was moved, 0 otherwise.
+ */
+static int move_spare(struct state *from, struct state *to,
+		      dev_t devid,
+		      struct alert_info *info)
+{
+	struct mddev_dev devlist;
+	char devname[20];
+	int moved = 0;
+	int fd_to = open(to->devname, O_RDONLY);
+	int fd_from = open(from->devname, O_RDONLY);
+
+	if (fd_to < 0 || fd_from < 0)
+		goto out;
+
+	/* describe the single device as "major:minor" */
+	sprintf(devname, "%d:%d", major(devid), minor(devid));
+	devlist.devname = devname;
+	devlist.next = NULL;
+	devlist.used = 0;
+	devlist.re_add = 0;
+	devlist.writemostly = 0;
+
+	/* try to remove and add */
+	devlist.disposition = 'r';
+	if (Manage_subdevs(from->devname, fd_from, &devlist, -1, 0, NULL) == 0) {
+		devlist.disposition = 'a';
+		if (Manage_subdevs(to->devname, fd_to, &devlist, -1, 0, NULL) != 0) {
+			/* could not add it to 'to': give it back to 'from' */
+			Manage_subdevs(from->devname, fd_from, &devlist, -1, 0, NULL);
+		} else {
+			alert("MoveSpare", to->devname, from->devname, info);
+			/* make sure we will see newly added spare before next
+			 * time through loop
+			 */
+			ping_manager(to->devname);
+			ping_manager(from->devname);
+			moved = 1;
+		}
}
+out:
+	if (fd_to >= 0)
+		close(fd_to);
+	if (fd_from >= 0)
+		close(fd_from);
+	return moved;
+}
+
+/*
+ * Decide whether array 'from' may give up a spare for array 'to'.
+ *
+ * Returns 1 if 'from' is an acceptable donor, 0 otherwise.  'from' is
+ * refused when it is 'to' itself, is a member of a container, is in the
+ * error state, has no metadata handler loaded, has a degraded subarray,
+ * is degraded itself (native metadata only), has no spares, or when
+ * 'domlist' is empty.
+ */
+static int check_donor(struct state *from, struct state *to,
+		       struct domainlist *domlist)
+{
+	struct state *sub;
+
+	if (from == to)
+		return 0;
+	if (from->parent)
+		/* Cannot move from a member */
+		return 0;
+	if (from->err)
+		return 0;
+	if (from->metadata == NULL)
+		/* check_array() only loads metadata for arrays that have
+		 * no parent device, so a subarray whose container was not
+		 * linked gets here with none.  It cannot be a donor.
+		 */
+		return 0;
+	for (sub = from->subarray; sub; sub = sub->subarray)
+		/* If source array has degraded subarrays, don't
+		 * remove anything
+		 */
+		if (sub->active < sub->raid)
+			return 0;
+	if (from->metadata->ss->external == 0)
+		if (from->active < from->raid)
+			return 0;
+	if (from->spare <= 0)
+		return 0;
+	if (domlist == NULL)
+		return 0;
+	return 1;
+}
+
+/*
+ * Pick a device of 'from' that could serve as a spare for 'to'.
+ *
+ * Slots from from->raid up to MaxDisks are scanned for an entry with a
+ * device number and devstate 0.  When 'min_size' is non-zero and the
+ * size of the device can be read, devices smaller than 'min_size' are
+ * passed over.  The first remaining device that passes domain_test()
+ * against 'domlist' for the metadata type of 'to' is chosen.
+ *
+ * Returns the chosen device number, or 0 if there is none.
+ */
+static dev_t choose_spare(struct state *from, struct state *to,
+			struct domainlist *domlist, unsigned long long min_size)
+{
+	dev_t found = 0;
+	int slot;
+
+	for (slot = from->raid; slot < MaxDisks; slot++) {
+		struct dev_policy *pol;
+		unsigned long long dev_size;
+
+		if (!(from->devid[slot] > 0) ||
+		    from->devstate[slot] != 0)
+			continue;
+
+		if (min_size &&
+		    dev_size_from_id(from->devid[slot], &dev_size) &&
+		    dev_size < min_size)
+			continue;
+
+		/* the device policy, extended by the donor's spare group */
+		pol = devnum_policy(from->devid[slot]);
+		if (from->spare_group)
+			pol_add(&pol, pol_domain,
+				from->spare_group, NULL);
+		if (domain_test(domlist, pol, to->metadata->ss->name))
+			found = from->devid[slot];
+		dev_policy_free(pol);
+
+		if (found)
+			break;
+	}
+	return found;
+}
+
+/*
+ * Pick a spare from container 'from' for 'to'.
+ *
+ * This is similar to choose_spare, but we cannot trust devstate,
+ * so we need to read the metadata instead: the container is loaded and
+ * the disks reported by getinfo_super_disks() are scanned for one whose
+ * disk.state is 0.  When 'min_size' is non-zero and the size of the
+ * device can be read, devices smaller than 'min_size' are passed over.
+ *
+ * When 'from' and 'to' are the same the call only asks whether the
+ * container already holds a suitable spare, and 'domlist' is not
+ * consulted.  Otherwise the device must also pass domain_test() against
+ * 'domlist' for the metadata type of 'to'.
+ *
+ * Returns the chosen device number, or 0 if there is none or the
+ * metadata could not be read.
+ */
+static dev_t container_choose_spare(struct state *from, struct state *to,
+				    struct domainlist *domlist,
+				    unsigned long long min_size)
+{
+	struct supertype *st = from->metadata;
+	struct mdinfo *disks, *d;
+	dev_t dev = 0;
+	int err;
+	int fd;
+
+	/* Check the handler before opening the device, so that this
+	 * early return cannot leak a file descriptor.
+	 */
+	if (!st->ss->getinfo_super_disks)
+		return 0;
+
+	fd = open(from->devname, O_RDONLY);
+	if (fd < 0)
+		return 0;
+
+	err = st->ss->load_container(st, fd, NULL);
+	close(fd);
+	if (err)
+		return 0;
+
+	disks = st->ss->getinfo_super_disks(st);
+	st->ss->free_super(st);
+
+	if (!disks)
+		return 0;
+
+	for (d = disks->devs ; d && !dev ; d = d->next) {
+		if (d->disk.state == 0) {
+			struct dev_policy *pol;
+			unsigned long long dev_size;
+			dev = makedev(d->disk.major,d->disk.minor);
+
+			if (min_size &&
+			    dev_size_from_id(dev, &dev_size) &&
+			    dev_size < min_size) {
+				/* too small, keep looking */
+				dev = 0;
+				continue;
+			}
+			if (from == to)
+				/* Just checking if destination already has
+				 * a spare, no need to check policy, we are
+				 * done.
+				 */
+				break;
+
+			pol = devnum_policy(dev);
+			if (from->spare_group)
+				pol_add(&pol, pol_domain,
+					from->spare_group, NULL);
+			if (!domain_test(domlist, pol, to->metadata->ss->name))
+				dev = 0;
+
+			dev_policy_free(pol);
+		}
+	}
+	sysfs_free(disks);
+	return dev;
+}
+
+
+/*
+ * For every array on 'statelist' that is degraded, has no spare and is
+ * not in the error state, try to move a spare in from another array.
+ *
+ * A member of a container is handled through its container.  For
+ * external metadata nothing is moved if the container already holds a
+ * suitable spare.  Donors are tried in list order until move_spare()
+ * succeeds; alerts are sent through 'info'.
+ */
+static void try_spare_migration(struct state *statelist, struct alert_info *info)
+{
+	struct state *from;
+	struct state *st;
+
+	link_containers_with_subarrays(statelist);
+	for (st = statelist; st; st = st->next)
+		if (st->active < st->raid &&
+		    st->spare == 0 && !st->err) {
+			struct domainlist *domlist = NULL;
+			int d;
+			struct state *to = st;
+			unsigned long long min_size;
+
+			if (to->parent)
+				/* member of a container */
+				to = to->parent;
+
+			if (to->metadata == NULL)
+				/* No metadata handler is loaded, e.g. a
+				 * subarray that could not be linked to
+				 * its container.  Everything below needs
+				 * to->metadata->ss, so skip this array.
+				 */
+				continue;
+
+			min_size = min_spare_size_required(to);
+			if (to->metadata->ss->external) {
+				/* We must make sure there is
+				 * no suitable spare in container already.
+				 * If there is we don't add more */
+				dev_t devid = container_choose_spare(
+					to, to, NULL, min_size);
+				if (devid > 0)
+					continue;
+			}
+			/* collect the domains of the devices already in 'to' */
+			for (d = 0; d < MaxDisks; d++)
+				if (to->devid[d])
+					domainlist_add_dev(&domlist,
+							   to->devid[d],
+							   to->metadata->ss->name);
+			if (to->spare_group)
+				domain_add(&domlist, to->spare_group);
+
+			for (from = statelist ; from ; from = from->next) {
+				dev_t devid;
+				if (!check_donor(from, to, domlist))
+					continue;
+				if (from->metadata->ss->external)
+					devid = container_choose_spare(
+						from, to, domlist, min_size);
+				else
+					devid = choose_spare(from, to, domlist,
+							     min_size);
+				if (devid > 0
+				    && move_spare(from, to, devid, info))
+					break;
+			}
+			domain_free(domlist);
+		}
+}
+
+/* Connect every external-metadata subarray on 'list' with its container.
+ *
+ * All parent/subarray links are dropped first and the tree is rebuilt
+ * from scratch, which is safest given that entries may have disappeared
+ * or changed.  A subarray is attached to the first entry that is not in
+ * the error state, has no parent device itself and whose devnum equals
+ * the subarray's parent_dev.  The subarrays of one container are chained
+ * through their 'subarray' pointers, most recently linked first.
+ */
+static void link_containers_with_subarrays(struct state *list)
+{
+	struct state *member;
+	struct state *candidate;
+
+	/* forget whatever was linked on the previous pass */
+	for (member = list; member; member = member->next) {
+		member->parent = NULL;
+		member->subarray = NULL;
+	}
+
+	for (member = list; member; member = member->next) {
+		if (member->parent_dev == NoMdDev)
+			/* not a subarray */
+			continue;
+		for (candidate = list; candidate; candidate = candidate->next) {
+			if (candidate->err ||
+			    candidate->parent_dev != NoMdDev ||
+			    candidate->devnum != member->parent_dev)
+				continue;
+			/* push 'member' on the container's subarray chain */
+			member->parent = candidate;
+			member->subarray = candidate->subarray;
+			candidate->subarray = member;
+			break;
+		}
+	}
}
/* Not really Monitor but ... */
close(fd);
if (superror == 0) {
/* array might be active... */
- st->ss->getinfo_super(st, &info);
+ st->ss->getinfo_super(st, &info, NULL);
if (st->ss == &super0) {
mddev = get_md_name(info.array.md_minor);
disc.number = info.disk.number;
#include "mdadm.h"
-char Version[] = Name " - v3.1.4 - 31st August 2010\n";
+char Version[] = Name " - v3.2-devel - 23rd November 2010\n";
/*
* File: ReadMe.c
"-ABCDEFGIQhVXWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sa:rfRSow1tye:";
struct option long_options[] = {
- {"manage", 0, 0, '@'},
- {"misc", 0, 0, '#'},
+ {"manage", 0, 0, ManageOpt},
+ {"misc", 0, 0, MiscOpt},
{"assemble", 0, 0, 'A'},
{"build", 0, 0, 'B'},
{"create", 0, 0, 'C'},
/* after those will normally come the name of the md device */
{"help", 0, 0, 'h'},
- {"help-options",0,0,'h'},
+ {"help-options",0,0, HelpOptions},
{"version", 0, 0, 'V'},
{"verbose", 0, 0, 'v'},
{"quiet", 0, 0, 'q'},
/* For create or build: */
- {"chunk", 1, 0, 'c'},
- {"rounding", 1, 0, 'c'}, /* for linear, chunk is really a rounding number */
+ {"chunk", 1, 0, ChunkSize},
+ {"rounding", 1, 0, ChunkSize}, /* for linear, chunk is really a
+ * rounding number */
{"level", 1, 0, 'l'}, /* 0,1,4,5,6,linear */
- {"parity", 1, 0, 'p'}, /* {left,right}-{a,}symmetric */
- {"layout", 1, 0, 'p'},
+ {"parity", 1, 0, Layout}, /* {left,right}-{a,}symmetric */
+ {"layout", 1, 0, Layout},
{"raid-disks",1, 0, 'n'},
{"raid-devices",1, 0, 'n'},
{"spare-disks",1,0, 'x'},
{"spare-devices",1,0, 'x'},
{"size", 1, 0, 'z'},
- {"auto", 1, 0, 'a'}, /* also for --assemble */
+ {"auto", 1, 0, Auto}, /* also for --assemble */
{"assume-clean",0,0, AssumeClean },
{"metadata", 1, 0, 'e'}, /* superblock format */
- {"bitmap", 1, 0, 'b'},
+ {"bitmap", 1, 0, Bitmap},
{"bitmap-chunk", 1, 0, BitmapChunk},
{"write-behind", 2, 0, WriteBehind},
- {"write-mostly",0, 0, 'W'},
+ {"write-mostly",0, 0, WriteMostly},
{"re-add", 0, 0, ReAdd},
{"homehost", 1, 0, HomeHost},
#if 0
/* For assemble */
{"uuid", 1, 0, 'u'},
- {"super-minor",1,0, 'm'},
+ {"super-minor",1,0, SuperMinor},
{"name", 1, 0, 'N'},
- {"config", 1, 0, 'c'},
+ {"config", 1, 0, ConfigFile},
{"scan", 0, 0, 's'},
- {"force", 0, 0, 'f'},
+ {"force", 0, 0, Force},
{"update", 1, 0, 'U'},
/* Management */
- {"add", 0, 0, 'a'},
- {"remove", 0, 0, 'r'},
- {"fail", 0, 0, 'f'},
- {"set-faulty",0, 0, 'f'},
+ {"add", 0, 0, Add},
+ {"remove", 0, 0, Remove},
+ {"fail", 0, 0, Fail},
+ {"set-faulty",0, 0, Fail},
{"run", 0, 0, 'R'},
{"stop", 0, 0, 'S'},
{"readonly", 0, 0, 'o'},
{"readwrite", 0, 0, 'w'},
{"no-degraded",0,0, NoDegraded },
- {"wait", 0, 0, 'W'},
+ {"wait", 0, 0, WaitOpt},
{"wait-clean", 0, 0, Waitclean },
/* For Detail/Examine */
- {"brief", 0, 0, 'b'},
+ {"brief", 0, 0, Brief},
{"export", 0, 0, 'Y'},
{"sparc2.2", 0, 0, Sparc22},
{"test", 0, 0, 't'},
/* For Follow/monitor */
- {"mail", 1, 0, 'm'},
- {"program", 1, 0, 'p'},
- {"alert", 1, 0, 'p'},
- {"increment", 1, 0, 'r'},
+ {"mail", 1, 0, EMail},
+ {"program", 1, 0, ProgramOpt},
+ {"alert", 1, 0, ProgramOpt},
+ {"increment", 1, 0, Increment},
{"delay", 1, 0, 'd'},
- {"daemonise", 0, 0, 'f'},
- {"daemonize", 0, 0, 'f'},
+ {"daemonise", 0, 0, Fork},
+ {"daemonize", 0, 0, Fork},
{"oneshot", 0, 0, '1'},
{"pid-file", 1, 0, 'i'},
{"syslog", 0, 0, 'y'},
+ {"no-sharing", 0, 0, NoSharing},
+
/* For Grow */
{"backup-file", 1,0, BackupFile},
+ {"invalid-backup",0,0,InvalidBackup},
{"array-size", 1, 0, 'Z'},
/* For Incremental */
- {"rebuild-map", 0, 0, 'r'},
+ {"rebuild-map", 0, 0, RebuildMapOpt},
+ {"path", 1, 0, IncrementalPath},
+
{0, 0, 0, 0}
};
char DefaultAltConfFile[] = CONFFILE2;
enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev,
- Homehost, AutoMode, LTEnd };
+ Homehost, AutoMode, Policy, PartPolicy, LTEnd };
char *keywords[] = {
[Devices] = "devices",
[Array] = "array",
[CreateDev]= "create",
[Homehost] = "homehost",
[AutoMode] = "auto",
+ [Policy] = "policy",
+ [PartPolicy]="part-policy",
[LTEnd] = NULL
};
char *name;
} *cdevlist = NULL;
-mddev_dev_t load_partitions(void)
+struct mddev_dev *load_partitions(void)
{
FILE *f = fopen("/proc/partitions", "r");
char buf[1024];
- mddev_dev_t rv = NULL;
+ struct mddev_dev *rv = NULL;
if (f == NULL) {
fprintf(stderr, Name ": cannot open /proc/partitions\n");
return NULL;
while (fgets(buf, 1024, f)) {
int major, minor;
char *name, *mp;
- mddev_dev_t d;
+ struct mddev_dev *d;
buf[1023] = '\0';
if (buf[0] != ' ')
d->devname = strdup(name);
d->next = rv;
d->used = 0;
- d->content = NULL;
rv = d;
}
fclose(f);
return rv;
}
-mddev_dev_t load_containers(void)
+struct mddev_dev *load_containers(void)
{
struct mdstat_ent *mdstat = mdstat_read(1, 0);
struct mdstat_ent *ent;
- mddev_dev_t d;
- mddev_dev_t rv = NULL;
+ struct mddev_dev *d;
+ struct mddev_dev *rv = NULL;
if (!mdstat)
return NULL;
}
d->next = rv;
d->used = 0;
- d->content = NULL;
rv = d;
}
free_mdstat(mdstat);
}
}
-mddev_ident_t mddevlist = NULL;
-mddev_ident_t *mddevlp = &mddevlist;
+struct mddev_ident *mddevlist = NULL;
+struct mddev_ident **mddevlp = &mddevlist;
static int is_number(char *w)
{
{
char *w;
- struct mddev_ident_s mis;
- mddev_ident_t mi;
+ struct mddev_ident mis;
+ struct mddev_ident *mi;
mis.uuid_set = 0;
mis.super_minor = UnSet;
}
}
-static char *auto_options = NULL;
+char auto_yes[] = "yes";
+char auto_no[] = "no";
+char auto_homehost[] = "homehost";
+
+static int auto_seen = 0;
void autoline(char *line)
{
char *w;
+	char *seen;
+	int super_cnt;
+	char *dflt = auto_yes;
+	int homehost = 0;
+	int i;
- if (auto_options) {
+	if (auto_seen) {
fprintf(stderr, Name ": AUTO line may only be give once."
" Subsequent lines ignored\n");
return;
}
+	/* Record that an AUTO line has been processed; without this the
+	 * test above could never trigger.
+	 */
+	auto_seen = 1;
+
+	/* Parse the 'auto' line creating policy statements for the 'auto' policy.
+	 *
+	 * The default is 'yes' but the 'auto' line might over-ride that.
+	 * Words in the line are processed in order with the first
+	 * match winning.
+	 * word can be:
+	 *   +version   - that version can be assembled
+	 *   -version   - that version cannot be auto-assembled
+	 *   yes or +all - any other version can be assembled
+	 *   no or -all  - no other version can be assembled.
+	 *   homehost   - any array associated by 'homehost' to this
+	 *                host can be assembled.
+	 *
+	 * Thus:
+	 *   +ddf -0.90 homehost -all
+	 * will auto-assemble any ddf array, no 0.90 array, and
+	 * any other array (imsm, 1.x) if and only if it is identified
+	 * as belonging to this host.
+	 *
+	 * We translate that to policy by creating 'auto=yes' when we see
+	 * a '+version' line, 'auto=no' if we see '-version' before 'homehost',
+	 * or 'auto=homehost' if we see '-version' after 'homehost'.
+	 * When we see yes, no, +all or -all we stop and any version that hasn't
+	 * been seen gets an appropriate auto= entry.
+	 */
- auto_options = dl_strdup(line);
- dl_init(auto_options);
+	for (super_cnt = 0; superlist[super_cnt]; super_cnt++)
+		;
+	/* one flag per superlist entry: set once a word has named it */
+	seen = calloc(super_cnt, 1);
+	if (seen == NULL) {
+		fprintf(stderr, Name ": out of memory processing AUTO line\n");
+		return;
+	}
- for (w=dl_next(line); w != line ; w=dl_next(w)) {
- char *w2 = dl_strdup(w);
- dl_add(auto_options, w2);
+	for (w = dl_next(line); w != line ; w = dl_next(w)) {
+		char *val;
+
+		if (strcasecmp(w, "yes") == 0) {
+			dflt = auto_yes;
+			break;
+		}
+		if (strcasecmp(w, "no") == 0) {
+			if (homehost)
+				dflt = auto_homehost;
+			else
+				dflt = auto_no;
+			break;
+		}
+		if (strcasecmp(w, "homehost") == 0) {
+			homehost = 1;
+			continue;
+		}
+		if (w[0] == '+')
+			val = auto_yes;
+		else if (w[0] == '-') {
+			if (homehost)
+				val = auto_homehost;
+			else
+				val = auto_no;
+		} else
+			continue;
+
+		if (strcasecmp(w+1, "all") == 0) {
+			dflt = val;
+			break;
+		}
+		for (i = 0; superlist[i]; i++) {
+			const char *version = superlist[i]->name;
+			if (strcasecmp(w+1, version) == 0)
+				break;
+			/* 1 matches 1.x, 0 matches 0.90 */
+			if (version[1] == '.' &&
+			    strlen(w+1) == 1 &&
+			    w[1] == version[0])
+				break;
+			/* 1.anything matches 1.x */
+			if (strcmp(version, "1.x") == 0 &&
+			    strncmp(w+1, "1.", 2) == 0)
+				break;
+		}
+		if (superlist[i] == NULL)
+			/* ignore this word */
+			continue;
+		if (seen[i])
+			/* already know about this metadata */
+			continue;
+		policy_add(rule_policy, pol_auto, val, pol_metadata, superlist[i]->name, NULL);
+		seen[i] = 1;
}
+	/* every metadata type not named explicitly gets the default */
+	for (i = 0; i < super_cnt; i++)
+		if (!seen[i])
+			policy_add(rule_policy, pol_auto, dflt, pol_metadata, superlist[i]->name, NULL);
+	free(seen);
}
int loaded = 0;
case AutoMode:
autoline(line);
break;
+ case Policy:
+ policyline(line, rule_policy);
+ break;
+ case PartPolicy:
+ policyline(line, rule_part);
+ break;
default:
fprintf(stderr, Name ": Unknown keyword %s\n", line);
}
return &createinfo;
}
-mddev_ident_t conf_get_ident(char *dev)
+struct mddev_ident *conf_get_ident(char *dev)
{
- mddev_ident_t rv;
+ struct mddev_ident *rv;
load_conffile();
rv = mddevlist;
while (dev && rv && (rv->devname == NULL
return rv;
}
-static void append_dlist(mddev_dev_t *dlp, mddev_dev_t list)
+static void append_dlist(struct mddev_dev **dlp, struct mddev_dev *list)
{
while (*dlp)
dlp = &(*dlp)->next;
*dlp = list;
}
-mddev_dev_t conf_get_devs()
+struct mddev_dev *conf_get_devs()
{
glob_t globbuf;
struct conf_dev *cd;
int flags = 0;
- static mddev_dev_t dlist = NULL;
+ static struct mddev_dev *dlist = NULL;
unsigned int i;
while (dlist) {
- mddev_dev_t t = dlist;
+ struct mddev_dev *t = dlist;
dlist = dlist->next;
free(t->devname);
free(t);
}
if (flags & GLOB_APPEND) {
for (i=0; i<globbuf.gl_pathc; i++) {
- mddev_dev_t t = malloc(sizeof(*t));
+ struct mddev_dev *t = malloc(sizeof(*t));
t->devname = strdup(globbuf.gl_pathv[i]);
t->next = dlist;
t->used = 0;
- t->content = NULL;
dlist = t;
/* printf("one dev is %s\n", t->devname);*/
}
return 0;
}
-int conf_test_metadata(const char *version, int is_homehost)
+int conf_test_metadata(const char *version, struct dev_policy *pol, int is_homehost)
{
- /* Check if the given metadata version is allowed
- * to be auto-assembled.
- * The default is 'yes' but the 'auto' line might over-ride that.
- * Words in auto_options are processed in order with the first
- * match winning.
- * word can be:
- * +version - that version can be assembled
- * -version - that version cannot be auto-assembled
- * yes or +all - any other version can be assembled
- * no or -all - no other version can be assembled.
- * homehost - any array associated by 'homehost' to this
- * host can be assembled.
- *
- * Thus:
- * +ddf -0.90 homehost -all
- * will auto-assemble any ddf array, no 0.90 array, and
- * any other array (imsm, 1.x) if and only if it is identified
- * as belonging to this host.
+	/* Decide whether an array with metadata 'version' may be
+	 * auto-assembled, from the 'auto' entries of policy 'pol' that
+	 * pol_for_each() yields for 'version'.
+	 * Returns 1 to allow assembly, 0 to refuse it.
+	 *
+	 * If anyone said 'yes', that sticks.
+	 * else if 'homehost' was given and is_homehost is set, say 'yes'
+	 * else if there is a 'no', say 'no'.
+	 * else 'yes'.
+	 *
+	 * The values tested are the ones autoline() stores:
+	 * "yes", "no" and "homehost".
*/
- char *w;
+	struct dev_policy *p;
+	int no=0, found_homehost=0;
load_conffile();
- if (!auto_options)
- return 1;
- for (w = dl_next(auto_options); w != auto_options; w = dl_next(w)) {
- int rv;
- if (strcasecmp(w, "yes") == 0)
+
+	pol = pol_find(pol, pol_auto);
+	pol_for_each(p, pol, version) {
+		if (strcmp(p->value, "yes") == 0)
return 1;
- if (strcasecmp(w, "no") == 0)
- return 0;
- if (strcasecmp(w, "homehost") == 0) {
- if (is_homehost)
- return 1;
- else
- continue;
- }
- if (w[0] == '+')
- rv = 1;
- else if (w[0] == '-')
- rv = 0;
- else continue;
-
- if (strcasecmp(w+1, "all") == 0)
- return rv;
- if (strcasecmp(w+1, version) == 0)
- return rv;
- /* allow '0' to match version '0.90'
- * and 1 or 1.whatever to match version '1.x'
- */
- if (version[1] == '.' &&
- strlen(w+1) == 1 &&
- w[1] == version[0])
- return rv;
- if (version[1] == '.' && version[2] == 'x' &&
- strncmp(w+1, version, 2) == 0)
- return rv;
+		if (strcmp(p->value, "homehost") == 0)
+			found_homehost = 1;
+		if (strcmp(p->value, "no") == 0)
+			no = 1;
}
+	if (is_homehost && found_homehost)
+		return 1;
+	if (no)
+		return 0;
return 1;
}
* matches devname
*/
-
while (devices && *devices) {
char patn[1024];
char *p = devices;
* It can be taken either by a match on devname, name, or
* even super-minor.
*/
- mddev_ident_t dev;
+ struct mddev_ident *dev;
load_conffile();
for (dev = mddevlist; dev; dev = dev->next) {
return 1;
}
-struct mddev_ident_s *conf_match(struct mdinfo *info, struct supertype *st)
+struct mddev_ident *conf_match(struct mdinfo *info, struct supertype *st)
{
- struct mddev_ident_s *array_list, *match;
+ struct mddev_ident *array_list, *match;
int verbose = 0;
char *devname = NULL;
array_list = conf_get_ident(NULL);
--- /dev/null
+External Reshape
+
+1 Problem statement
+
+External (third-party metadata) reshape differs from native-metadata
+reshape in three key ways:
+
+1.1 Format specific constraints
+
+In the native case reshape is limited by what is implemented in the
+generic reshape routine (Grow_reshape()) and what is supported by the
+kernel. There are exceptional cases where Grow_reshape() may block
+operations when it knows that the kernel implementation is broken, but
+otherwise the kernel is relied upon to be the final arbiter of what
+reshape operations are supported.
+
+In the external case the kernel, and the generic checks in
+Grow_reshape(), become the super-set of what reshapes are possible. The
+metadata format may not support, or have yet to implement a given
+reshape type. The implication for Grow_reshape() is that it must query
+the metadata handler and effect changes in the metadata before the new
+geometry is posted to the kernel. The ->reshape_super method allows
+Grow_reshape() to validate the requested operation and post the metadata
+update.
+
+1.2 Scope of reshape
+
+Native metadata reshape is always performed at the array scope (no
+metadata relationship with sibling arrays on the same disks). External
+reshape, depending on the format, may not allow the number of member
+disks to be changed in a subarray unless the change is simultaneously
+applied to all subarrays in the container. For example the imsm format
+requires all member disks to be a member of all subarrays, so a 4-disk
+raid5 in a container that also houses a 4-disk raid10 array could not be
+reshaped to 5 disks as the imsm format does not support a 5-disk raid10
+representation. This requires the ->reshape_super method to check the
+contents of the array and ask the user to run the reshape at container
+scope (if both subarrays are agreeable to the change), or report an
+error in the case where one subarray cannot support the change.
+
+1.3 Monitoring / checkpointing
+
+Reshape, unlike rebuild/resync, requires strict checkpointing to survive
+interrupted reshape operations. For example when expanding a raid5
+array the first few stripes of the array will be overwritten in a
+destructive manner. When restarting the reshape process we need to know
+the exact location of the last successfully written stripe, and we need
+to restore the data in any partially overwritten stripe. Native
+metadata stores this backup data in the unused portion of spares that
+are being promoted to array members, or in an external backup file
+(located on a non-involved block device).
+
+The kernel is in charge of recording checkpoints of reshape progress,
+but mdadm is delegated the task of managing the backup space which
+involves:
+1/ Identifying what data will be overwritten in the next unit of reshape
+ operation
+2/ Suspending access to that region so that a snapshot of the data can
+ be transferred to the backup space.
+3/ Allowing the kernel to reshape the saved region and setting the
+ boundary for the next backup.
+
+In the external reshape case we want to preserve this mdadm
+'reshape-manager' arrangement, but have a third actor, mdmon, to
+consider. It is tempting to give the role of managing reshape to mdmon,
+but that is counter to its role as a monitor, and conflicts with the
+existing capabilities and role of mdadm to manage the progress of
+reshape. For clarity the external reshape implementation maintains the
+role of mdmon as a (mostly) passive recorder of raid events, and mdadm
+treats it as it would the kernel in the native reshape case (modulo
+needing to send explicit metadata update messages and checking that
+mdmon took the expected action).
+
+External reshape can use the generic md backup file as a fallback, but in the
+optimal/firmware-compatible case the reshape-manager will use the metadata
+specific areas for managing reshape. The implementation also needs to spawn a
+reshape-manager per subarray when the reshape is being carried out at the
+container level. For these two reasons the ->manage_reshape() method is
+introduced. This method in addition to base tasks mentioned above:
+1/ Spawns a manager per-subarray, when necessary
+2/ Uses either generic routines in Grow.c for md-style backup file
+ support, or uses the metadata-format specific location for storing
+ recovery data.
+This aims to avoid a "midlayer mistake"[1] and lets the metadata handler
+optionally take advantage of generic infrastructure in Grow.c
+
+2 Details for specific reshape requests
+
+There are quite a few moving pieces spread out across md, mdadm, and mdmon for
+the support of external reshape, and there are several different types of
+reshape that need to be comprehended by the implementation. A rundown of
+these details follows.
+
+2.0 General provisions:
+
+Obtain an exclusive open on the container to make sure we are not
+running concurrently with a Create() event.
+
+2.1 Freezing sync_action
+
+2.2 Reshape size
+
+ 1/ mdadm::Grow_reshape(): checks if mdmon is running and optionally
+ initializes st->update_tail
+ 2/ mdadm::Grow_reshape() calls ->reshape_super() to check that the size change
+ is allowed (being performed at subarray scope / enough room) and prepares a
+ metadata update
+ 3/ mdadm::Grow_reshape(): flushes the metadata update (via
+ flush_metadata_update(), or ->sync_metadata())
+ 4/ mdadm::Grow_reshape(): post the new size to the kernel
+
+
+2.3 Reshape level (simple-takeover)
+
+"simple-takeover" implies the level change can be satisfied without touching
+sync_action
+
+ 1/ mdadm::Grow_reshape(): checks if mdmon is running and optionally
+ initializes st->update_tail
+ 2/ mdadm::Grow_reshape() calls ->reshape_super() to check that the level change
+ is allowed (being performed at subarray scope) and prepares a
+ metadata update
+ 2a/ raid10 --> raid0: degrade all mirror legs prior to calling
+ ->reshape_super
+ 3/ mdadm::Grow_reshape(): flushes the metadata update (via
+ flush_metadata_update(), or ->sync_metadata())
+ 4/ mdadm::Grow_reshape(): post the new level to the kernel
+
+2.4 Reshape chunk, layout
+
+2.5 Reshape raid disks (grow)
+
+ 1/ mdadm::Grow_reshape(): unconditionally initializes st->update_tail
+ because only redundant raid levels can modify the number of raid disks
+ 2/ mdadm::Grow_reshape(): calls ->reshape_super() to check that the level
+ change is allowed (being performed at proper scope / permissible
+ geometry / proper spares available in the container) and prepares a
+ metadata update.
+ 3/ mdadm::Grow_reshape(): Converts each subarray in the container to the
+ raid level that can perform the reshape and starts mdmon.
+ 4/ mdadm::Grow_reshape(): Pushes the update to mdmon...
+ 4a/ mdmon::process_update(): marks the array as reshaping
+ 4b/ mdmon::manage_member(): adds the spares (without assigning a slot)
+ 5/ mdadm::Grow_reshape(): Notes that mdmon has assigned spares and invokes
+ ->manage_reshape()
+ 5/ mdadm::<format>->manage_reshape(): (for each subarray) sets sync_max to
+ zero, starts the reshape, and pings mdmon
+ 5a/ mdmon::read_and_act(): notices that reshape has started and notifies
+ the metadata handler to record the slots chosen by the kernel
+ 6/ mdadm::<format>->manage_reshape(): saves data that will be overwritten by
+ the kernel to either the backup file or the metadata specific location,
+ advances sync_max, waits for reshape, pings mdmon, and repeats.
+ 6a/ mdmon::read_and_act(): records checkpoints
+ 7/ mdadm::<format>->manage_reshape(): Once reshape completes changes the raid
+ level back to the nominal raid level (if necessary)
+
+ FIXME: native metadata does not have the capability to record the original
+ raid level in reshape-restart case because the kernel always records current
+ raid level to the metadata, whereas external metadata can masquerade at an
+ alternate level based on the reshape state.
+
+2.6 Reshape raid disks (shrink)
+
+3 TODO
+
+...
+
+[1]: Linux kernel design patterns - part 3, Neil Brown http://lwn.net/Articles/336262/
close(aa->action_fd);
close(aa->info.state_fd);
close(aa->resync_start_fd);
+ close(aa->metadata_fd);
+ close(aa->sync_completed_fd);
}
static void free_aa(struct active_array *aa)
*/
st2 = dup_super(st);
if (st2->ss->load_super(st2, dfd, NULL) == 0) {
- st2->ss->getinfo_super(st, &info);
+ st2->ss->getinfo_super(st, &info, NULL);
if (st->ss->compare_super(st, st2) == 0 &&
info.disk.raid_disk >= 0) {
/* Looks like a good member of array.
* trying to find and assign a spare.
* We do that whenever the monitor tells us too.
*/
+ char buf[64];
+ int frozen;
+
// FIXME
a->info.array.raid_disks = mdstat->raid_disks;
- a->info.array.chunk_size = mdstat->chunk_size;
// MORE
- if (a->check_degraded) {
+ /* honor 'frozen' */
+ if (sysfs_get_str(&a->info, NULL, "metadata_version", buf, sizeof(buf)) > 0)
+ frozen = buf[9] == '-';
+ else
+ frozen = 1; /* can't read metadata_version assume the worst */
+
+ if (a->check_degraded && !frozen) {
struct metadata_update *updates = NULL;
struct mdinfo *newdev = NULL;
struct active_array *newa;
new->container = container;
- inst = &mdstat->metadata_version[10+strlen(container->devname)+1];
+ inst = to_subarray(mdstat, container->devname);
new->info.array = mdi->array;
new->info.component_size = mdi->component_size;
/* read and validate the message */
if (receive_message(fd, &msg, tmo) == 0) {
handle_message(container, &msg);
- if (ack(fd, tmo) < 0)
+ if (msg.len == 0) {
+ /* ping reply with version */
+ msg.buf = Version;
+ msg.len = strlen(Version) + 1;
+ if (send_message(fd, &msg, tmo) < 0)
+ terminate = 1;
+ } else if (ack(fd, tmo) < 0)
terminate = 1;
} else
terminate = 1;
* version super_by_fd does this automatically, this routine is meant as
* a supplement for guess_super()
*/
-static void set_member_info(struct supertype *st, struct mdstat_ent *ent)
+static char *get_member_info(struct mdstat_ent *ent)
{
- st->subarray[0] = '\0';
-
if (ent->metadata_version == NULL ||
strncmp(ent->metadata_version, "external:", 9) != 0)
- return;
+ return NULL;
if (is_subarray(&ent->metadata_version[9])) {
- char version[strlen(ent->metadata_version)+1];
char *subarray;
- char *name = &version[10];
-
- strcpy(version, ent->metadata_version);
- subarray = strrchr(version, '/');
- name = &version[10];
- if (!subarray)
- return;
- *subarray++ = '\0';
-
- st->container_dev = devname2devnum(name);
- strncpy(st->subarray, subarray, sizeof(st->subarray));
+ subarray = strrchr(ent->metadata_version, '/');
+ return subarray + 1;
}
+ return NULL;
}
void RebuildMap(void)
int dfd;
int ok;
struct supertype *st;
+ char *subarray;
char *path;
- struct mdinfo info;
+ struct mdinfo *info;
sprintf(dn, "%d:%d", sd->disk.major, sd->disk.minor);
dfd = dev_open(dn, O_RDONLY);
if ( st == NULL)
ok = -1;
else {
- set_member_info(st, md);
+ subarray = get_member_info(md);
ok = st->ss->load_super(st, dfd, NULL);
}
close(dfd);
if (ok != 0)
continue;
- st->ss->getinfo_super(st, &info);
+ info = st->ss->container_content(st, subarray);
+
if (md->devnum >= 0)
path = map_dev(MD_MAJOR, md->devnum, 0);
else
* find a unique name based on metadata name.
*
*/
- struct mddev_ident_s *match = conf_match(&info, st);
+ struct mddev_ident *match = conf_match(info, st);
struct stat stb;
if (match && match->devname && match->devname[0] == '/') {
path = match->devname;
st->ss->match_home(st, homehost) != 1) &&
st->ss->match_home(st, "any") != 1 &&
(require_homehost
- || ! conf_name_is_free(info.name)))
+ || ! conf_name_is_free(info->name)))
/* require a numeric suffix */
unum = 0;
else
/* allow name to be used as-is if no conflict */
unum = -1;
- name = info.name;
+ name = info->name;
if (!*name) {
name = st->ss->name;
if (!isdigit(name[strlen(name)-1]) &&
}
}
map_add(&map, md->devnum,
- info.text_version,
- info.uuid, path);
+ info->text_version,
+ info->uuid, path);
st->ss->free_super(st);
+ free(info);
break;
}
sysfs_free(sra);
size, though if there is a variance among the drives of greater than 1%, a warning is
issued.
+A suffix of 'M' or 'G' can be given to indicate Megabytes or
+Gigabytes respectively.
+
This value can be set with
.B \-\-grow
for RAID level 1/4/5/6. If the array was created with a size smaller
.BR \-Z ", " \-\-array-size=
This is only meaningful with
.B \-\-grow
-and its effect is not persistent: when the array is stopped an
+and its effect is not persistent: when the array is stopped and
restarted the default array size will be restored.
Setting the array-size causes the array to appear smaller to programs
is, it is required that the array size is reduced as appropriate
before the number of devices in the array is reduced.
+A suffix of 'M' or 'G' can be given to indicate Megabytes or
+Gigabytes respectively.
+A value of
+.B max
+restores the apparent size of the array to be whatever the real
+amount of available space is.
+
.TP
.BR \-c ", " \-\-chunk=
Specify chunk size of kibibytes. The default when creating an
default when Building and array with no persistent metadata is 64KB.
This is only meaningful for RAID0, RAID4, RAID5, RAID6, and RAID10.
+A suffix of 'M' or 'G' can be given to indicate Megabytes or
+Gigabytes respectively.
+
.TP
.BR \-\-rounding=
Specify rounding factor for a Linear array. The size of each
bitmap, the chunksize defaults to 64Meg, or larger if necessary to
fit the bitmap into the available space.
+A suffix of 'M' or 'G' can be given to indicate Megabytes or
+Gigabytes respectively.
+
.TP
.BR \-W ", " \-\-write\-mostly
subsequent devices listed in a
The file must be stored on a separate device, not on the RAID array
being reshaped.
-.TP
-.BR \-\-array-size= ", " \-Z
-Set the size of the array which is seen by users of the device such as
-filesystems. This can be less that the real size, but never greater.
-The size set this way does not persist across restarts of the array.
-
-This is most useful when reducing the number of devices in a RAID5 or
-RAID6. Such arrays require the array-size to be reduced before a
-reshape can be performed that reduces the real size.
-
-A value of
-.B max
-restores the apparent size of the array to be whatever the real
-amount of available space is.
-
.TP
.BR \-N ", " \-\-name=
Set a
.BR \-\-backup\-file=
If
.B \-\-backup\-file
-was used when requesting a grow, shrink, RAID level change or other
-reshape, and the system crashed during the critical section, then the
-same
+was used while reshaping an array (e.g. changing number of devices or
+chunk size) and the system crashed during the critical section, then the same
.B \-\-backup\-file
must be presented to
.B \-\-assemble
to allow possibly corrupted data to be restored, and the reshape
to be completed.
+.TP
+.BR \-\-invalid\-backup
+If the file needed for the above option is not available for any
+reason an empty file can be given together with this option to
+indicate that the backup file is invalid. In this case the data that
+was being rearranged at the time of the crash could be irrecoverably
+lost, but the rest of the array may still be recoverable. This option
+should only be used as a last resort if there is no way to recover the
+backup file.
+
+
.TP
.BR \-U ", " \-\-update=
Update the superblock on each device while assembling the array. The
.BR resync ,
.BR byteorder ,
.BR devicesize ,
+.BR no\-bitmap ,
or
.BR super\-minor .
The
.B devicesize
-will rarely be of use. It applies to version 1.1 and 1.2 metadata
+option will rarely be of use. It applies to version 1.1 and 1.2 metadata
only (where the metadata is at the start of the device) and is only
useful when the component device has changed size (typically become
larger). The version 1 metadata records the amount of the device that
to determine the maximum usable amount of space on each device and
update the relevant field in the metadata.
+The
+.B no\-bitmap
+option can be used when an array has an internal bitmap which is
+corrupt in some way so that assembling the array normally fails. It
+will cause any internal bitmap to be ignored.
+
.ig
.TP
.B \-\-auto\-update\-homehost
.BR \-a ", " \-\-add
hot-add listed devices.
If a device appears to have recently been part of the array
-(possibly it failed or was removed) the device is re-added as describe
+(possibly it failed or was removed) the device is re\-added as described
in the next point.
If that fails or the device was never part of the array, the device is
added as a hot-spare.
it will be assumed that bitmap-based recovery is enough to make the
device fully consistent with the array.
+The
+.B \-\-re\-add
+option can be accompanied by
+.BR \-\-update=devicesize .
+See the description of this option when used in Assemble mode for an
+explanation of its use.
+
If the device name given is
.B missing
then mdadm will try to find any device that looks like it should be
int layout = UnSet;
char *layout_str = NULL;
int raiddisks = 0;
- int max_disks = MD_SB_DISKS; /* just a default */
int sparedisks = 0;
- struct mddev_ident_s ident;
+ struct mddev_ident ident;
char *configfile = NULL;
char *cp;
char *update = NULL;
int scan = 0;
- char devmode = 0;
+ int devmode = 0;
int runstop = 0;
int readonly = 0;
int write_behind = 0;
int bitmap_fd = -1;
char *bitmap_file = NULL;
char *backup_file = NULL;
+ int invalid_backup = 0;
int bitmap_chunk = UnSet;
int SparcAdjust = 0;
- mddev_dev_t devlist = NULL;
- mddev_dev_t *devlistend = & devlist;
- mddev_dev_t dv;
+ struct mddev_dev *devlist = NULL;
+ struct mddev_dev **devlistend = & devlist;
+ struct mddev_dev *dv;
int devs_found = 0;
int verbose = 0;
int quiet = 0;
int daemonise = 0;
char *pidfile = NULL;
int oneshot = 0;
+ int spare_sharing = 1;
struct supertype *ss = NULL;
int writemostly = 0;
int re_add = 0;
int rebuild_map = 0;
int auto_update_home = 0;
char *subarray = NULL;
+ char *remove_path = NULL;
int print_help = 0;
FILE *outf;
int newmode = mode;
/* firstly, some mode-independent options */
switch(opt) {
+ case HelpOptions:
+ print_help = 2;
+ continue;
case 'h':
- if (option_index > 0 &&
- strcmp(long_options[option_index].name, "help-options")==0)
- print_help = 2;
- else
- print_help = 1;
+ print_help = 1;
continue;
case 'V':
continue;
case 'b':
- if (mode == ASSEMBLE || mode == BUILD || mode == CREATE || mode == GROW ||
- mode == INCREMENTAL || mode == MANAGE)
+ if (mode == ASSEMBLE || mode == BUILD || mode == CREATE
+ || mode == GROW || mode == INCREMENTAL
+ || mode == MANAGE)
break; /* b means bitmap */
+ case Brief:
brief = 1;
continue;
*/
switch(opt) {
- case '@': /* just incase they say --manage */
+ case ManageOpt:
newmode = MANAGE;
shortopt = short_bitmap_options;
break;
case 'a':
+ case Add:
case 'r':
+ case Remove:
case 'f':
+ case Fail:
case ReAdd: /* re-add */
if (!mode) {
newmode = MANAGE;
case AutoDetect:
newmode = AUTODETECT; break;
- case '#':
+ case MiscOpt:
case 'D':
case 'E':
case 'X':
case 'o':
case 'w':
case 'W':
+ case WaitOpt:
case Waitclean:
case DetailPlatform:
case KillSubarray:
case UpdateSubarray:
if (opt == KillSubarray || opt == UpdateSubarray) {
if (subarray) {
- fprintf(stderr, Name ": subarray can only be specified once\n");
+ fprintf(stderr, Name ": subarray can only"
+ " be specified once\n");
exit(2);
}
subarray = optarg;
}
case 'K': if (!mode) newmode = MISC; break;
+ case NoSharing: newmode = MONITOR; break;
}
if (mode && newmode == mode) {
/* everybody happy ! */
mode = newmode;
} else {
/* special case of -c --help */
- if (opt == 'c' &&
+ if ((opt == 'c' || opt == ConfigFile) &&
( strncmp(optarg, "--h", 3)==0 ||
strncmp(optarg, "-h", 2)==0)) {
fputs(Help_config, stdout);
dv->writemostly = writemostly;
dv->re_add = re_add;
dv->used = 0;
- dv->content = NULL;
dv->next = NULL;
*devlistend = dv;
devlistend = &dv->next;
/* if we just set the mode, then done */
switch(opt) {
- case '@':
- case '#':
+ case ManageOpt:
+ case MiscOpt:
case 'A':
case 'B':
case 'C':
if (opt == 1) {
/* an undecorated option - must be a device name.
*/
- if (devs_found > 0 && mode == '@' && !devmode) {
- fprintf(stderr, Name ": Must give one of -a/-r/-f for subsequent devices at %s\n", optarg);
+ if (devs_found > 0 && mode == MANAGE && !devmode) {
+ fprintf(stderr, Name ": Must give one of -a/-r/-f"
+ " for subsequent devices at %s\n", optarg);
exit(2);
}
- if (devs_found > 0 && mode == 'G' && !devmode) {
- fprintf(stderr, Name ": Must give one of -a for devices do add: %s\n", optarg);
+ if (devs_found > 0 && mode == GROW && !devmode) {
+ fprintf(stderr, Name ": Must give -a/--add for"
+ " devices to add: %s\n", optarg);
exit(2);
}
dv = malloc(sizeof(*dv));
dv->writemostly = writemostly;
dv->re_add = re_add;
dv->used = 0;
- dv->content = NULL;
dv->next = NULL;
*devlistend = dv;
devlistend = &dv->next;
/* We've got a mode, and opt is now something else which
* could depend on the mode */
-#define O(a,b) ((a<<8)|b)
+#define O(a,b) ((a<<16)|b)
switch (O(mode,opt)) {
case O(GROW,'c'):
+ case O(GROW,ChunkSize):
case O(CREATE,'c'):
+ case O(CREATE,ChunkSize):
case O(BUILD,'c'): /* chunk or rounding */
+ case O(BUILD,ChunkSize): /* chunk or rounding */
if (chunk) {
fprintf(stderr, Name ": chunk/rounding may only be specified once. "
"Second value is %s.\n", optarg);
exit(2);
}
- chunk = strtol(optarg, &c, 10);
- if (!optarg[0] || *c || chunk<4 || ((chunk-1)&chunk)) {
+ chunk = parse_size(optarg);
+ if (chunk < 8 || ((chunk-1)&chunk)) {
fprintf(stderr, Name ": invalid chunk/rounding value: %s\n",
optarg);
exit(2);
}
+ /* Convert sectors to K */
+ chunk /= 2;
continue;
#if 0
fprintf(stderr, Name ": unrecognised metadata identifier: %s\n", optarg);
exit(2);
}
- max_disks = ss->max_devs;
continue;
case O(MANAGE,'W'):
+ case O(MANAGE,WriteMostly):
case O(BUILD,'W'):
+ case O(BUILD,WriteMostly):
case O(CREATE,'W'):
+ case O(CREATE,WriteMostly):
/* set write-mostly for following devices */
writemostly = 1;
continue;
continue;
case O(GROW, 'p'): /* new layout */
+ case O(GROW, Layout):
if (layout_str) {
fprintf(stderr,Name ": layout may only be sent once. "
"Second value was %s\n", optarg);
continue;
case O(CREATE,'p'): /* raid5 layout */
+ case O(CREATE,Layout):
case O(BUILD,'p'): /* faulty layout */
+ case O(BUILD,Layout):
if (layout != UnSet) {
fprintf(stderr,Name ": layout may only be sent once. "
"Second value was %s\n", optarg);
continue;
case O(CREATE,'a'):
+ case O(CREATE,Auto):
case O(BUILD,'a'):
+ case O(BUILD,Auto):
case O(INCREMENTAL,'a'):
- case O(ASSEMBLE,'a'): /* auto-creation of device node */
+ case O(INCREMENTAL,Auto):
+ case O(ASSEMBLE,'a'):
+ case O(ASSEMBLE,Auto): /* auto-creation of device node */
autof = parse_auto(optarg, "--auto flag", 0);
continue;
continue;
case O(BUILD,'f'): /* force honouring '-n 1' */
+ case O(BUILD,Force): /* force honouring '-n 1' */
case O(GROW,'f'): /* ditto */
+ case O(GROW,Force): /* ditto */
case O(CREATE,'f'): /* force honouring of device list */
+ case O(CREATE,Force): /* force honouring of device list */
case O(ASSEMBLE,'f'): /* force assembly */
+ case O(ASSEMBLE,Force): /* force assembly */
case O(MISC,'f'): /* force zero */
+ case O(MISC,Force): /* force zero */
force=1;
continue;
continue;
case O(ASSEMBLE,'m'): /* super-minor for array */
+ case O(ASSEMBLE,SuperMinor):
if (ident.super_minor != UnSet) {
fprintf(stderr, Name ": super-minor cannot be set twice. "
"Second value: %s.\n", optarg);
case O(ASSEMBLE,'U'): /* update the superblock */
case O(MISC,'U'):
if (update) {
- fprintf(stderr, Name ": Can only update one aspect of superblock, both %s and %s given.\n",
+ fprintf(stderr, Name ": Can only update one aspect"
+ " of superblock, both %s and %s given.\n",
update, optarg);
exit(2);
}
if (mode == MISC && !subarray) {
- fprintf(stderr, Name ": Only subarrays can be updated in misc mode\n");
+ fprintf(stderr, Name ": Only subarrays can be"
+ " updated in misc mode\n");
exit(2);
}
update = optarg;
continue;
if (strcmp(update, "devicesize")==0)
continue;
+ if (strcmp(update, "no-bitmap")==0)
+ continue;
if (strcmp(update, "byteorder")==0) {
if (ss) {
- fprintf(stderr, Name ": must not set metadata type with --update=byteorder.\n");
+ fprintf(stderr,
+ Name ": must not set metadata"
+ " type with --update=byteorder.\n");
exit(2);
}
for(i=0; !ss && superlist[i]; i++)
- ss = superlist[i]->match_metadata_desc("0.swap");
+ ss = superlist[i]->match_metadata_desc(
+ "0.swap");
if (!ss) {
- fprintf(stderr, Name ": INTERNAL ERROR cannot find 0.swap\n");
+ fprintf(stderr, Name ": INTERNAL ERROR"
+ " cannot find 0.swap\n");
exit(2);
}
}
fprintf(outf, "Valid --update options are:\n"
" 'sparc2.2', 'super-minor', 'uuid', 'name', 'resync',\n"
- " 'summaries', 'homehost', 'byteorder', 'devicesize'.\n");
+ " 'summaries', 'homehost', 'byteorder', 'devicesize',\n"
+ " 'no-bitmap'\n");
exit(outf == stdout ? 0 : 2);
+ case O(MANAGE,'U'):
+ /* update=devicesize is allowed with --re-add */
+ if (devmode != 'a' || re_add != 1) {
+ fprintf(stderr, Name "--update in Manage mode only"
+ " allowed with --re-add.\n");
+ exit(1);
+ }
+ if (update) {
+ fprintf(stderr, Name ": Can only update one aspect"
+ " of superblock, both %s and %s given.\n",
+ update, optarg);
+ exit(2);
+ }
+ update = optarg;
+ if (strcmp(update, "devicesize") != 0) {
+ fprintf(stderr, Name ": only 'devicesize' can be"
+ " updated with --re-add\n");
+ exit(2);
+ }
+ continue;
+
case O(INCREMENTAL,NoDegraded):
fprintf(stderr, Name ": --no-degraded is deprecated in Incremental mode\n");
case O(ASSEMBLE,NoDegraded): /* --no-degraded */
* so we overload slightly */
continue;
- case O(ASSEMBLE,'c'): /* config file */
+ case O(ASSEMBLE,'c'):
+ case O(ASSEMBLE,ConfigFile):
case O(INCREMENTAL, 'c'):
+ case O(INCREMENTAL, ConfigFile):
case O(MISC, 'c'):
+ case O(MISC, ConfigFile):
case O(MONITOR,'c'):
+ case O(MONITOR,ConfigFile):
if (configfile) {
fprintf(stderr, Name ": configfile cannot be set twice. "
"Second value is %s.\n", optarg);
continue;
case O(MONITOR,'m'): /* mail address */
+ case O(MONITOR,EMail):
if (mailaddr)
fprintf(stderr, Name ": only specify one mailaddress. %s ignored.\n",
optarg);
continue;
case O(MONITOR,'p'): /* alert program */
+ case O(MONITOR,ProgramOpt): /* alert program */
if (program)
fprintf(stderr, Name ": only specify one alter program. %s ignored.\n",
optarg);
continue;
case O(MONITOR,'r'): /* rebuild increments */
+ case O(MONITOR,Increment):
increments = atoi(optarg);
if (increments>99 || increments<1) {
fprintf(stderr, Name ": please specify positive integer between 1 and 99 as rebuild increments.\n");
}
continue;
case O(MONITOR,'f'): /* daemonise */
+ case O(MONITOR,Fork):
daemonise = 1;
continue;
case O(MONITOR,'i'): /* pid */
openlog("mdadm", LOG_PID, SYSLOG_FACILITY);
dosyslog = 1;
continue;
-
+ case O(MONITOR, NoSharing):
+ spare_sharing = 0;
+ continue;
/* now the general management options. Some are applicable
* to other modes. None have arguments.
*/
case O(GROW,'a'):
- case O(MANAGE,'a'): /* add a drive */
+ case O(GROW,Add):
+ case O(MANAGE,'a'):
+ case O(MANAGE,Add): /* add a drive */
devmode = 'a';
re_add = 0;
continue;
re_add = 1;
continue;
case O(MANAGE,'r'): /* remove a drive */
+ case O(MANAGE,Remove):
devmode = 'r';
continue;
case O(MANAGE,'f'): /* set faulty */
- case O(INCREMENTAL,'f'): /* r for incremental is taken, use f
+ case O(MANAGE,Fail):
+ case O(INCREMENTAL,'f'):
+ case O(INCREMENTAL,Remove):
+ case O(INCREMENTAL,Fail): /* r for incremental is taken, use f
* even though we will both fail and
* remove the device */
devmode = 'f';
case O(MISC,'o'):
case O(MISC,'w'):
case O(MISC,'W'):
+ case O(MISC, WaitOpt):
case O(MISC, Waitclean):
case O(MISC, DetailPlatform):
case O(MISC, KillSubarray):
continue;
case O(ASSEMBLE,'b'): /* here we simply set the bitmap file */
+ case O(ASSEMBLE,Bitmap):
if (!optarg) {
fprintf(stderr, Name ": bitmap file needed with -b in --assemble mode\n");
exit(2);
backup_file = optarg;
continue;
+ case O(ASSEMBLE, InvalidBackup):
+ /* Acknowledge that the backupfile is invalid, but ask
+ * to continue anyway
+ */
+ invalid_backup = 1;
+ continue;
+
case O(BUILD,'b'):
- case O(CREATE,'b'): /* here we create the bitmap */
+ case O(BUILD,Bitmap):
+ case O(CREATE,'b'):
+ case O(CREATE,Bitmap): /* here we create the bitmap */
if (strcmp(optarg, "none") == 0) {
fprintf(stderr, Name ": '--bitmap none' only"
" support for --grow\n");
}
/* FALL THROUGH */
case O(GROW,'b'):
+ case O(GROW,Bitmap):
if (strcmp(optarg, "internal")== 0 ||
strcmp(optarg, "none")== 0 ||
strchr(optarg, '/') != NULL) {
case O(GROW,BitmapChunk):
case O(BUILD,BitmapChunk):
case O(CREATE,BitmapChunk): /* bitmap chunksize */
- bitmap_chunk = strtol(optarg, &c, 10);
- if (!optarg[0] || *c || bitmap_chunk < 0 ||
- bitmap_chunk & (bitmap_chunk - 1)) {
- fprintf(stderr, Name ": invalid bitmap chunksize: %s\n",
- optarg);
+ bitmap_chunk = parse_size(optarg);
+ if (bitmap_chunk < 0 ||
+ bitmap_chunk & (bitmap_chunk - 1)) {
+ fprintf(stderr,
+ Name ": invalid bitmap chunksize: %s\n",
+ optarg);
exit(2);
}
- /* convert K to B, chunk of 0K means 512B */
- bitmap_chunk = bitmap_chunk ? bitmap_chunk * 1024 : 512;
+ /* convert sectors to B, chunk of 0 means 512B */
+ bitmap_chunk = bitmap_chunk ? bitmap_chunk * 512 : 512;
continue;
case O(GROW, WriteBehind):
continue;
case O(INCREMENTAL, 'r'):
+ case O(INCREMENTAL, RebuildMapOpt):
rebuild_map = 1;
continue;
+ case O(INCREMENTAL, IncrementalPath):
+ remove_path = optarg;
+ continue;
}
/* We have now processed all the valid options. Anything else is
* an error
}
if (raiddisks) {
- if (raiddisks > max_disks) {
- fprintf(stderr, Name ": invalid number of raid devices: %d\n",
- raiddisks);
- exit(2);
- }
if (raiddisks == 1 && !force && level != -5) {
fprintf(stderr, Name ": '1' is an unusual number of drives for an array, so it is probably\n"
" a mistake. If you really mean it you will need to specify --force before\n"
exit(2);
}
}
- if (sparedisks) {
- if ( sparedisks > max_disks - raiddisks) {
- fprintf(stderr, Name ": invalid number of spare-devices: %d\n",
- sparedisks);
- exit(2);
- }
- }
if (homehost == NULL)
homehost = conf_get_homehost(&require_homehost);
rv = Manage_ro(devlist->devname, mdfd, readonly);
if (!rv && devs_found>1)
rv = Manage_subdevs(devlist->devname, mdfd,
- devlist->next, verbose-quiet, test);
+ devlist->next, verbose-quiet, test,
+ update);
if (!rv && readonly < 0)
rv = Manage_ro(devlist->devname, mdfd, readonly);
if (!rv && runstop)
if (devs_found == 1 && ident.uuid_set == 0 &&
ident.super_minor == UnSet && ident.name[0] == 0 && !scan ) {
/* Only a device has been given, so get details from config file */
- mddev_ident_t array_ident = conf_get_ident(devlist->devname);
+ struct mddev_ident *array_ident = conf_get_ident(devlist->devname);
if (array_ident == NULL) {
fprintf(stderr, Name ": %s not identified in config file.\n",
devlist->devname);
if (array_ident->autof == 0)
array_ident->autof = autof;
rv |= Assemble(ss, devlist->devname, array_ident,
- NULL, backup_file,
+ NULL, backup_file, invalid_backup,
readonly, runstop, update,
homehost, require_homehost,
verbose-quiet, force);
}
} else if (!scan)
rv = Assemble(ss, devlist->devname, &ident,
- devlist->next, backup_file,
+ devlist->next, backup_file, invalid_backup,
readonly, runstop, update,
homehost, require_homehost,
verbose-quiet, force);
exit(1);
}
for (dv = devlist ; dv ; dv=dv->next) {
- mddev_ident_t array_ident = conf_get_ident(dv->devname);
+ struct mddev_ident *array_ident = conf_get_ident(dv->devname);
if (array_ident == NULL) {
fprintf(stderr, Name ": %s not identified in config file.\n",
dv->devname);
if (array_ident->autof == 0)
array_ident->autof = autof;
rv |= Assemble(ss, dv->devname, array_ident,
- NULL, backup_file,
+ NULL, backup_file, invalid_backup,
readonly, runstop, update,
homehost, require_homehost,
verbose-quiet, force);
}
} else {
- mddev_ident_t a, array_list = conf_get_ident(NULL);
- mddev_dev_t devlist = conf_get_devs();
+ struct mddev_ident *a, *array_list = conf_get_ident(NULL);
+ struct mddev_dev *devlist = conf_get_devs();
int cnt = 0;
int failures, successes;
if (devlist == NULL) {
r = Assemble(ss, a->devname,
a,
- NULL, NULL,
+ NULL, NULL, 0,
readonly, runstop, NULL,
homehost, require_homehost,
verbose-quiet, force);
int acnt;
ident.autof = autof;
do {
- mddev_dev_t devlist = conf_get_devs();
+ struct mddev_dev *devlist = conf_get_devs();
acnt = 0;
do {
rv2 = Assemble(ss, NULL,
&ident,
- devlist, NULL,
+ devlist, NULL, 0,
readonly, runstop, NULL,
homehost, require_homehost,
verbose-quiet, force);
do {
acnt = 0;
do {
- rv2 = Assemble(ss, NULL,
- &ident,
- NULL, NULL,
- readonly, runstop, "homehost",
- homehost, require_homehost,
- verbose-quiet, force);
+ rv2 = Assemble(
+ ss, NULL,
+ &ident,
+ NULL, NULL, 0,
+ readonly, runstop,
+ "homehost",
+ homehost,
+ require_homehost,
+ verbose-quiet, force);
if (rv2==0) {
cnt++;
acnt++;
case 'X':
rv |= ExamineBitmap(dv->devname, brief, ss); continue;
case 'W':
+ case WaitOpt:
rv |= Wait(dv->devname); continue;
case Waitclean:
rv |= WaitClean(dv->devname, -1, verbose-quiet); continue;
break;
}
if (delay == 0) {
- if (get_linux_version() > 20616)
+ if (get_linux_version() > 2006016)
/* mdstat responds to poll */
delay = 1000;
else
}
rv= Monitor(devlist, mailaddr, program,
delay?delay:60, daemonise, scan, oneshot,
- dosyslog, test, pidfile, increments);
+ dosyslog, test, pidfile, increments, spare_sharing);
break;
case GROW:
} else if (size >= 0 || raiddisks != 0 || layout_str != NULL
|| chunk != 0 || level != UnSet) {
rv = Grow_reshape(devlist->devname, mdfd, quiet, backup_file,
- size, level, layout_str, chunk, raiddisks);
+ size, level, layout_str, chunk, raiddisks,
+ force);
} else if (array_size < 0)
fprintf(stderr, Name ": no changes to --grow\n");
break;
rv = 1;
break;
}
- if (devmode == 'f') {
- rv = IncrementalRemove(devlist->devname, verbose-quiet);
- break;
- }
- rv = Incremental(devlist->devname, verbose-quiet, runstop,
- ss, homehost, require_homehost, autof);
+ if (devmode == 'f')
+ rv = IncrementalRemove(devlist->devname, remove_path,
+ verbose-quiet);
+ else
+ rv = Incremental(devlist->devname, verbose-quiet,
+ runstop, ss, homehost,
+ require_homehost, autof);
break;
case AUTODETECT:
autodetect();
#define MDMON_DIR "/dev/.mdadm/"
#endif /* MDMON_DIR */
+/* FAILED_SLOTS_DIR is where to save files storing recent removal of array
+ * member in order to allow future reuse of disk inserted in the same
+ * slot for array recovery
+ */
+#ifndef FAILED_SLOTS_DIR
+#define FAILED_SLOTS_DIR "/dev/.mdadm/failed-slots"
+#endif /* FAILED_SLOTS */
+
#include "md_u.h"
#include "md_p.h"
#include "bitmap.h"
Help_manage[], Help_misc[], Help_monitor[], Help_config[];
/* for option that don't have short equivilents, we assign arbitrary
- * small numbers. '1' means an undecorated option, so we start at '2'.
- * (note we must stop before we get to 65 i.e. 'A')
+ * numbers later than any 'short' character option.
*/
enum special_options {
- AssumeClean = 2,
+ AssumeClean = 300,
BitmapChunk,
WriteBehind,
ReAdd,
NoDegraded,
Sparc22,
- BackupFile, /* 8 */
+ BackupFile,
HomeHost,
AutoHomeHost,
Symlinks,
Waitclean,
DetailPlatform,
KillSubarray,
- UpdateSubarray, /* 16 */
+ UpdateSubarray,
+ IncrementalPath,
+ NoSharing,
+ HelpOptions,
+ Brief,
+ ManageOpt,
+ Add,
+ Remove,
+ Fail,
+ MiscOpt,
+ WaitOpt,
+ ConfigFile,
+ ChunkSize,
+ WriteMostly,
+ Layout,
+ Auto,
+ Force,
+ SuperMinor,
+ EMail,
+ ProgramOpt,
+ Increment,
+ Fork,
+ Bitmap,
+ RebuildMapOpt,
+ InvalidBackup,
};
/* structures read from config file */
* devices is considered
*/
#define UnSet (0xfffe)
-typedef struct mddev_ident_s {
+struct mddev_ident {
char *devname;
int uuid_set;
*/
char *member; /* subarray within a container */
- struct mddev_ident_s *next;
+ struct mddev_ident *next;
union {
/* fields needed by different users of this structure */
int assembled; /* set when assembly succeeds */
};
-} *mddev_ident_t;
+};
/* List of device names - wildcards expanded */
-typedef struct mddev_dev_s {
+struct mddev_dev {
char *devname;
- char disposition; /* 'a' for add, 'r' for remove, 'f' for fail.
+ int disposition; /* 'a' for add, 'r' for remove, 'f' for fail.
* Not set for names read from .config
*/
char writemostly; /* 1 for 'set writemostly', 2 for 'clear writemostly' */
char re_add;
char used; /* set when used */
- struct mdinfo *content; /* If devname is a container, this might list
- * the remaining member arrays. */
- struct mddev_dev_s *next;
-} *mddev_dev_t;
+ struct mddev_dev *next;
+};
typedef struct mapping {
char *name;
char *level;
char *pattern; /* U or up, _ for down */
int percent; /* -1 if no resync */
- int resync; /* 1 if resync, 0 if recovery */
+ int resync; /* 3 if check, 2 if reshape, 1 if resync, 0 if recovery */
int devcnt;
int raid_disks;
- int chunk_size;
char * metadata_version;
struct dev_member {
char *name;
extern int sysfs_get_ll(struct mdinfo *sra, struct mdinfo *dev,
char *name, unsigned long long *val);
extern int sysfs_fd_get_str(int fd, char *val, int size);
+extern int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev,
+ char *name);
extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val, int size);
extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume);
extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
extern int sysfs_unique_holder(int devnum, long rdev);
+extern int sysfs_freeze_array(struct mdinfo *sra);
extern int load_sys(char *path, char *buf);
-
+extern int reshape_prepare_fdlist(char *devname,
+ struct mdinfo *sra,
+ int raid_disks,
+ int nrdisks,
+ unsigned long blocks,
+ char *backup_file,
+ int *fdlist,
+ unsigned long long *offsets);
+extern void reshape_free_fdlist(int *fdlist,
+ unsigned long long *offsets,
+ int size);
+extern int reshape_open_backup_file(char *backup,
+ int fd,
+ char *devname,
+ long blocks,
+ int *fdlist,
+ unsigned long long *offsets);
+extern unsigned long compute_backup_blocks(int nchunk, int ochunk,
+ unsigned int ndata, unsigned int odata);
extern int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
* The particular device should be:
* The last device added by add_to_super
* The device the metadata was loaded from by load_super
+ * If 'map' is present, then it is an array raid_disks long
+ * (raid_disk must already be set and correct) and it is filled
+ * with 1 for slots that are thought to be active and 0 for slots which
+ * appear to be failed/missing.
*/
- void (*getinfo_super)(struct supertype *st, struct mdinfo *info);
-
+ void (*getinfo_super)(struct supertype *st, struct mdinfo *info, char *map);
+ struct mdinfo *(*getinfo_super_disks)(struct supertype *st);
/* Check if the given metadata is flagged as belonging to "this"
* host. 0 for 'no', 1 for 'yes', -1 for "Don't record homehost"
*/
int (*write_init_super)(struct supertype *st);
int (*compare_super)(struct supertype *st, struct supertype *tst);
int (*load_super)(struct supertype *st, int fd, char *devname);
+ int (*load_container)(struct supertype *st, int fd, char *devname);
struct supertype * (*match_metadata_desc)(char *arg);
__u64 (*avail_size)(struct supertype *st, __u64 size);
+ unsigned long long (*min_acceptable_spare_size)(struct supertype *st);
int (*add_internal_bitmap)(struct supertype *st, int *chunkp,
int delay, int write_behind,
unsigned long long size, int may_change, int major);
* added to validate changing size and new devices. If there are
* inter-device dependencies, it should record sufficient details
* so these can be validated.
- * Both 'size' and '*freesize' are in sectors. chunk is bytes.
+ * Both 'size' and '*freesize' are in sectors. chunk is KiB.
*/
int (*validate_geometry)(struct supertype *st, int level, int layout,
int raiddisks,
char *subdev, unsigned long long *freesize,
int verbose);
- struct mdinfo *(*container_content)(struct supertype *st);
- /* Allow a metadata handler to override mdadm's default layouts */
- int (*default_layout)(int level); /* optional */
- /* query the supertype for default chunk size */
- int (*default_chunk)(struct supertype *st); /* optional */
+ struct mdinfo *(*container_content)(struct supertype *st, char *subarray);
+ /* query the supertype for default geometry */
+ void (*default_geometry)(struct supertype *st, int *level, int *layout, int *chunk); /* optional */
/* Permit subarray's to be deleted from inactive containers */
int (*kill_subarray)(struct supertype *st); /* optional */
/* Permit subarray's to be modified */
- int (*update_subarray)(struct supertype *st, char *update, mddev_ident_t ident); /* optional */
+ int (*update_subarray)(struct supertype *st, char *subarray,
+ char *update, struct mddev_ident *ident); /* optional */
+ /* Check if reshape is supported for this external format.
+ * st is obtained from super_by_fd() where st->subarray[0] is
+ * initialized to indicate if reshape is being performed at the
+ * container or subarray level
+ */
+ int (*reshape_super)(struct supertype *st, long long size, int level,
+ int layout, int chunksize, int raid_disks,
+ char *backup, char *dev, int verbose); /* optional */
+ int (*manage_reshape)(struct supertype *st, char *backup); /* optional */
/* for mdmon */
int (*open_new)(struct supertype *c, struct active_array *a,
*/
struct mdinfo *(*activate_spare)(struct active_array *a,
struct metadata_update **updates);
+ /*
+ * Return statically allocated string that represents metadata specific
+ * controller domain of the disk. The domain is used in disk domain
+ * matching functions. Disks belong to the same domain if they have
+ * the same domain from mdadm.conf and belong to the same metadata domain.
+ * Returning NULL or not providing this handler means that metadata
+ * does not distinguish the differences between disks that belong to
+ * different controllers. They are in the domain specified by
+ * configuration file (mdadm.conf).
+ * In case when the metadata has the notion of domains based on disk
+ * it shall return NULL for disks that do not belong to one of
+ * the supported controller domains. Such disks will form another
+ * domain and won't be mixed with supported ones.
+ */
+ const char *(*get_disk_controller_domain)(const char *path);
int swapuuid; /* true if uuid is bigending rather than hostendian */
int external;
const char *name; /* canonical metadata name */
-} super0, super1, super_ddf, *superlist[];
+} *superlist[];
-extern struct superswitch super_imsm;
+extern struct superswitch super0, super1;
+extern struct superswitch super_imsm, super_ddf;
+extern struct superswitch mbr, gpt;
struct metadata_update {
int len;
int minor_version;
int max_devs;
int container_dev; /* devnum of container */
- char subarray[32]; /* name of array inside container */
void *sb;
void *info;
- int loaded_container; /* Set if load_super found a container,
- * not just one device */
struct metadata_update *updates;
struct metadata_update **update_tail;
};
-extern struct supertype *super_by_fd(int fd);
-extern struct supertype *guess_super(int fd);
+extern struct supertype *super_by_fd(int fd, char **subarray);
+enum guess_types { guess_any, guess_array, guess_partitions };
+extern struct supertype *guess_super_type(int fd, enum guess_types guess_type);
+/* Convenience wrapper: call guess_super_type() on 'fd' with guess_any,
+ * i.e. without restricting which kind of metadata is guessed.
+ */
+static inline struct supertype *guess_super(int fd) {
+ return guess_super_type(fd, guess_any);
+}
extern struct supertype *dup_super(struct supertype *st);
extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
+extern int must_be_container(int fd);
+extern int dev_size_from_id(dev_t id, unsigned long long *size);
extern void get_one_disk(int mdfd, mdu_array_info_t *ainf,
mdu_disk_info_t *disk);
void wait_for(char *dev, int fd);
+/*
+ * Data structures for policy management.
+ * Each device can have a policy structure that lists
+ * various name/value pairs each possibly with a metadata associated.
+ * The policy list is sorted by name/value/metadata
+ */
+struct dev_policy {
+ struct dev_policy *next;
+ char *name; /* None of these strings are allocated. They are
+ * all just references to strings which are known
+ * to exist elsewhere.
+ * name and metadata can be compared by address equality.
+ */
+ const char *metadata;
+ const char *value;
+};
+
+extern char pol_act[], pol_domain[], pol_metadata[], pol_auto[];
+
+/* Iterate over the sublist starting at 'list', having the same
+ * 'name' as 'list', and matching the given metadata (where
+ * NULL matches anything: a NULL '_metadata' or a NULL item->metadata
+ * both count as a match).
+ *
+ * 'item' is the loop cursor (must be an lvalue), names and metadata are
+ * compared by address.  The trailing "if (...) ; else" makes the
+ * statement that follows the macro the loop body, executed only for
+ * matching entries.  Every macro argument is parenthesised so that
+ * expressions such as '&head' or 'cond ? a : b' can be passed safely;
+ * note that 'list' and '_metadata' are evaluated on every iteration.
+ */
+#define pol_for_each(item, list, _metadata)				\
+	for ((item) = (list);						\
+	     (item) && (item)->name == (list)->name;			\
+	     (item) = (item)->next)					\
+		if (!(!(_metadata) || !(item)->metadata ||		\
+		      (_metadata) == (item)->metadata))			\
+			; else
+
+/*
+ * policy records read from mdadm are largely just name-value pairs.
+ * The names are constants, not strdupped
+ */
+struct pol_rule {
+ struct pol_rule *next;
+ char *type; /* rule_policy or rule_part */
+ struct rule {
+ struct rule *next;
+ char *name;
+ char *value;
+ char *dups; /* duplicates of 'value' with a partNN appended */
+ } *rule;
+};
+
+extern char rule_policy[], rule_part[];
+extern char rule_path[], rule_type[];
+extern char type_part[], type_disk[];
+
+extern void policyline(char *line, char *type);
+extern void policy_add(char *type, ...);
+extern void policy_free(void);
+
+extern struct dev_policy *path_policy(char *path, char *type);
+extern struct dev_policy *disk_policy(struct mdinfo *disk);
+extern struct dev_policy *devnum_policy(int dev);
+extern void dev_policy_free(struct dev_policy *p);
+
+//extern void pol_new(struct dev_policy **pol, char *name, char *val, char *metadata);
+extern void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata);
+extern struct dev_policy *pol_find(struct dev_policy *pol, char *name);
+
+enum policy_action {
+ act_default,
+ act_include,
+ act_re_add,
+ act_spare, /* This only applies to bare devices */
+ act_spare_same_slot, /* this allows non-bare devices,
+ * but only if recent removal */
+ act_force_spare, /* this allow non-bare devices in any case */
+ act_err
+};
+
+extern int policy_action_allows(struct dev_policy *plist, const char *metadata,
+ enum policy_action want);
+extern int disk_action_allows(struct mdinfo *disk, const char *metadata,
+ enum policy_action want);
+
+struct domainlist {
+ struct domainlist *next;
+ const char *dom;
+};
+
+extern int domain_test(struct domainlist *dom, struct dev_policy *pol,
+ const char *metadata);
+extern struct domainlist *domain_from_array(struct mdinfo *mdi,
+ const char *metadata);
+extern void domainlist_add_dev(struct domainlist **dom, int devnum,
+ const char *metadata);
+extern void domain_free(struct domainlist *dl);
+extern void domain_merge(struct domainlist **domp, struct dev_policy *pol,
+ const char *metadata);
+void domain_add(struct domainlist **domp, char *domain);
+
+extern void policy_save_path(char *id_path, struct map_ent *array);
+extern int policy_check_path(struct mdinfo *disk, struct map_ent *array);
+
#if __GNUC__ < 3
struct stat64;
#endif
extern int Manage_runstop(char *devname, int fd, int runstop, int quiet);
extern int Manage_resize(char *devname, int fd, long long size, int raid_disks);
extern int Manage_subdevs(char *devname, int fd,
- mddev_dev_t devlist, int verbose, int test);
+ struct mddev_dev *devlist, int verbose, int test,
+ char *update);
extern int autodetect(void);
extern int Grow_Add_device(char *devname, int fd, char *newdev);
extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind, int force);
extern int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
long long size,
- int level, char *layout_str, int chunksize, int raid_disks);
+ int level, char *layout_str, int chunksize, int raid_disks,
+ int force);
extern int Grow_restart(struct supertype *st, struct mdinfo *info,
int *fdlist, int cnt, char *backup_file, int verbose);
extern int Grow_continue(int mdfd, struct supertype *st,
struct mdinfo *info, char *backup_file);
extern int Assemble(struct supertype *st, char *mddev,
- mddev_ident_t ident,
- mddev_dev_t devlist, char *backup_file,
+ struct mddev_ident *ident,
+ struct mddev_dev *devlist,
+ char *backup_file, int invalid_backup,
int readonly, int runstop,
char *update, char *homehost, int require_homehost,
int verbose, int force);
extern int Build(char *mddev, int chunk, int level, int layout,
- int raiddisks, mddev_dev_t devlist, int assume_clean,
+ int raiddisks, struct mddev_dev *devlist, int assume_clean,
char *bitmap_file, int bitmap_chunk, int write_behind,
int delay, int verbose, int autof, unsigned long long size);
extern int Create(struct supertype *st, char *mddev,
int chunk, int level, int layout, unsigned long long size, int raiddisks, int sparedisks,
char *name, char *homehost, int *uuid,
- int subdevs, mddev_dev_t devlist,
+ int subdevs, struct mddev_dev *devlist,
int runstop, int verbose, int force, int assume_clean,
char *bitmap_file, int bitmap_chunk, int write_behind, int delay, int autof);
extern int Detail(char *dev, int brief, int export, int test, char *homehost);
extern int Detail_Platform(struct superswitch *ss, int scan, int verbose);
extern int Query(char *dev);
-extern int Examine(mddev_dev_t devlist, int brief, int export, int scan,
+extern int Examine(struct mddev_dev *devlist, int brief, int export, int scan,
int SparcAdjust, struct supertype *forcest, char *homehost);
-extern int Monitor(mddev_dev_t devlist,
+extern int Monitor(struct mddev_dev *devlist,
char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot,
- int dosyslog, int test, char *pidfile, int increments);
+ int dosyslog, int test, char *pidfile, int increments,
+ int share);
extern int Kill(char *dev, struct supertype *st, int force, int quiet, int noexcl);
extern int Kill_subarray(char *dev, char *subarray, int quiet);
-extern int Update_subarray(char *dev, char *subarray, char *update, mddev_ident_t ident, int quiet);
+extern int Update_subarray(char *dev, char *subarray, char *update, struct mddev_ident *ident, int quiet);
extern int Wait(char *dev);
extern int WaitClean(char *dev, int sock, int verbose);
extern int Incremental(char *devname, int verbose, int runstop,
struct supertype *st, char *homehost, int require_homehost,
int autof);
-extern int Incremental_container(struct supertype *st, char *devname,
- int verbose, int runstop, int autof,
- int trustworthy);
extern void RebuildMap(void);
extern int IncrementalScan(int verbose);
-extern int IncrementalRemove(char *devname, int verbose);
+extern int IncrementalRemove(char *devname, char *path, int verbose);
extern int CreateBitmap(char *filename, int force, char uuid[16],
unsigned long chunksize, unsigned long daemon_sleep,
unsigned long write_behind,
extern int md_get_version(int fd);
extern int get_linux_version(void);
+extern int mdadm_version(char *version);
extern long long parse_size(char *size);
extern int parse_uuid(char *str, int uuid[4]);
extern int parse_layout_10(char *layout);
extern int same_dev(char *one, char *two);
extern int parse_auto(char *str, char *msg, int config);
-extern mddev_ident_t conf_get_ident(char *dev);
-extern mddev_dev_t conf_get_devs(void);
+extern struct mddev_ident *conf_get_ident(char *dev);
+extern struct mddev_dev *conf_get_devs(void);
extern int conf_test_dev(char *devname);
-extern int conf_test_metadata(const char *version, int is_homehost);
+extern int conf_test_metadata(const char *version, struct dev_policy *pol, int is_homehost);
extern struct createinfo *conf_get_create_info(void);
extern void set_conffile(char *file);
extern char *conf_get_mailaddr(void);
extern char *conf_word(FILE *file, int allow_key);
extern int conf_name_is_free(char *name);
extern int devname_matches(char *name, char *match);
-extern struct mddev_ident_s *conf_match(struct mdinfo *info, struct supertype *st);
+extern struct mddev_ident *conf_match(struct mdinfo *info, struct supertype *st);
+extern int experimental(void);
extern void free_line(char *line);
extern int match_oneof(char *devices, char *devname);
extern unsigned long calc_csum(void *super, int bytes);
extern int enough(int level, int raid_disks, int layout, int clean,
char *avail, int avail_disks);
+extern int enough_fd(int fd);
extern int ask(char *mesg);
extern unsigned long long get_component_size(int fd);
extern void remove_partitions(int fd);
extern int add_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
+extern int remove_disk(int mdfd, struct supertype *st,
+ struct mdinfo *sra, struct mdinfo *info);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
unsigned long long min_recovery_start(struct mdinfo *array);
extern int is_container_member(struct mdstat_ent *ent, char *devname);
extern int is_subarray_active(char *subarray, char *devname);
int is_container_active(char *devname);
-extern int open_subarray(char *dev, struct supertype *st, int quiet);
+extern int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet);
extern struct superswitch *version_to_superswitch(char *vers);
extern int mdmon_running(int devnum);
/* The version string for a 'subarray' (an array in a container)
* is
* /containername/componentname for normal read-write arrays
- * -containername/componentname for read-only arrays.
+ * -containername/componentname for arrays which mdmon must not
+ * reconfigure. They might be read-only
+ * or might be undergoing reshape etc.
* containername is e.g. md0, md_d1
* componentname is dependant on the metadata. e.g. '1' 'S1' ...
*/
return (*vers == '/' || *vers == '-');
}
+/*
+ * Return a pointer into ent->metadata_version positioned after a fixed
+ * 10-character prefix, the container name, and one more character.
+ * NOTE(review): the 10 presumably covers "external:" plus the leading
+ * '/' or '-' of a subarray version string, and the final +1 the '/'
+ * after the container name - confirm against the version-string format.
+ */
+static inline char *to_subarray(struct mdstat_ent *ent, char *container)
+{
+	size_t skip = 10 + strlen(container) + 1;
+
+	return ent->metadata_version + skip;
+}
+
#ifdef DEBUG
#define dprintf(fmt, arg...) \
fprintf(stderr, fmt, ##arg)
int force = 0;
int main(int argc, char *argv[]) {
- mddev_ident_t array_list = conf_get_ident(NULL);
+ struct mddev_ident *array_list = conf_get_ident(NULL);
if (!array_list) {
fprintf(stderr, Name ": No arrays found in config file\n");
rv = 1;
container->devnum = devnum;
container->devname = devname;
container->arrays = NULL;
- container->subarray[0] = 0;
container->sock = -1;
if (!container->devname) {
}
close(victim_sock);
}
- if (container->ss->load_super(container, mdfd, devname)) {
+ if (container->ss->load_container(container, mdfd, devname)) {
fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
devname);
exit(3);
ent->resync = 0;
ent->metadata_version = NULL;
ent->raid_disks = 0;
- ent->chunk_size = 0;
ent->devcnt = 0;
ent->members = NULL;
w[l-1] == '%' &&
(eq=strchr(w, '=')) != NULL ) {
ent->percent = atoi(eq+1);
- if (strncmp(w,"resync", 4)==0)
+ if (strncmp(w,"resync", 6)==0)
ent->resync = 1;
+ else if (strncmp(w, "reshape", 7)==0)
+ ent->resync = 2;
+ else
+ ent->resync = 0;
} else if (ent->percent == -1 &&
- strncmp(w, "resync", 4)==0) {
- ent->resync = 1;
+ (w[0] == 'r' || w[0] == 'c')) {
+ if (strncmp(w, "resync", 4)==0)
+ ent->resync = 1;
+ if (strncmp(w, "reshape", 7)==0)
+ ent->resync = 2;
+ if (strncmp(w, "recovery", 8)==0)
+ ent->resync = 0;
+ if (strncmp(w, "check", 5)==0)
+ ent->resync = 3;
+
+ /* strcmp() returns 0 on a match, so compare against 0:
+ * percent is reset to 0 only when the word really ends
+ * in "=DELAYED" or "=PENDING".
+ */
+ if (l > 8 && strcmp(w+l-8, "=DELAYED") == 0)
+ ent->percent = 0;
+ if (l > 8 && strcmp(w+l-8, "=PENDING") == 0)
+ ent->percent = 0;
} else if (ent->percent == -1 &&
w[0] >= '0' &&
w[0] <= '9' &&
remove_pidfile(container->devname);
exit_now = 1;
signal_manager();
+ close(fd);
exit(0);
}
}
+/* Receive one message from 'fd' with receive_message(), passing 'tmo'
+ * through, and return that call's result.  Any payload buffer that
+ * came with a successful reply is freed here, never handed back.
+ */
int wait_reply(int fd, int tmo)
{
struct metadata_update msg;
- return receive_message(fd, &msg, tmo);
+ int err = receive_message(fd, &msg, tmo);
+
+ /* mdmon sent extra data, but caller only cares that we got a
+ * successful reply
+ */
+ if (err == 0 && msg.len > 0)
+ free(msg.buf);
+
+ return err;
}
int connect_monitor(char *devname)
return err;
}
-
/* give the monitor a chance to update the metadata */
int ping_monitor(char *devname)
{
return err;
}
+/*
+ * Connect to the monitor for 'devname', send an ack() and read one
+ * message back.  Returns the buffer of the received message (the
+ * caller is responsible for it), or NULL if the connection, the ack,
+ * or the receive failed, or if the reply carried no data.
+ * The socket is always closed before returning.
+ */
+static char *ping_monitor_version(char *devname)
+{
+	struct metadata_update reply;
+	int failed;
+	int sock = connect_monitor(devname);
+
+	if (sock < 0)
+		return NULL;
+
+	failed = (ack(sock, 20) != 0);
+	/* only wait for the answer if the ack went out */
+	if (!failed)
+		failed = (receive_message(sock, &reply, 20) != 0);
+
+	close(sock);
+
+	/* 'reply' is only meaningful when nothing failed */
+	if (failed)
+		return NULL;
+	if (reply.len == 0 || reply.buf == NULL)
+		return NULL;
+	return reply.buf;
+}
+
+/*
+ * Undo the blocking of one member array: rewrite its metadata_version
+ * as "external:" followed by sra->text_version with the first
+ * character forced to '/', and, when 'unfreeze' is set and the array
+ * has a sync_action attribute, set sync_action to "idle".
+ * Returns 0 on success, -1 if 'sra' is NULL or a sysfs write fails.
+ */
+static int unblock_subarray(struct mdinfo *sra, const int unfreeze)
+{
+	char version[64];
+
+	if (!sra)
+		return -1;
+
+	sprintf(version, "external:%s\n", sra->text_version);
+	version[9] = '/';	/* first char of text_version: '-' means blocked */
+
+	if (sysfs_set_str(sra, NULL, "metadata_version", version))
+		return -1;
+
+	if (unfreeze &&
+	    sysfs_attribute_available(sra, NULL, "sync_action") &&
+	    sysfs_set_str(sra, NULL, "sync_action", "idle"))
+		return -1;
+
+	return 0;
+}
+
+/**
+ * block_monitor - prevent mdmon spare assignment
+ * @container - container to block
+ * @freeze - flag to additionally freeze sync_action
+ *
+ * This is used by the reshape code to freeze the container, and the
+ * auto-rebuild implementation to atomically move spares.
+ * In both cases we need to stop mdmon from assigning spares to replace
+ * failed devices as we might have other plans for the spare.
+ * For the reshape case we also need to 'freeze' sync_action so that
+ * no recovery happens until we have fully prepared for the reshape.
+ *
+ * We tell mdmon that the array is frozen by marking the 'metadata' name
+ * with a leading '-'.  This previously told mdmon "Don't make this array
+ * read/write, leave it readonly".  Now it means a more general "Don't
+ * reconfigure this array at all".
+ * As older versions of mdmon (which might run from initrd) don't understand
+ * this, we first check that the running mdmon is new enough.
+ *
+ * @container stays owned by the caller: it is only read here and must
+ * not be freed, since callers go on to use it (e.g. to unblock later).
+ *
+ * Returns 0 when every member array was blocked, -1 otherwise.  On
+ * failure the members that had already been blocked are unblocked again.
+ */
+int block_monitor(char *container, const int freeze)
+{
+	int devnum = devname2devnum(container);
+	struct mdstat_ent *ent, *e, *e2;
+	struct mdinfo *sra = NULL;
+	char *version = NULL;
+	char buf[64];
+	int rv = 0;
+
+	if (!mdmon_running(devnum)) {
+		/* if mdmon is not active we assume that any instance that is
+		 * later started will match the current mdadm version, if this
+		 * assumption is violated we may inadvertantly rebuild an array
+		 * that was meant for reshape, or start rebuild on a spare that
+		 * was to be moved to another container
+		 */
+		/* pass */;
+	} else {
+		int ver;
+
+		version = ping_monitor_version(container);
+		ver = version ? mdadm_version(version) : -1;
+		free(version);
+		if (ver < 3002000) {
+			fprintf(stderr, Name
+				": mdmon instance for %s cannot be disabled\n",
+				container);
+			return -1;
+		}
+	}
+
+	ent = mdstat_read(0, 0);
+	if (!ent) {
+		fprintf(stderr, Name
+			": failed to read /proc/mdstat while disabling mdmon\n");
+		return -1;
+	}
+
+	/* freeze container contents; leaving the loop via 'break' keeps
+	 * 'e' non-NULL, which is how failure is detected below
+	 */
+	for (e = ent; e; e = e->next) {
+		if (!is_container_member(e, container))
+			continue;
+		sysfs_free(sra);
+		sra = sysfs_read(-1, e->devnum, GET_VERSION);
+		if (!sra) {
+			fprintf(stderr, Name
+				": failed to read sysfs for subarray%s\n",
+				to_subarray(e, container));
+			break;
+		}
+		/* can't reshape an array that we can't monitor */
+		if (sra->text_version[0] == '-')
+			break;
+
+		if (freeze && sysfs_freeze_array(sra) < 1)
+			break;
+		/* flag this array to not be modified by mdmon (close race with
+		 * takeover in reshape case and spare reassignment in the
+		 * auto-rebuild case)
+		 */
+		sprintf(buf, "external:%s\n", sra->text_version);
+		buf[9] = '-';
+		if (sysfs_set_str(sra, NULL, "metadata_version", buf))
+			break;
+		ping_monitor(container);
+
+		/* check that we did not race with recovery.
+		 * NOTE(review): both alternatives require 'freeze', so with
+		 * freeze == 0 this always takes the 'break' path - confirm
+		 * that callers only rely on freeze != 0.
+		 */
+		if ((freeze &&
+		     !sysfs_attribute_available(sra, NULL, "sync_action")) ||
+		    (freeze &&
+		     sysfs_attribute_available(sra, NULL, "sync_action") &&
+		     sysfs_get_str(sra, NULL, "sync_action", buf, 20) > 0 &&
+		     strcmp(buf, "frozen\n") == 0))
+			/* pass */;
+		else
+			break;
+	}
+
+	if (e) {
+		fprintf(stderr, Name ": failed to freeze subarray%s\n",
+			to_subarray(e, container));
+
+		/* thaw the partially frozen container */
+		for (e2 = ent; e2 && e2 != e; e2 = e2->next) {
+			if (!is_container_member(e2, container))
+				continue;
+			sysfs_free(sra);
+			sra = sysfs_read(-1, e2->devnum, GET_VERSION);
+			/* unblock_subarray() copes with sra == NULL */
+			if (unblock_subarray(sra, freeze))
+				fprintf(stderr, Name ": Failed to unfreeze %s\n", e2->dev);
+		}
+
+		ping_monitor(container); /* cleared frozen */
+		rv = -1;
+	}
+
+	sysfs_free(sra);
+	free_mdstat(ent);
+	/* 'container' belongs to the caller - do not free it here */
+
+	return rv;
+}
+
+/*
+ * Reverse block_monitor(): for every member array of 'container'
+ * listed in /proc/mdstat, clear the "blocked" marker via
+ * unblock_subarray() (also setting sync_action to idle when 'unfreeze'
+ * is set).  The monitor is pinged afterwards if at least one member
+ * with array.level > 0 was seen.
+ * Failures are reported on stderr; nothing is returned.
+ */
+void unblock_monitor(char *container, const int unfreeze)
+{
+	struct mdstat_ent *ent, *e;
+	struct mdinfo *sra = NULL;
+	int to_ping = 0;
+
+	ent = mdstat_read(0, 0);
+	if (!ent) {
+		fprintf(stderr, Name
+			": failed to read /proc/mdstat while unblocking container\n");
+		return;
+	}
+
+	/* unfreeze container contents */
+	for (e = ent; e; e = e->next) {
+		if (!is_container_member(e, container))
+			continue;
+		sysfs_free(sra);
+		sra = sysfs_read(-1, e->devnum, GET_VERSION|GET_LEVEL);
+		/* sysfs_read() can fail; don't dereference NULL.  The
+		 * failure is still reported because unblock_subarray()
+		 * returns an error for a NULL sra.
+		 */
+		if (sra && sra->array.level > 0)
+			to_ping++;
+		if (unblock_subarray(sra, unfreeze))
+			fprintf(stderr, Name ": Failed to unfreeze %s\n", e->dev);
+	}
+	if (to_ping)
+		ping_monitor(container);
+
+	sysfs_free(sra);
+	free_mdstat(ent);
+}
+
+
+
/* give the manager a chance to view the updated container state. This
* would naturally happen due to the manager noticing a change in
* /proc/mdstat; however, pinging encourages this detection to happen
extern int wait_reply(int fd, int tmo);
extern int connect_monitor(char *devname);
extern int ping_monitor(char *devname);
+extern int block_monitor(char *container, const int freeze);
+extern void unblock_monitor(char *container, const int unfreeze);
extern int fping_monitor(int sock);
extern int ping_manager(char *devname);
--- /dev/null
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2010 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neil@brown.name>
+ *
+ */
+
+/* Structure definitions etc. for MBR and GPT partition tables
+ */
+
+
+#define MBR_SIGNATURE_MAGIC __cpu_to_le16(0xAA55)
+#define MBR_PARTITIONS 4
+
+/* One 16-byte partition entry of an MBR partition table.
+ * The struct is packed (alignment 1) because it is embedded in the
+ * packed 'struct MBR' at byte offset 446, where the two __u32 fields
+ * are not 4-byte aligned; packing does not change the field layout.
+ */
+struct MBR_part_record {
+	__u8 bootable;
+	__u8 first_head;
+	__u8 first_sector;
+	__u8 first_cyl;
+	__u8 part_type;
+	__u8 last_head;
+	__u8 last_sector;
+	__u8 last_cyl;
+	__u32 first_sect_lba;
+	__u32 blocks_num;
+} __attribute__((packed));
+
+/* A whole MBR: 446 bytes of padding, MBR_PARTITIONS partition
+ * records and a 16-bit magic (compare with MBR_SIGNATURE_MAGIC).
+ */
+struct MBR {
+ __u8 pad[446];
+ struct MBR_part_record parts[MBR_PARTITIONS];
+ __u16 magic;
+} __attribute__((packed));
+
+
+
+#define GPT_SIGNATURE_MAGIC __cpu_to_le64(0x5452415020494645ULL)
+#define MBR_GPT_PARTITION_TYPE 0xEE
+
+/* One GPT partition entry: type and partition GUIDs, first and last
+ * LBA, attribute bits and a 72-byte name field (128 bytes, packed).
+ */
+struct GPT_part_entry {
+ unsigned char type_guid[16];
+ unsigned char partition_guid[16];
+ __u64 starting_lba;
+ __u64 ending_lba;
+ unsigned char attr_bits[8];
+ unsigned char name[72];
+} __attribute__((packed));
+
+/* GPT header, padded with pad2 to 512 bytes in total (packed).
+ * 'magic' is compared with GPT_SIGNATURE_MAGIC.
+ */
+struct GPT {
+ __u64 magic;
+ __u32 revision;
+ __u32 header_size;
+ __u32 crc;
+ __u32 pad1;
+ __u64 current_lba;
+ __u64 backup_lba;
+ __u64 first_lba;
+ __u64 last_lba;
+ __u8 guid[16];
+ __u64 part_start;
+ __u32 part_cnt;
+ __u32 part_size;
+ __u32 part_crc;
+ __u8 pad2[420];
+} __attribute__((packed));
--- /dev/null
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include "mdadm.h"
+#include <dirent.h>
+#include <fnmatch.h>
+#include <ctype.h>
+#include "dlink.h"
+/*
+ * Policy module for mdadm.
+ * A policy statement about a device lists a set of values for each
+ * of a set of names. Each value can have a metadata type as context.
+ *
+ * names include:
+ * action - the actions that can be taken on hot-plug
+ * domain - the domain(s) that the device is part of
+ *
+ * Policy information is extracted from various sources, but
+ * particularly from a set of policy rules in mdadm.conf
+ */
+
+/*
+ * Allocate a new policy entry (name, val, metadata) and push it on the
+ * front of the list *pol.  None of the strings are copied.
+ *
+ * 'metadata' is normalised so that entries can later be compared by
+ * address: a name found in superlist[] is replaced by that handler's
+ * own name string, "1", "1.0", "1.1" and "1.2" map to super1.name, and
+ * anything else is reported once per distinct pointer and stored as
+ * "unknown".  A NULL metadata is stored as NULL.
+ */
+static void pol_new(struct dev_policy **pol, char *name, const char *val,
+		    const char *metadata)
+{
+	static const char *last_warned = NULL;
+	struct dev_policy *entry = malloc(sizeof(*entry));
+	const char *canonical = NULL;
+	int idx;
+
+	entry->name = name;
+	entry->value = val;
+
+	if (metadata != NULL) {
+		/* first choice: the canonical name of a known handler */
+		idx = 0;
+		while (superlist[idx] != NULL) {
+			if (strcmp(metadata, superlist[idx]->name) == 0) {
+				canonical = superlist[idx]->name;
+				break;
+			}
+			idx++;
+		}
+
+		/* second choice: the version-1 aliases */
+		if (canonical == NULL &&
+		    (strcmp(metadata, "1") == 0 ||
+		     strcmp(metadata, "1.0") == 0 ||
+		     strcmp(metadata, "1.1") == 0 ||
+		     strcmp(metadata, "1.2") == 0))
+			canonical = super1.name;
+
+		/* otherwise complain (once per string) and mark it */
+		if (canonical == NULL) {
+			if (last_warned != metadata) {
+				fprintf(stderr, Name ": metadata=%s unrecognised - ignoring rule\n",
+					metadata);
+				last_warned = metadata;
+			}
+			canonical = "unknown";
+		}
+	}
+
+	entry->metadata = canonical;
+	entry->next = *pol;
+	*pol = entry;
+}
+
+/*
+ * Ordering predicate for policy entries: returns 1 if 'a' sorts before
+ * or equal to 'b', else 0.  Keys, in order: name (by address), value
+ * (by strcmp), metadata (by address).
+ */
+static int pol_lesseq(struct dev_policy *a, struct dev_policy *b)
+{
+	int order;
+
+	if (a->name != b->name)
+		return a->name < b->name;
+
+	order = strcmp(a->value, b->value);
+	if (order != 0)
+		return order < 0;
+
+	return a->metadata <= b->metadata;
+}
+
+static void pol_sort(struct dev_policy **pol)
+{
+ /* sort policy list in *pol by name/value/metadata
+ * (the key order used by pol_lesseq()) using merge sort.
+ * The list is sorted in place by relinking; *pol is updated
+ * to point at the new head.
+ */
+
+ struct dev_policy *pl[2];
+ pl[0] = *pol;
+ pl[1] = NULL;
+
+ do {
+ struct dev_policy **plp[2], *p[2];
+ int curr = 0;
+ struct dev_policy nul = { NULL, NULL, NULL, NULL };
+ struct dev_policy *prev = &nul;
+ int next = 0;
+
+ /* p[] are the two lists that we are merging.
+ * plp[] are the ends of the two lists we create
+ * from the merge.
+ * 'curr' is which of plp[] that we are currently
+ * adding items to.
+ * 'next' is which of p[] we will take the next
+ * item from.
+ * 'prev' is the last value, which was placed in
+ * plp[curr].  It starts as a dummy entry with a NULL
+ * name.
+ */
+ plp[0] = &pl[0];
+ plp[1] = &pl[1];
+ p[0] = pl[0];
+ p[1] = pl[1];
+
+ /* take least of p[0] and p[1]
+ * if it is larger than prev, add to
+ * plp[curr], else swap curr then add
+ */
+ while (p[0] || p[1]) {
+ if (p[next] == NULL ||
+ (p[1-next] != NULL &&
+ !(pol_lesseq(prev, p[1-next])
+ ^pol_lesseq(prev, p[next])
+ ^pol_lesseq(p[next], p[1-next])))
+ )
+ next = 1 - next;
+
+ if (!pol_lesseq(prev, p[next]))
+ curr = 1 - curr;
+
+ *plp[curr] = prev = p[next];
+ plp[curr] = &p[next]->next;
+ p[next] = p[next]->next;
+ }
+ /* terminate both output lists */
+ *plp[0] = NULL;
+ *plp[1] = NULL;
+ /* another pass is needed for as long as the merge still
+ * produced two non-empty lists
+ */
+ } while (pl[0] && pl[1]);
+ if (pl[0])
+ *pol = pl[0];
+ else
+ *pol = pl[1];
+}
+
+/*
+ * Remove duplicates from a sorted policy list.  In a sorted list the
+ * successor is never smaller, so "successor <= current" identifies an
+ * equal entry; it is unlinked and freed.  The head is never removed.
+ */
+static void pol_dedup(struct dev_policy *pol)
+{
+	struct dev_policy *cur = pol;
+
+	while (cur != NULL && cur->next != NULL) {
+		struct dev_policy *succ = cur->next;
+
+		if (!pol_lesseq(succ, cur)) {
+			cur = succ;
+			continue;
+		}
+		/* stay on 'cur': its new successor may be a duplicate too */
+		cur->next = succ->next;
+		free(succ);
+	}
+}
+
+/*
+ * pol_find returns the first entry of the policy list whose name is
+ * 'name', or NULL if there is none.  Names are compared by address
+ * and the scan stops at the first entry whose name does not sort
+ * below 'name'.
+ * A non-NULL result means at least one value exists; how many can
+ * only be found by iterating on from the returned entry.
+ */
+struct dev_policy *pol_find(struct dev_policy *pol, char *name)
+{
+	struct dev_policy *p;
+
+	for (p = pol; p != NULL && p->name < name; p = p->next)
+		;
+
+	return (p != NULL && p->name == name) ? p : NULL;
+}
+
+/*
+ * Find the /dev/disk/by-path/ symlink that resolves to the block
+ * device disk->disk.{major,minor}.
+ * Returns a malloc'd copy of the link's name (without the directory
+ * prefix) which the caller must free, or NULL if the directory cannot
+ * be opened, no link matches, or the copy cannot be allocated.
+ */
+static char *disk_path(struct mdinfo *disk)
+{
+	struct stat stb;
+	int prefix_len;
+	DIR *by_path;
+	char symlink[PATH_MAX] = "/dev/disk/by-path/";
+	char *result = NULL;
+	struct dirent *ent;
+
+	by_path = opendir(symlink);
+	if (!by_path)
+		return NULL;
+	prefix_len = strlen(symlink);
+
+	while ((ent = readdir(by_path)) != NULL) {
+		if (ent->d_type != DT_LNK)
+			continue;
+		strncpy(symlink + prefix_len,
+			ent->d_name,
+			sizeof(symlink) - prefix_len);
+		/* strncpy() does not terminate on truncation */
+		symlink[sizeof(symlink) - 1] = '\0';
+		if (stat(symlink, &stb) < 0)
+			continue;
+		if ((stb.st_mode & S_IFMT) != S_IFBLK)
+			continue;
+		if (stb.st_rdev != makedev(disk->disk.major, disk->disk.minor))
+			continue;
+		/* copy the name while the directory stream is still open:
+		 * 'ent' may become invalid once closedir() is called
+		 */
+		result = strdup(ent->d_name);
+		break;
+	}
+	closedir(by_path);
+	return result;
+}
+
+char type_part[] = "part";
+char type_disk[] = "disk";
+/*
+ * Classify the device disk->disk.{major,minor}: returns type_part if
+ * /sys/dev/block/MAJOR:MINOR/partition exists, type_disk otherwise.
+ * The result is one of those two static strings, never NULL.
+ */
+static char *disk_type(struct mdinfo *disk)
+{
+	struct stat st;
+	char attr[30+20+20];
+
+	sprintf(attr, "/sys/dev/block/%d:%d/partition",
+		disk->disk.major, disk->disk.minor);
+
+	return stat(attr, &st) == 0 ? type_part : type_disk;
+}
+
+/*
+ * Check whether a rule list accepts the given path and type.
+ * Entries named rule_path are fnmatch() patterns for 'path', entries
+ * named rule_type are compared to 'type' with strcmp().  For each of
+ * the two kinds: no entry at all counts as acceptable, otherwise at
+ * least one entry has to match.  Returns 1 if both kinds are
+ * satisfied, else 0.
+ */
+static int pol_match(struct rule *rule, char *path, char *type)
+{
+	/* 0 == no such entry seen, 1 == matched, -1 == seen but no match yet */
+	int path_state = 0;
+	int type_state = 0;
+	struct rule *r;
+
+	for (r = rule; r != NULL; r = r->next) {
+		if (r->name == rule_path) {
+			if (fnmatch(r->value, path, 0) == 0)
+				path_state = 1;
+			else if (path_state == 0)
+				path_state = -1;
+		}
+		if (r->name == rule_type) {
+			if (strcmp(r->value, type) == 0)
+				type_state = 1;
+			else if (type_state == 0)
+				type_state = -1;
+		}
+	}
+
+	return !(path_state < 0 || type_state < 0);
+}
+
+/*
+ * Copy the action and domain assignments of a rule into the policy
+ * list *pol.  Every copied entry is tagged with the rule's metadata
+ * value; if the rule has several metadata entries the last one in the
+ * list wins, and with none the tag is NULL.
+ */
+static void pol_merge(struct dev_policy **pol, struct rule *rule)
+{
+	char *md = NULL;
+	struct rule *cur;
+
+	/* pass 1: find the metadata context for the whole rule */
+	cur = rule;
+	while (cur != NULL) {
+		if (cur->name == pol_metadata)
+			md = cur->value;
+		cur = cur->next;
+	}
+
+	/* pass 2: add the assignments under that context */
+	cur = rule;
+	while (cur != NULL) {
+		if (cur->name == pol_act || cur->name == pol_domain)
+			pol_new(pol, cur->name, cur->value, md);
+		cur = cur->next;
+	}
+}
+
+/*
+ * Check whether 'path' ends in "-part" followed by zero or more
+ * decimal digits ("-partNN").
+ * If it does, store in *part a pointer to the '-' that starts the
+ * suffix and return 1; otherwise return 0 and leave *part untouched.
+ * Callers may overwrite **part with '\0' to cut the suffix off and
+ * restore it with '-' afterwards, and use *part + 1 as "partNN".
+ */
+static int path_has_part(char *path, char **part)
+{
+	int l = strlen(path);
+
+	/* step back over the trailing partition number; the cast keeps
+	 * isdigit() defined for bytes with the high bit set
+	 */
+	while (l > 1 && isdigit((unsigned char)path[l-1]))
+		l--;
+	if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
+		return 0;
+	/* "-part" is 5 characters long, so the '-' is at l-5 */
+	*part = path+l-5;
+	return 1;
+}
+
+static void pol_merge_part(struct dev_policy **pol, struct rule *rule, char *part)
+{
+ /* copy any name assignments from rule into pol, appending
+ * "-" followed by 'part' to any domain value. The string with
+ * the suffix appended is stored with the rule (in r->dups) so it
+ * has a lifetime to match the rule.
+ * Action entries are copied unchanged. All entries are tagged
+ * with the rule's metadata value (the last one in the list if
+ * there are several, NULL if there is none).
+ */
+ struct rule *r;
+ char *metadata = NULL;
+ for (r = rule; r ; r = r->next)
+ if (r->name == pol_metadata)
+ metadata = r->value;
+
+ for (r = rule; r ; r = r->next) {
+ if (r->name == pol_act)
+ pol_new(pol, r->name, r->value, metadata);
+ else if (r->name == pol_domain) {
+ char *dom;
+ int len;
+ /* the list of suffixed copies is created on first use */
+ if (r->dups == NULL)
+ r->dups = dl_head();
+ len = strlen(r->value);
+ /* look for an existing copy with this suffix: skip
+ * the value and the '-' and compare the rest
+ */
+ for (dom = dl_next(r->dups); dom != r->dups;
+ dom = dl_next(dom))
+ if (strcmp(dom+len+1, part)== 0)
+ break;
+ /* back at the list head means nothing was found */
+ if (dom == r->dups) {
+ /* NOTE(review): relies on dl_strndup() returning
+ * a buffer with room for value + "-" + part and
+ * the terminator - confirm in dlink.c
+ */
+ char *newdom = dl_strndup(
+ r->value, len + 1 + strlen(part));
+ strcat(strcat(newdom, "-"), part);
+ dl_add(r->dups, newdom);
+ dom = newdom;
+ }
+ pol_new(pol, r->name, dom, metadata);
+ }
+ }
+}
+
+static struct pol_rule *config_rules = NULL;
+static struct pol_rule **config_rules_end = NULL;
+static int config_rules_has_path = 0;
+
+/*
+ * Most policy comes from a set of policy rules that are
+ * read from the config file.
+ * path_policy() gathers policy information for the
+ * disk described by the given 'path' and 'type'.
+ *
+ * Returns a sorted, de-duplicated policy list, or NULL if 'type' is
+ * NULL or no rule and no metadata handler contributed an entry.
+ * 'path' is modified temporarily while part-policy rules are
+ * evaluated and is restored before returning.
+ */
+struct dev_policy *path_policy(char *path, char *type)
+{
+ struct pol_rule *rules;
+ struct dev_policy *pol = NULL;
+ int i;
+
+ if (!type)
+ return NULL;
+
+ rules = config_rules;
+
+ while (rules) {
+ char *part;
+ /* ordinary rules are matched against path and type as given */
+ if (rules->type == rule_policy)
+ if (pol_match(rules->rule, path, type))
+ pol_merge(&pol, rules->rule);
+ /* part-policy rules only apply to partitions: cut the path
+ * at *part, match what is left as a type_disk path, pass the
+ * text after *part on as the suffix, then write the '-' back
+ */
+ if (rules->type == rule_part && strcmp(type, type_part) == 0)
+ if (path_has_part(path, &part)) {
+ *part = 0;
+ if (pol_match(rules->rule, path, type_disk))
+ pol_merge_part(&pol, rules->rule, part+1);
+ *part = '-';
+ }
+ rules = rules->next;
+ }
+
+ /* Now add any metadata-specific internal knowledge
+ * about this path
+ */
+ for (i=0; superlist[i]; i++)
+ if (superlist[i]->get_disk_controller_domain) {
+ const char *d =
+ superlist[i]->get_disk_controller_domain(path);
+ if (d)
+ pol_new(&pol, pol_domain, d, superlist[i]->name);
+ }
+
+ pol_sort(&pol);
+ pol_dedup(pol);
+ return pol;
+}
+
+/*
+ * pol_add() appends one name/value/metadata entry to the policy
+ * list at '*pol' and then re-sorts and de-duplicates the list.
+ */
+void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata)
+{
+	pol_new(pol, name, val, metadata);
+
+	/* keep the list in the sorted, duplicate-free form */
+	pol_sort(pol);
+	pol_dedup(*pol);
+}
+
+
+/*
+ * disk_policy() gathers policy information for the
+ * disk described in the given mdinfo (disk.{major,minor}).
+ * Returns NULL when the disk has no type or no path could be
+ * determined for it.
+ */
+struct dev_policy *disk_policy(struct mdinfo *disk)
+{
+	struct dev_policy *result;
+	char *devpath = NULL;
+	char *type = disk_type(disk);
+
+	if (type == NULL)
+		return NULL;
+
+	/* the path is only looked up when some rule mentions one */
+	if (config_rules_has_path)
+		devpath = disk_path(disk);
+	if (devpath == NULL)
+		return NULL;
+
+	result = path_policy(devpath, type);
+	free(devpath);
+
+	return result;
+}
+
+/*
+ * devnum_policy() is disk_policy() for a device given as a device
+ * number: only disk.major and disk.minor of the temporary mdinfo
+ * are filled in.
+ */
+struct dev_policy *devnum_policy(int dev)
+{
+	struct mdinfo mdi;
+
+	mdi.disk.minor = minor(dev);
+	mdi.disk.major = major(dev);
+
+	return disk_policy(&mdi);
+}
+
+/*
+ * process policy rules read from config file.
+ *
+ * The arrays below hold the keywords used in policy rules. Code in
+ * this file compares them by address (for example
+ * r->name == pol_act), so the pointers stored in rules must be
+ * these very arrays.
+ */
+
+char rule_path[] = "path"; /* "path=" word; policyline() sets config_rules_has_path when it sees one */
+char rule_type[] = "type"; /* "type=" word */
+
+char rule_policy[] = "policy"; /* pol_rule type matched against path and type in path_policy() */
+char rule_part[] = "part-policy"; /* pol_rule type applied to partitions through their disk path in path_policy() */
+
+char pol_metadata[] = "metadata"; /* its value is attached to the entries copied from the same rule */
+char pol_act[] = "action"; /* value is an action name understood by map_act() */
+char pol_domain[] = "domain"; /* value is a domain name, see domain_merge() */
+char pol_auto[] = "auto"; /* accepted by policyline() */
+
+/*
+ * try_rule() checks whether word 'w' has the form "<name>=<value>".
+ * If so, a new rule holding 'name' and a copy of <value> is pushed
+ * on the front of '*rp' and 1 is returned. Otherwise '*rp' is left
+ * untouched and 0 is returned.
+ */
+static int try_rule(char *w, char *name, struct rule **rp)
+{
+	int namelen = strlen(name);
+
+	if (strncmp(w, name, namelen) == 0 && w[namelen] == '=') {
+		struct rule *entry = malloc(sizeof(*entry));
+
+		entry->next = *rp;
+		entry->name = name;
+		/* keep a private copy of the text after the '=' */
+		entry->value = strdup(w + namelen + 1);
+		entry->dups = NULL;
+		*rp = entry;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * policyline() turns one line of words into a policy rule of the
+ * given 'type' and pushes it on the front of config_rules.
+ * Words that match none of the known "name=" prefixes are reported
+ * on stderr and ignored.
+ */
+void policyline(char *line, char *type)
+{
+	struct pol_rule *newrule;
+	char *word;
+
+	if (!config_rules_end)
+		config_rules_end = &config_rules;
+
+	newrule = malloc(sizeof(*newrule));
+	newrule->type = type;
+	newrule->rule = NULL;
+
+	word = dl_next(line);
+	while (word != line) {
+		if (try_rule(word, rule_path, &newrule->rule)) {
+			config_rules_has_path = 1;
+		} else if (!(try_rule(word, rule_type, &newrule->rule) ||
+			     try_rule(word, pol_metadata, &newrule->rule) ||
+			     try_rule(word, pol_act, &newrule->rule) ||
+			     try_rule(word, pol_domain, &newrule->rule) ||
+			     try_rule(word, pol_auto, &newrule->rule))) {
+			fprintf(stderr, Name ": policy rule %s unrecognised and ignored\n",
+				word);
+		}
+		word = dl_next(word);
+	}
+
+	newrule->next = config_rules;
+	config_rules = newrule;
+}
+
+/*
+ * policy_add() builds one policy rule of the given 'type' from its
+ * variable arguments and pushes it on the front of config_rules.
+ * The arguments after 'type' are (char *name, char *value) pairs
+ * ended by a NULL name. Each name pointer is stored as given; each
+ * value is copied with strdup().
+ */
+void policy_add(char *type, ...)
+{
+	va_list ap;
+	struct pol_rule *pr;
+	char *name, *val;
+
+	pr = malloc(sizeof(*pr));
+	pr->type = type;
+	pr->rule = NULL;
+
+	va_start(ap, type);
+	while ((name = va_arg(ap, char*)) != NULL) {
+		struct rule *r;
+
+		val = va_arg(ap, char*);
+		r = malloc(sizeof(*r));
+		r->next = pr->rule;
+		r->name = name;
+		r->value = strdup(val);
+		r->dups = NULL;
+		pr->rule = r;
+	}
+	/* every va_start() has to be paired with a va_end() */
+	va_end(ap);
+
+	pr->next = config_rules;
+	config_rules = pr;
+}
+
+/*
+ * policy_free() releases every rule on config_rules together with
+ * the values and suffixed-name lists they own, and resets the
+ * file-scope rule state.
+ */
+void policy_free(void)
+{
+	struct pol_rule *pr;
+
+	while ((pr = config_rules) != NULL) {
+		struct rule *r = pr->rule;
+
+		/* unlink the rule before tearing it down */
+		config_rules = pr->next;
+
+		while (r) {
+			struct rule *doomed = r;
+
+			r = r->next;
+			free(doomed->value);
+			if (doomed->dups)
+				free_line(doomed->dups);
+			free(doomed);
+		}
+		free(pr);
+	}
+
+	config_rules_has_path = 0;
+	config_rules_end = NULL;
+}
+
+/*
+ * dev_policy_free() frees every node of the policy list 'p'.
+ * Only the nodes themselves are freed.
+ */
+void dev_policy_free(struct dev_policy *p)
+{
+	struct dev_policy *next;
+
+	for (; p; p = next) {
+		/* fetch the link before the node goes away */
+		next = p->next;
+		free(p);
+	}
+}
+
+/*
+ * map_act() converts an action name into its enum policy_action
+ * value. Names that are not recognised map to act_err.
+ */
+static enum policy_action map_act(const char *act)
+{
+	static const struct {
+		const char *name;
+		enum policy_action action;
+	} known[] = {
+		{ "include",		act_include },
+		{ "re-add",		act_re_add },
+		{ "spare",		act_spare },
+		{ "spare-same-slot",	act_spare_same_slot },
+		{ "force-spare",	act_force_spare },
+	};
+	unsigned int k;
+
+	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++)
+		if (strcmp(act, known[k].name) == 0)
+			return known[k].action;
+
+	return act_err;
+}
+
+/*
+ * policy_action() returns the highest-valued action among the
+ * pol_act entries of 'plist' that apply to 'metadata', starting
+ * from act_default.
+ */
+static enum policy_action policy_action(struct dev_policy *plist, const char *metadata)
+{
+	struct dev_policy *entry;
+	enum policy_action best = act_default;
+
+	plist = pol_find(plist, pol_act);
+	pol_for_each(entry, plist, metadata) {
+		enum policy_action cur = map_act(entry->value);
+
+		best = (cur > best) ? cur : best;
+	}
+	return best;
+}
+
+/*
+ * policy_action_allows() returns 1 when the action computed for
+ * 'plist' and 'metadata' is at least 'want', and 0 otherwise.
+ * A computed action of act_err never allows anything.
+ */
+int policy_action_allows(struct dev_policy *plist, const char *metadata, enum policy_action want)
+{
+	enum policy_action granted = policy_action(plist, metadata);
+
+	return granted != act_err && granted >= want;
+}
+
+/*
+ * disk_action_allows() is policy_action_allows() for a disk: the
+ * policy list is gathered with disk_policy(), tested, and freed
+ * again before returning.
+ */
+int disk_action_allows(struct mdinfo *disk, const char *metadata, enum policy_action want)
+{
+	struct dev_policy *plist = disk_policy(disk);
+	int allowed;
+
+	allowed = policy_action_allows(plist, metadata, want);
+	dev_policy_free(plist);
+
+	return allowed;
+}
+
+
+/* Domain policy:
+ * Any device can have a list of domains asserted by different policy
+ * statements.
+ * An array also has a list of domains comprising all the domains of
+ * all the devices in an array.
+ * Where an array has a spare-group, that becomes an additional domain for
+ * every device in the array and thus for the array.
+ *
+ * We keep the list of domains in a sorted linked list
+ * As dev policies are already sorted, this is fairly easy to manage.
+ */
+
+/*
+ * domain_merge_one() merges one domain name into a sorted list.
+ * Returns the address of the link that points at the matching or
+ * newly inserted entry. The 'domain' pointer is stored as given,
+ * not copied.
+ */
+static struct domainlist **domain_merge_one(struct domainlist **domp,
+					     const char *domain)
+{
+	struct domainlist *cur = *domp;
+	int cmp = -1;
+
+	/* step over every entry that sorts before 'domain' */
+	while (cur && (cmp = strcmp(cur->dom, domain)) < 0) {
+		domp = &cur->next;
+		cur = *domp;
+	}
+
+	if (cur && cmp == 0)
+		return domp;
+
+	/* not present: link a new entry in at this position */
+	cur = malloc(sizeof(*cur));
+	cur->next = *domp;
+	cur->dom = domain;
+	*domp = cur;
+
+	return domp;
+}
+
+#if (DEBUG)
+/* Debug helper: print every entry of a policy list with dprintf(). */
+void dump_policy(struct dev_policy *policy)
+{
+	struct dev_policy *p;
+
+	for (p = policy; p; p = p->next)
+		dprintf("policy: %p name: %s value: %s metadata: %s\n",
+			p, p->name, p->value, p->metadata);
+}
+#endif
+
+/*
+ * domain_merge() adds to 'domp' all the domains in 'pollist' that
+ * apply to 'metadata' and are not already in 'domp'.
+ */
+void domain_merge(struct domainlist **domp, struct dev_policy *pollist,
+		  const char *metadata)
+{
+	struct dev_policy *entry;
+	struct dev_policy *domains = pol_find(pollist, pol_domain);
+
+	pol_for_each(entry, domains, metadata) {
+		domain_merge_one(domp, entry->value);
+	}
+}
+
+/*
+ * domain_test() checks that all domains in 'pol' (for 'metadata')
+ * are also in 'dom'. Both lists are sorted, so 'dom' only ever
+ * moves forward.
+ * Returns 1 if every domain was found, 0 if one is missing, and
+ * also 0 if 'pol' has no domains at all: nothing is known about
+ * such a device so the match is rejected.
+ */
+int domain_test(struct domainlist *dom, struct dev_policy *pol,
+		const char *metadata)
+{
+	struct dev_policy *entry;
+	int matched = 0;
+
+	pol = pol_find(pol, pol_domain);
+	pol_for_each(entry, pol, metadata) {
+		matched = 1;
+
+		/* advance to the first list entry not sorting before this one */
+		for (; dom; dom = dom->next)
+			if (strcmp(dom->dom, entry->value) >= 0)
+				break;
+
+		if (dom == NULL || strcmp(dom->dom, entry->value) != 0)
+			return 0;
+	}
+	return matched;
+}
+
+/*
+ * domainlist_add_dev() merges into '*dom' the domains that the
+ * policy of device 'devnum' asserts for 'metadata'.
+ */
+void domainlist_add_dev(struct domainlist **dom, int devnum, const char *metadata)
+{
+	struct dev_policy *plist;
+
+	plist = devnum_policy(devnum);
+	domain_merge(dom, plist, metadata);
+	dev_policy_free(plist);
+}
+
+/*
+ * domain_from_array() builds the domain list of an array by merging
+ * the domains of every device on mdi->devs. Returns NULL when no
+ * domain was found.
+ */
+struct domainlist *domain_from_array(struct mdinfo *mdi, const char *metadata)
+{
+	struct domainlist *result = NULL;
+	struct mdinfo *dev = mdi->devs;
+
+	while (dev) {
+		int devnum = makedev(dev->disk.major, dev->disk.minor);
+
+		domainlist_add_dev(&result, devnum, metadata);
+		dev = dev->next;
+	}
+
+	return result;
+}
+
+/*
+ * domain_add() merges a single domain name into the sorted list at
+ * '*domp'. The list keeps the 'domain' pointer itself.
+ */
+void domain_add(struct domainlist **domp, char *domain)
+{
+	(void) domain_merge_one(domp, domain);
+}
+
+
+/*
+ * domain_free() frees every node of a domain list. The name
+ * strings the nodes point at are not freed.
+ */
+void domain_free(struct domainlist *dl)
+{
+	struct domainlist *next;
+
+	for (; dl; dl = next) {
+		next = dl->next;
+		free(dl);
+	}
+}
+
+/*
+ * same-path policy.
+ * Some policy decisions are guided by knowledge of which
+ * array previously owned the device at a given physical location (path).
+ * When removing a device from an array we might record the array against
+ * the path, and when finding a new device, we might look for which
+ * array previously used that path.
+ *
+ * The 'array' is described by a map_ent, and the path by the disk in an
+ * mdinfo, or a string.
+ */
+
+/*
+ * policy_save_path() records 'array' against 'id_path': it writes
+ * the array's metadata name and uuid into the file
+ * FAILED_SLOTS_DIR/<id_path>, creating the directory if needed.
+ * Failures are reported on stderr; nothing is returned.
+ */
+void policy_save_path(char *id_path, struct map_ent *array)
+{
+	char fname[PATH_MAX];
+	FILE *cookie = NULL;
+	int written;
+
+	/* an already existing directory is fine */
+	if (mkdir(FAILED_SLOTS_DIR, S_IRWXU) < 0 && errno != EEXIST) {
+		fprintf(stderr, Name ": can't create file to save path "
+			"to old disk: %s\n", strerror(errno));
+		return;
+	}
+
+	snprintf(fname, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_path);
+	cookie = fopen(fname, "w");
+	if (cookie == NULL) {
+		fprintf(stderr, Name ": can't create file to"
+			" save path to old disk: %s\n",
+			strerror(errno));
+		return;
+	}
+
+	written = fprintf(cookie, "%s %08x:%08x:%08x:%08x\n",
+			  array->metadata,
+			  array->uuid[0], array->uuid[1],
+			  array->uuid[2], array->uuid[3]);
+	if (written <= 0)
+		fprintf(stderr, Name ": Failed to write to "
+			"<id_path> cookie\n");
+
+	fclose(cookie);
+}
+
+/*
+ * policy_check_path() looks for the file FAILED_SLOTS_DIR/<path of
+ * 'disk'> and, if it can be opened, reads a metadata name and a
+ * uuid from it into 'array'.
+ * Returns 1 if all five fields were read, 0 otherwise (including
+ * when the disk has no path or the file does not exist).
+ */
+int policy_check_path(struct mdinfo *disk, struct map_ent *array)
+{
+	char path[PATH_MAX];
+	FILE *f = NULL;
+	char *id_path = disk_path(disk);
+	int rv;
+
+	if (!id_path)
+		return 0;
+
+	snprintf(path, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_path);
+	/* id_path is only needed to build the file name. It is heap
+	 * allocated (disk_policy() frees the same result), so release
+	 * it here instead of leaking it on every return path.
+	 */
+	free(id_path);
+
+	f = fopen(path, "r");
+	if (!f)
+		return 0;
+
+	/* NOTE(review): %s has no field width - confirm that
+	 * array->metadata is large enough for any stored name.
+	 */
+	rv = fscanf(f, " %s %x:%x:%x:%x\n",
+		    array->metadata,
+		    array->uuid,
+		    array->uuid+1,
+		    array->uuid+2,
+		    array->uuid+3);
+	fclose(f);
+	return rv == 5;
+}
*/
int pd;
+ /* layout is not relevant for raid0 and raid4 */
+ if ((level == 0) ||
+ (level == 4))
+ layout = 0;
+
switch(level*100 + layout) {
case 000:
case 400:
#ifndef MDASSEMBLE
static int load_super_ddf_all(struct supertype *st, int fd,
- void **sbp, char *devname, int keep_fd);
+ void **sbp, char *devname);
#endif
static void free_super_ddf(struct supertype *st);
#ifndef MDASSEMBLE
/* if 'fd' is a container, load metadata from all the devices */
- if (load_super_ddf_all(st, fd, &st->sb, devname, 1) == 0)
+ if (load_super_ddf_all(st, fd, &st->sb, devname) == 0)
return 0;
#endif
- if (st->subarray[0])
- return 1; /* FIXME Is this correct */
if (get_dev_size(fd, devname, &dsize) == 0)
return 1;
return rv;
}
- if (st->subarray[0]) {
- unsigned long val;
- struct vcl *v;
- char *ep;
-
- val = strtoul(st->subarray, &ep, 10);
- if (*ep != '\0') {
- free(super);
- return 1;
- }
-
- for (v = super->conflist; v; v = v->next)
- if (v->vcnum == val)
- super->currentconf = v;
- if (!super->currentconf) {
- free(super);
- return 1;
- }
- }
-
/* Should possibly check the sections .... */
st->sb = super;
st->minor_version = 0;
st->max_devs = 512;
}
- st->loaded_container = 0;
return 0;
}
st = malloc(sizeof(*st));
memset(st, 0, sizeof(*st));
+ st->container_dev = NoMdDev;
st->ss = &super_ddf;
st->max_devs = 512;
st->minor_version = 0;
examine_pds(sb);
}
-static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info);
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
*/
struct mdinfo info;
char nbuf[64];
- getinfo_super_ddf(st, &info);
+ getinfo_super_ddf(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
struct mdinfo info;
unsigned int i;
char nbuf[64];
- getinfo_super_ddf(st, &info);
+ getinfo_super_ddf(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
{
struct mdinfo info;
char nbuf[64];
- getinfo_super_ddf(st, &info);
+ getinfo_super_ddf(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf("MD_METADATA=ddf\n");
printf("MD_LEVEL=container\n");
// struct ddf_super *ddf = st->sb;
struct mdinfo info;
char nbuf[64];
- getinfo_super_ddf(st, &info);
+ getinfo_super_ddf(st, &info, NULL);
fname_from_uuid(st, &info, nbuf,':');
printf(" UUID=%s", nbuf + 5);
}
memcpy(uuid, buf, 4*4);
}
-static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info);
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
-static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info)
+static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
{
struct ddf_super *ddf = st->sb;
+ int map_disks = info->array.raid_disks;
if (ddf->currentconf) {
- getinfo_super_ddf_bvd(st, info);
+ getinfo_super_ddf_bvd(st, info, map);
return;
}
uuid_from_super_ddf(st, info->uuid);
+ if (map) {
+ int i;
+ for (i = 0 ; i < map_disks; i++) {
+ if (i < info->array.raid_disks &&
+ (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
+ !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
+ map[i] = 1;
+ else
+ map[i] = 0;
+ }
+ }
}
static int rlq_to_layout(int rlq, int prl, int raiddisks);
-static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
+static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
{
struct ddf_super *ddf = st->sb;
struct vcl *vc = ddf->currentconf;
int cd = ddf->currentdev;
int j;
struct dl *dl;
+ int map_disks = info->array.raid_disks;
/* FIXME this returns BVD info - what if we want SVD ?? */
uuid_from_super_ddf(st, info->uuid);
- info->container_member = atoi(st->subarray);
info->array.major_version = -1;
info->array.minor_version = -2;
- sprintf(info->text_version, "/%s/%s",
+ sprintf(info->text_version, "/%s/%d",
devnum2devname(st->container_dev),
- st->subarray);
+ info->container_member);
info->safe_mode_delay = 200;
memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
for(j=0; j<16; j++)
if (info->name[j] == ' ')
info->name[j] = 0;
+
+ if (map)
+ for (j = 0; j < map_disks; j++) {
+ map[j] = 0;
+ if (j < info->array.raid_disks) {
+ int i = find_phys(ddf, vc->conf.phys_refnum[j]);
+ if (i >= 0 &&
+ (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
+ !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
+ map[i] = 1;
+ }
+ }
}
if (strcmp(update, "grow") == 0) {
/* FIXME */
- }
- if (strcmp(update, "resync") == 0) {
+ } else if (strcmp(update, "resync") == 0) {
// info->resync_checkpoint = 0;
- }
- /* We ignore UUID updates as they make even less sense
- * with DDF
- */
- if (strcmp(update, "homehost") == 0) {
+ } else if (strcmp(update, "homehost") == 0) {
/* homehost is stored in controller->vendor_data,
* or it is when we are the vendor
*/
// if (info->vendor_is_local)
// strcpy(ddf->controller.vendor_data, homehost);
- }
- if (strcmp(update, "name") == 0) {
+ rv = -1;
+ } else if (strcmp(update, "name") == 0) {
/* name is stored in virtual_entry->name */
// memset(ve->name, ' ', 16);
// strncpy(ve->name, info->name, 16);
- }
- if (strcmp(update, "_reshape_progress") == 0) {
+ rv = -1;
+ } else if (strcmp(update, "_reshape_progress") == 0) {
/* We don't support reshape yet */
- }
+ } else if (strcmp(update, "assemble") == 0 ) {
+ /* Do nothing, just succeed */
+ rv = 0;
+ } else
+ rv = -1;
// update_all_csum(ddf);
return 0;
}
+ if (name)
+ for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
+ if (!all_ff(ddf->virt->entries[venum].guid)) {
+ char *n = ddf->virt->entries[venum].name;
+
+ if (strncmp(name, n, 16) == 0) {
+ fprintf(stderr, Name ": This ddf already"
+ " has an array called %s\n",
+ name);
+ return 0;
+ }
+ }
+
for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
if (all_ff(ddf->virt->entries[venum].guid))
break;
}
vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
vcl->vcnum = venum;
- sprintf(st->subarray, "%d", venum);
vcl->block_sizes = NULL; /* FIXME not for CONCAT */
vc = &vcl->conf;
* and try to create a bvd
*/
struct ddf_super *ddf;
- if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL, 1) == 0) {
+ if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
st->sb = ddf;
st->container_dev = fd2devnum(cfd);
close(cfd);
}
static int load_super_ddf_all(struct supertype *st, int fd,
- void **sbp, char *devname, int keep_fd)
+ void **sbp, char *devname)
{
struct mdinfo *sra;
struct ddf_super *super;
int rv;
sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
- dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
+ dfd = dev_open(nm, O_RDWR);
if (dfd < 0)
return 2;
rv = load_ddf_headers(dfd, super, NULL);
if (rv == 0)
- rv = load_ddf_local(dfd, super, NULL, keep_fd);
- if (!keep_fd) close(dfd);
+ rv = load_ddf_local(dfd, super, NULL, 1);
if (rv)
return 1;
}
- if (st->subarray[0]) {
- unsigned long val;
- struct vcl *v;
- char *ep;
-
- val = strtoul(st->subarray, &ep, 10);
- if (*ep != '\0') {
- free(super);
- return 1;
- }
-
- for (v = super->conflist; v; v = v->next)
- if (v->vcnum == val)
- super->currentconf = v;
- if (!super->currentconf) {
- free(super);
- return 1;
- }
- }
*sbp = super;
if (st->ss == NULL) {
st->ss = &super_ddf;
st->minor_version = 0;
st->max_devs = 512;
- st->container_dev = fd2devnum(fd);
}
- st->loaded_container = 1;
+ st->container_dev = fd2devnum(fd);
return 0;
}
+
+/*
+ * load_container_ddf() loads the metadata of the container open on
+ * 'fd' into st->sb by way of load_super_ddf_all() and returns its
+ * result.
+ */
+static int load_container_ddf(struct supertype *st, int fd, char *devname)
+{
+	int rv = load_super_ddf_all(st, fd, &st->sb, devname);
+
+	return rv;
+}
+
#endif /* MDASSEMBLE */
-static struct mdinfo *container_content_ddf(struct supertype *st)
+static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
{
/* Given a container loaded by load_super_ddf_all,
* extract information about all the arrays into
unsigned int i;
unsigned int j;
struct mdinfo *this;
+ char *ep;
+
+ if (subarray &&
+ (strtoul(subarray, &ep, 10) != vc->vcnum ||
+ *ep != '\0'))
+ continue;
+
this = malloc(sizeof(*this));
memset(this, 0, sizeof(*this));
this->next = rest;
}
}
+/*
+ * default_geometry_ddf() fills in defaults for values still UnSet:
+ * the level becomes LEVEL_CONTAINER and the layout is derived from
+ * the level with ddf_level_to_layout(). Nothing is done when
+ * 'level' is NULL; 'st' and 'chunk' are not used.
+ */
+static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
+{
+	if (!level)
+		return;
+
+	if (*level == UnSet)
+		*level = LEVEL_CONTAINER;
+
+	if (layout && *layout == UnSet)
+		*layout = ddf_level_to_layout(*level);
+}
+
struct superswitch super_ddf = {
#ifndef MDASSEMBLE
.examine_super = examine_super_ddf,
.validate_geometry = validate_geometry_ddf,
.write_init_super = write_init_super_ddf,
.add_to_super = add_to_super_ddf,
+ .load_container = load_container_ddf,
#endif
.match_home = match_home_ddf,
.uuid_from_super= uuid_from_super_ddf,
.free_super = free_super_ddf,
.match_metadata_desc = match_metadata_desc_ddf,
.container_content = container_content_ddf,
- .default_layout = ddf_level_to_layout,
+ .default_geometry = default_geometry_ddf,
.external = 1,
--- /dev/null
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2010 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neil@brown.name>
+ *
+ */
+
+/*
+ * 'gpt' is a pseudo metadata type for devices which have a
+ * GPT partition table.
+ *
+ * Obviously arrays cannot be created or assembled for this type.
+ * It is used to allow a new bare device to have a partition table
+ * added so the member partitions can then be included in other
+ * arrays as relevant.
+ *
+ * The meaningful operations are:
+ * examine_super, but not brief_examine_super or export_examine
+ * load_super
+ * store_super
+ */
+
+#include "mdadm.h"
+#include "part.h"
+
+/* Release the buffer held in st->sb, if any, and clear the pointer. */
+static void free_gpt(struct supertype *st)
+{
+	void *old = st->sb;
+
+	st->sb = NULL;
+	free(old);
+}
+
+#ifndef MDASSEMBLE
+/*
+ * examine_gpt() prints the GPT magic and revision followed by one
+ * line per partition entry giving its size in sectors and its
+ * starting LBA. 'homehost' is not used.
+ */
+static void examine_gpt(struct supertype *st, char *homehost)
+{
+	/* the GPT header is expected 512 bytes into st->sb and the
+	 * partition entries 1024 bytes in
+	 */
+	struct GPT *gpt = st->sb + 512;
+	struct GPT_part_entry *gpe = st->sb + 1024;
+	unsigned int i;
+
+	printf(" GPT Magic : %llx\n", (unsigned long long)__le64_to_cpu(gpt->magic));
+	printf(" GPT Revision : %ld\n", (long)__le32_to_cpu(gpt->revision));
+	for (i = 0; i < __le32_to_cpu(gpt->part_cnt); i++) {
+		unsigned long long start =
+			(unsigned long long)__le64_to_cpu(gpe[i].starting_lba);
+		unsigned long long end =
+			(unsigned long long)__le64_to_cpu(gpe[i].ending_lba);
+
+		/* the format is "<size> sectors at <start>", so the size
+		 * has to be passed first; 'i' is unsigned, hence %u
+		 */
+		printf(" Partition[%02u] : %12llu sectors at %12llu\n",
+		       i,
+		       end - start + 1,
+		       start);
+	}
+}
+#endif /* MDASSEMBLE */
+
+/*
+ * load_gpt() reads the MBR, the GPT header and the GPT partition
+ * entries from the start of 'fd' into one freshly allocated buffer
+ * and stores it in st->sb. Any buffer already in st->sb is freed
+ * first.
+ * Returns 0 on success and 1 on failure; a message is printed on
+ * stderr when 'devname' is given (and for allocation failure).
+ */
+static int load_gpt(struct supertype *st, int fd, char *devname)
+{
+	/* tables announcing this many entries or more are rejected */
+	const unsigned int max_parts = 128;
+	struct MBR *super;
+	struct GPT *gpt_head;
+	size_t buf_size;
+	int to_read;
+
+	free_gpt(st);
+
+	/* The entries are stored after the MBR and the GPT header, so
+	 * the buffer must hold all three. Sizing it from the types
+	 * means the final read below can never run past the end.
+	 */
+	buf_size = max_parts * sizeof(struct GPT_part_entry);
+	buf_size = ((buf_size+511)/512) * 512;
+	buf_size += sizeof(*super) + sizeof(*gpt_head);
+
+	if (posix_memalign((void**)&super, 512, buf_size) != 0) {
+		fprintf(stderr, Name ": %s could not allocate superblock\n",
+			__func__);
+		return 1;
+	}
+
+	ioctl(fd, BLKFLSBUF, 0); /* make sure we read current data */
+
+	lseek(fd, 0, 0);
+	if (read(fd, super, sizeof(*super)) != sizeof(*super))
+		goto no_read;
+
+	if (super->magic != MBR_SIGNATURE_MAGIC ||
+	    super->parts[0].part_type != MBR_GPT_PARTITION_TYPE)
+		goto not_found;
+
+	/* Seem to have GPT, load the header */
+	gpt_head = (struct GPT*)(super+1);
+	if (read(fd, gpt_head, sizeof(*gpt_head)) != sizeof(*gpt_head))
+		goto no_read;
+	if (gpt_head->magic != GPT_SIGNATURE_MAGIC)
+		goto not_found;
+	if (__le32_to_cpu(gpt_head->part_cnt) >= max_parts)
+		goto not_found;
+
+	/* the entry area is read in whole 512-byte units */
+	to_read = __le32_to_cpu(gpt_head->part_cnt) * sizeof(struct GPT_part_entry);
+	to_read = ((to_read+511)/512) * 512;
+	if (read(fd, gpt_head+1, to_read) != to_read)
+		goto no_read;
+
+	st->sb = super;
+
+	if (st->ss == NULL) {
+		st->ss = &gpt;
+		st->minor_version = 0;
+		st->max_devs = 1;
+		st->info = NULL;
+	}
+	return 0;
+
+ no_read:
+	if (devname)
+		fprintf(stderr, Name ": Cannot read partition table on %s\n",
+			devname);
+	free(super);
+	return 1;
+
+ not_found:
+	if (devname)
+		fprintf(stderr, Name ": No partition table found on %s\n",
+			devname);
+	free(super);
+	return 1;
+}
+
+/*
+ * store_gpt() writes the buffer in st->sb (MBR, GPT header and
+ * partition entries, in the layout load_gpt() produces) to the
+ * start of 'fd', then asks the kernel to re-read the partition
+ * table.
+ * Returns 0 on success and 4 if the write was short or failed.
+ */
+static int store_gpt(struct supertype *st, int fd)
+{
+	/* FIXME should I save the boot loader */
+	/* need to write two copies! */
+	/* FIXME allow for blocks != 512 bytes
+	 *etc
+	 */
+	struct MBR *super = st->sb;
+	struct GPT *gpt;
+	int to_write;
+
+	gpt = (struct GPT*)(super+1);
+
+	to_write = __le32_to_cpu(gpt->part_cnt) * sizeof(struct GPT_part_entry);
+	to_write = ((to_write+511)/512) * 512;
+	/* The write starts at the MBR, so the MBR and the GPT header
+	 * must be counted as well; otherwise the tail of the entry
+	 * table never reaches the disk.
+	 */
+	to_write += sizeof(*super) + sizeof(*gpt);
+
+	lseek(fd, 0, 0);
+	if (write(fd, st->sb, to_write) != to_write)
+		return 4;
+
+	fsync(fd);
+	ioctl(fd, BLKRRPART, 0);
+	return 0;
+}
+
+/*
+ * getinfo_gpt() fills 'info' for a device carrying a GPT: the array
+ * and disk parts are zeroed, the text version and name are both
+ * set to "gpt", and component_size becomes the largest ending LBA
+ * of any partition entry. 'map' is not used.
+ */
+static void getinfo_gpt(struct supertype *st, struct mdinfo *info, char *map)
+{
+	struct GPT *header = st->sb + 512;
+	struct GPT_part_entry *entries = st->sb + 1024;
+	unsigned int idx;
+
+	memset(&info->array, 0, sizeof(info->array));
+	memset(&info->disk, 0, sizeof(info->disk));
+	strcpy(info->text_version, "gpt");
+	strcpy(info->name, "gpt");
+	info->component_size = 0;
+
+	idx = 0;
+	while (idx < __le32_to_cpu(header->part_cnt)) {
+		unsigned long long end =
+			(unsigned long long)__le64_to_cpu(entries[idx].ending_lba);
+
+		if (info->component_size < end)
+			info->component_size = end;
+		idx++;
+	}
+}
+
+/*
+ * match_metadata_desc() returns a newly allocated supertype for the
+ * gpt handler when 'arg' is exactly "gpt". It returns NULL for any
+ * other name or when the allocation fails.
+ */
+static struct supertype *match_metadata_desc(char *arg)
+{
+	struct supertype *st;
+
+	/* check the name before allocating so that a mismatch does
+	 * not leak the structure
+	 */
+	if (strcmp(arg, "gpt") != 0)
+		return NULL;
+
+	st = malloc(sizeof(*st));
+	if (!st)
+		return st;
+
+	st->ss = &gpt;
+	st->info = NULL;
+	st->minor_version = 0;
+	st->max_devs = 1;
+	st->sb = NULL;
+	return st;
+}
+
+#ifndef MDASSEMBLE
+/*
+ * validate_geometry() for gpt always fails: it prints a message on
+ * stderr and returns 0 whatever geometry is asked for. None of the
+ * arguments are looked at.
+ */
+static int validate_geometry(struct supertype *st, int level,
+			     int layout, int raiddisks,
+			     int chunk, unsigned long long size,
+			     char *subdev, unsigned long long *freesize,
+			     int verbose)
+{
+	int rv = 0;
+
+	fprintf(stderr, Name ": gpt metadata cannot be used this way\n");
+
+	return rv;
+}
+#endif
+
+/* Operations table for the 'gpt' pseudo metadata type. */
+struct superswitch gpt = {
+#ifndef MDASSEMBLE
+	.examine_super = examine_gpt,
+	/* validate_geometry() is only compiled when MDASSEMBLE is not
+	 * defined, so it may only be referenced under the same guard
+	 */
+	.validate_geometry = validate_geometry,
+#endif
+	.match_metadata_desc = match_metadata_desc,
+	.load_super = load_gpt,
+	.store_super = store_gpt,
+	.getinfo_super = getinfo_gpt,
+	.free_super = free_gpt,
+	.name = "gpt",
+};
if (!st)
return NULL;
memset(st, 0, sizeof(*st));
+ st->container_dev = NoMdDev;
st->ss = &super_imsm;
st->max_devs = IMSM_MAX_DEVICES;
st->minor_version = 0;
return (disk->status & FAILED_DISK) == FAILED_DISK;
}
+/* Return minimum size of a spare that can be used in this array.
+ * The result is the end of the last extent on the first usable disk
+ * plus MPB_SECTOR_CNT and IMSM_RESERVED_SECTORS, multiplied by 512.
+ * 0 is returned when there is no metadata, no usable disk, or no
+ * extent list.
+ */
+static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
+{
+	struct intel_super *super = st->sb;
+	struct extent *ext;
+	struct dl *d;
+	unsigned long long sectors = 0;
+	int n = 0;
+
+	if (super == NULL)
+		return 0;
+
+	/* find first active disk in array */
+	for (d = super->disks; d; d = d->next)
+		if (!is_failed(&d->disk) && d->index != -1)
+			break;
+	if (d == NULL)
+		return 0;
+
+	/* find last lba used by subarrays */
+	ext = get_extents(super, d);
+	if (ext == NULL)
+		return 0;
+	while (ext[n].size)
+		n++;
+	if (n > 0)
+		sectors = ext[n-1].start + ext[n-1].size;
+	free(ext);
+
+	/* add the amount of space needed for metadata */
+	sectors = sectors + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
+
+	return sectors * 512;
+}
+
#ifndef MDASSEMBLE
static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
human_size(sz * 512));
}
-static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info);
+static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
static void examine_super_imsm(struct supertype *st, char *homehost)
{
printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
- getinfo_super_imsm(st, &info);
+ getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf(" UUID : %s\n", nbuf + 5);
sum = __le32_to_cpu(mpb->check_sum);
struct imsm_dev *dev = __get_imsm_dev(mpb, i);
super->current_vol = i;
- getinfo_super_imsm(st, &info);
+ getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
print_imsm_dev(dev, nbuf + 5, super->disks->index);
}
return;
}
- getinfo_super_imsm(st, &info);
+ getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
}
if (!super->anchor->num_raid_devs)
return;
- getinfo_super_imsm(st, &info);
+ getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
for (i = 0; i < super->anchor->num_raid_devs; i++) {
struct imsm_dev *dev = get_imsm_dev(super, i);
super->current_vol = i;
- getinfo_super_imsm(st, &info);
+ getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf1, ':');
printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
dev->volume, nbuf + 5, i, nbuf1 + 5);
struct mdinfo info;
char nbuf[64];
- getinfo_super_imsm(st, &info);
+ getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf("MD_METADATA=imsm\n");
printf("MD_LEVEL=container\n");
struct mdinfo info;
char nbuf[64];
- getinfo_super_imsm(st, &info);
+ getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf("\n UUID : %s\n", nbuf + 5);
}
{
struct mdinfo info;
char nbuf[64];
- getinfo_super_imsm(st, &info);
+ getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf(" UUID=%s", nbuf + 5);
}
return UnSet;
}
-static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
+static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
{
struct intel_super *super = st->sb;
struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
struct imsm_map *map = get_imsm_map(dev, 0);
struct dl *dl;
char *devname;
+ int map_disks = info->array.raid_disks;
for (dl = super->disks; dl; dl = dl->next)
if (dl->raiddisk == info->disk.raid_disk)
free(devname);
info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
uuid_from_super_imsm(st, info->uuid);
-}
+
+ if (dmap) {
+ int i, j;
+ for (i=0; i<map_disks; i++) {
+ dmap[i] = 0;
+ if (i < info->array.raid_disks) {
+ struct imsm_disk *dsk;
+ j = get_imsm_disk_idx(dev, i);
+ dsk = get_imsm_disk(super, j);
+ if (dsk && (dsk->status & CONFIGURED_DISK))
+ dmap[i] = 1;
+ }
+ }
+ }
+}
/* check the config file to see if we can return a real uuid for this spare */
static void fixup_container_spare_uuid(struct mdinfo *inf)
{
- struct mddev_ident_s *array_list;
+ struct mddev_ident *array_list;
if (inf->array.level != LEVEL_CONTAINER ||
memcmp(inf->uuid, uuid_match_any, sizeof(int[4])) != 0)
return NULL;
}
-static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
+static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
{
struct intel_super *super = st->sb;
struct imsm_disk *disk;
+ int map_disks = info->array.raid_disks;
+ int max_enough = -1;
+ int i;
+ struct imsm_super *mpb;
if (super->current_vol >= 0) {
- getinfo_super_imsm_volume(st, info);
+ getinfo_super_imsm_volume(st, info, map);
return;
}
info->recovery_start = MaxSector;
/* do we have the all the insync disks that we expect? */
- if (st->loaded_container) {
- struct imsm_super *mpb = super->anchor;
- int max_enough = -1, i;
+ mpb = super->anchor;
- for (i = 0; i < mpb->num_raid_devs; i++) {
- struct imsm_dev *dev = get_imsm_dev(super, i);
- int failed, enough, j, missing = 0;
- struct imsm_map *map;
- __u8 state;
+ for (i = 0; i < mpb->num_raid_devs; i++) {
+ struct imsm_dev *dev = get_imsm_dev(super, i);
+ int failed, enough, j, missing = 0;
+ struct imsm_map *map;
+ __u8 state;
- failed = imsm_count_failed(super, dev);
- state = imsm_check_degraded(super, dev, failed);
- map = get_imsm_map(dev, dev->vol.migr_state);
+ failed = imsm_count_failed(super, dev);
+ state = imsm_check_degraded(super, dev, failed);
+ map = get_imsm_map(dev, dev->vol.migr_state);
- /* any newly missing disks?
- * (catches single-degraded vs double-degraded)
- */
- for (j = 0; j < map->num_members; j++) {
- __u32 ord = get_imsm_ord_tbl_ent(dev, i);
- __u32 idx = ord_to_idx(ord);
+ /* any newly missing disks?
+ * (catches single-degraded vs double-degraded)
+ */
+ for (j = 0; j < map->num_members; j++) {
+ __u32 ord = get_imsm_ord_tbl_ent(dev, i);
+ __u32 idx = ord_to_idx(ord);
- if (!(ord & IMSM_ORD_REBUILD) &&
- get_imsm_missing(super, idx)) {
- missing = 1;
- break;
- }
+ if (!(ord & IMSM_ORD_REBUILD) &&
+ get_imsm_missing(super, idx)) {
+ missing = 1;
+ break;
}
+ }
- if (state == IMSM_T_STATE_FAILED)
- enough = -1;
- else if (state == IMSM_T_STATE_DEGRADED &&
- (state != map->map_state || missing))
- enough = 0;
- else /* we're normal, or already degraded */
- enough = 1;
+ if (state == IMSM_T_STATE_FAILED)
+ enough = -1;
+ else if (state == IMSM_T_STATE_DEGRADED &&
+ (state != map->map_state || missing))
+ enough = 0;
+ else /* we're normal, or already degraded */
+ enough = 1;
- /* in the missing/failed disk case check to see
- * if at least one array is runnable
- */
- max_enough = max(max_enough, enough);
- }
- dprintf("%s: enough: %d\n", __func__, max_enough);
- info->container_enough = max_enough;
- } else
- info->container_enough = -1;
+ /* in the missing/failed disk case check to see
+ * if at least one array is runnable
+ */
+ max_enough = max(max_enough, enough);
+ }
+ dprintf("%s: enough: %d\n", __func__, max_enough);
+ info->container_enough = max_enough;
if (super->disks) {
__u32 reserved = imsm_reserved_sectors(super, super->disks);
memcpy(info->uuid, uuid_match_any, sizeof(int[4]));
fixup_container_spare_uuid(info);
}
+
+ /* I don't know how to compute 'map' on imsm, so use safe default */
+ if (map) {
+ int i;
+ for (i = 0; i < map_disks; i++)
+ map[i] = 1;
+ }
+
+}
+
+/* Allocates an mdinfo whose 'devs' list holds one zeroed mdinfo per
+ * disk in super->disks, with disk.number, major, minor, state and
+ * raid_disk filled in. Each new entry is put on the front of the
+ * list.
+ * Returns NULL when there is no metadata, no disks, or an
+ * allocation fails.
+ */
+struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
+{
+	struct intel_super *super = st->sb;
+	struct mdinfo *head;
+	struct dl *d;
+	int seq = 0;
+
+	if (!super || !super->disks)
+		return NULL;
+
+	head = malloc(sizeof(*head));
+	if (head == NULL) {
+		fprintf(stderr, Name ": Failed to allocate memory.\n");
+		return NULL;
+	}
+	memset(head, 0, sizeof(*head));
+
+	for (d = super->disks; d; d = d->next) {
+		struct imsm_disk *disk = &d->disk;
+		struct mdinfo *entry = malloc(sizeof(*entry));
+		int state;
+
+		if (entry == NULL) {
+			fprintf(stderr, Name ": Failed to allocate memory.\n");
+			sysfs_free(head);
+			return NULL;
+		}
+		memset(entry, 0, sizeof(*entry));
+
+		/* push on the front of the devs list */
+		entry->next = head->devs;
+		head->devs = entry;
+
+		entry->disk.number = seq++;
+		entry->disk.major = d->major;
+		entry->disk.minor = d->minor;
+
+		state = 0;
+		if (is_configured(disk))
+			state = (1 << MD_DISK_ACTIVE);
+		if (is_failed(disk))
+			state |= (1 << MD_DISK_FAULTY);
+		if (!is_spare(disk))
+			state |= (1 << MD_DISK_SYNC);
+		entry->disk.state = state;
+
+		entry->disk.raid_disk = -1;
+	}
+	return head;
+}
static int update_super_imsm(struct supertype *st, struct mdinfo *info,
mpb = super->anchor;
if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
- fprintf(stderr,
- Name ": '--uuid' not supported for imsm metadata\n");
+ rv = -1;
else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
mpb->orig_family_num = *((__u32 *) info->update_private);
rv = 0;
} else if (strcmp(update, "assemble") == 0)
rv = 0;
else
- fprintf(stderr,
- Name ": '--update=%s' not supported for imsm metadata\n",
- update);
+ rv = -1;
/* successful update? recompute checksum */
if (rv == 0)
/* duplicate and then set the target end state in map[0] */
memcpy(dest, src, sizeof_imsm_map(src));
- if (migr_type == MIGR_REBUILD) {
+ if ((migr_type == MIGR_REBUILD) ||
+ (migr_type == MIGR_GEN_MIGR)) {
__u32 ord;
int i;
{
struct imsm_map *map = get_imsm_map(dev, 0);
struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
- int i;
+ int i, j;
/* merge any IMSM_ORD_REBUILD bits that were not successfully
* completed in the last migration.
*
- * FIXME add support for online capacity expansion and
- * raid-level-migration
+ * FIXME add support for raid-level-migration
*/
for (i = 0; i < prev->num_members; i++)
- map->disk_ord_tbl[i] |= prev->disk_ord_tbl[i];
+ for (j = 0; j < map->num_members; j++)
+ /* during online capacity expansion
+ * disks position can be changed if takeover is used
+ */
+ if (ord_to_idx(map->disk_ord_tbl[j]) ==
+ ord_to_idx(prev->disk_ord_tbl[i])) {
+ map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
+ break;
+ }
dev->vol.migr_state = 0;
+ dev->vol.migr_type = 0;
dev->vol.curr_migr_unit = 0;
map->map_state = map_state;
}
__u32 check_sum;
get_dev_size(fd, NULL, &dsize);
+ if (dsize < 1024) {
+ if (devname)
+ fprintf(stderr,
+ Name ": %s: device to small for imsm\n",
+ devname);
+ return 1;
+ }
if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
if (devname)
}
static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
- char *devname, int keep_fd)
+ char *devname)
{
struct mdinfo *sra;
struct intel_super *super_list = NULL;
err = 2;
sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
- dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY);
+ dfd = dev_open(nm, O_RDWR);
if (dfd < 0)
goto error;
- err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
+ err = load_and_parse_mpb(dfd, s, NULL, 1);
/* retry the load if we might have raced against mdmon */
if (err == 3 && mdmon_running(devnum))
for (retry = 0; retry < 3; retry++) {
usleep(3000);
- err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
+ err = load_and_parse_mpb(dfd, s, NULL, 1);
if (err != 3)
break;
}
- if (!keep_fd)
- close(dfd);
if (err)
goto error;
}
err = 2;
goto error;
}
-
- if (st->subarray[0]) {
- unsigned long val;
- char *ep;
-
- err = 1;
- val = strtoul(st->subarray, &ep, 10);
- if (*ep != '\0') {
- free_imsm(super);
- goto error;
- }
-
- if (val < super->anchor->num_raid_devs)
- super->current_vol = val;
- else {
- free_imsm(super);
- goto error;
- }
- }
err = 0;
error:
st->minor_version = 0;
st->max_devs = IMSM_MAX_DEVICES;
}
- st->loaded_container = 1;
-
return 0;
}
+
+/* superswitch ->load_container hook: load the container metadata
+ * through load_super_imsm_all(), storing the result in st->sb.
+ */
+static int load_container_imsm(struct supertype *st, int fd, char *devname)
+{
+	void **sbp = &st->sb;
+
+	return load_super_imsm_all(st, fd, sbp, devname);
+}
#endif
static int load_super_imsm(struct supertype *st, int fd, char *devname)
int rv;
#ifndef MDASSEMBLE
- if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0)
+ if (load_super_imsm_all(st, fd, &st->sb, devname) == 0)
return 0;
#endif
return rv;
}
- if (st->subarray[0]) {
- unsigned long val;
- char *ep;
-
- val = strtoul(st->subarray, &ep, 10);
- if (*ep != '\0') {
- free_imsm(super);
- return 1;
- }
-
- if (val < super->anchor->num_raid_devs)
- super->current_vol = val;
- else {
- free_imsm(super);
- return 1;
- }
- }
-
st->sb = super;
if (st->ss == NULL) {
st->ss = &super_imsm;
st->minor_version = 0;
st->max_devs = IMSM_MAX_DEVICES;
}
- st->loaded_container = 0;
-
return 0;
}
if (!check_name(super, name, 0))
return 0;
- sprintf(st->subarray, "%d", idx);
dv = malloc(sizeof(*dv));
if (!dv) {
fprintf(stderr, Name ": failed to allocate device list entry\n");
return 0;
}
-static int write_super_imsm(struct intel_super *super, int doclose)
+static int write_super_imsm(struct supertype *st, int doclose)
{
+ struct intel_super *super = st->sb;
struct imsm_super *mpb = super->anchor;
struct dl *d;
__u32 generation;
int spares = 0;
int i;
__u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
+ int num_disks = 0;
/* 'generation' is incremented everytime the metadata is written */
generation = __le32_to_cpu(mpb->generation_num);
if (mpb->orig_family_num == 0)
mpb->orig_family_num = mpb->family_num;
- mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
for (d = super->disks; d; d = d->next) {
if (d->index == -1)
spares++;
- else
+ else {
mpb->disk[d->index] = d->disk;
+ num_disks++;
+ }
}
- for (d = super->missing; d; d = d->next)
+ for (d = super->missing; d; d = d->next) {
mpb->disk[d->index] = d->disk;
+ num_disks++;
+ }
+ mpb->num_disks = num_disks;
+ mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
for (i = 0; i < mpb->num_raid_devs; i++) {
struct imsm_dev *dev = __get_imsm_dev(mpb, i);
-
- imsm_copy_dev(dev, get_imsm_dev(super, i));
- mpb_size += sizeof_imsm_dev(dev, 0);
+ struct imsm_dev *dev2 = get_imsm_dev(super, i);
+ if (dev && dev2) {
+ imsm_copy_dev(dev, dev2);
+ mpb_size += sizeof_imsm_dev(dev, 0);
+ }
}
mpb_size += __le32_to_cpu(mpb->bbm_log_size);
mpb->mpb_size = __cpu_to_le32(mpb_size);
struct dl *d;
for (d = super->disks; d; d = d->next)
Kill(d->devname, NULL, 0, 1, 1);
- return write_super_imsm(st->sb, 1);
+ return write_super_imsm(st, 1);
}
}
#endif
*/
struct intel_super *super;
- if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) {
+ if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
st->sb = super;
st->container_dev = fd2devnum(cfd);
close(cfd);
return 0;
}
-static int default_chunk_imsm(struct supertype *st)
+static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
{
struct intel_super *super = st->sb;
- if (!super || !super->orom)
- return 0;
+ if (level && *level == UnSet)
+ *level = LEVEL_CONTAINER;
+
+ if (level && layout && *layout == UnSet)
+ *layout = imsm_level_to_layout(*level);
- return imsm_orom_default_chunk(super->orom);
+ if (chunk && (*chunk == UnSet || *chunk == 0) &&
+ super && super->orom)
+ *chunk = imsm_orom_default_chunk(super->orom);
}
static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
return 0;
}
-static int update_subarray_imsm(struct supertype *st, char *update, mddev_ident_t ident)
+static int update_subarray_imsm(struct supertype *st, char *subarray,
+ char *update, struct mddev_ident *ident)
{
/* update the subarray currently referenced by ->current_vol */
struct intel_super *super = st->sb;
struct imsm_super *mpb = super->anchor;
- if (super->current_vol < 0)
- return 2;
-
if (strcmp(update, "name") == 0) {
char *name = ident->name;
+ char *ep;
+ int vol;
- if (is_subarray_active(st->subarray, st->devname)) {
+ if (is_subarray_active(subarray, st->devname)) {
fprintf(stderr,
Name ": Unable to update name of active subarray\n");
return 2;
if (!check_name(super, name, 0))
return 2;
+ vol = strtoul(subarray, &ep, 10);
+ if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
+ return 2;
+
if (st->update_tail) {
struct imsm_update_rename_array *u = malloc(sizeof(*u));
if (!u)
return 2;
u->type = update_rename_array;
- u->dev_idx = super->current_vol;
+ u->dev_idx = vol;
snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
append_metadata_update(st, u, sizeof(*u));
} else {
struct imsm_dev *dev;
int i;
- dev = get_imsm_dev(super, super->current_vol);
+ dev = get_imsm_dev(super, vol);
snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
for (i = 0; i < mpb->num_raid_devs; i++) {
dev = get_imsm_dev(super, i);
}
#endif /* MDASSEMBLE */
+/* Return 1 when 'dev' has a migration in progress and its type is
+ * MIGR_GEN_MIGR, 0 otherwise.
+ */
+static int is_gen_migration(struct imsm_dev *dev)
+{
+	/* migr_type() is only consulted while a migration is active */
+	return dev->vol.migr_state && migr_type(dev) == MIGR_GEN_MIGR;
+}
+
static int is_rebuilding(struct imsm_dev *dev)
{
struct imsm_map *migr_map;
}
-static struct mdinfo *container_content_imsm(struct supertype *st)
+static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
{
/* Given a container loaded by load_super_imsm_all,
* extract information about all the arrays into
* an mdinfo tree.
+ * If 'subarray' is given, just extract info about that array.
*
* For each imsm_dev create an mdinfo, fill it in,
* then look for matching devices in super->disks
struct intel_super *super = st->sb;
struct imsm_super *mpb = super->anchor;
struct mdinfo *rest = NULL;
- int i;
+ unsigned int i;
/* do not assemble arrays that might have bad blocks */
if (imsm_bbm_log_size(super->anchor)) {
}
for (i = 0; i < mpb->num_raid_devs; i++) {
- struct imsm_dev *dev = get_imsm_dev(super, i);
- struct imsm_map *map = get_imsm_map(dev, 0);
+ struct imsm_dev *dev;
+ struct imsm_map *map;
struct mdinfo *this;
int slot;
+ char *ep;
+
+ if (subarray &&
+ (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
+ continue;
+
+ dev = get_imsm_dev(super, i);
+ map = get_imsm_map(dev, 0);
/* do not publish arrays that are in the middle of an
* unsupported migration
*/
if (dev->vol.migr_state &&
- (migr_type(dev) == MIGR_GEN_MIGR ||
- migr_type(dev) == MIGR_STATE_CHANGE)) {
+ (migr_type(dev) == MIGR_STATE_CHANGE)) {
fprintf(stderr, Name ": cannot assemble volume '%.16s':"
" unsupported migration in progress\n",
dev->volume);
this->next = rest;
super->current_vol = i;
- getinfo_super_imsm_volume(st, this);
+ getinfo_super_imsm_volume(st, this, NULL);
for (slot = 0 ; slot < map->num_members; slot++) {
unsigned long long recovery_start;
struct mdinfo *info_d;
super->updates_pending++;
}
+static void imsm_set_disk(struct active_array *a, int n, int state);
+
/* Handle dirty -> clean transititions and resync. Degraded and rebuild
* states are handled in imsm_set_disk() with one exception, when a
* resync is stopped due to a new failure this routine will set the
dev->vol.dirty = 1;
super->updates_pending++;
}
+
+ /* finalize online capacity expansion/reshape */
+ if ((a->curr_action != reshape) &&
+ (a->prev_action == reshape)) {
+ struct mdinfo *mdi;
+
+ for (mdi = a->info.devs; mdi; mdi = mdi->next)
+ imsm_set_disk(a, mdi->disk.raid_disk, mdi->curr_state);
+ }
+
return consistent;
}
end_migration(dev, map_state);
super->updates_pending++;
a->last_checkpoint = 0;
+ } else if (is_gen_migration(dev)) {
+ dprintf("imsm: Detected General Migration in state: ");
+ if (map_state == IMSM_T_STATE_NORMAL) {
+ end_migration(dev, map_state);
+ map = get_imsm_map(dev, 0);
+ map->failed_disk_num = ~0;
+ dprintf("normal\n");
+ } else {
+ if (map_state == IMSM_T_STATE_DEGRADED) {
+ printf("degraded\n");
+ end_migration(dev, map_state);
+ } else {
+ dprintf("failed\n");
+ }
+ map->map_state = map_state;
+ }
+ super->updates_pending++;
}
}
if (!super->updates_pending)
return;
- write_super_imsm(super, 0);
+ write_super_imsm(container, 0);
super->updates_pending = 0;
}
}
static struct dl *imsm_add_spare(struct intel_super *super, int slot,
- struct active_array *a, int activate_new)
+ struct active_array *a, int activate_new,
+ struct mdinfo *additional_test_list)
{
struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
int idx = get_imsm_disk_idx(dev, slot);
if (d->state_fd >= 0 &&
d->disk.major == dl->major &&
d->disk.minor == dl->minor) {
- dprintf("%x:%x already in array\n", dl->major, dl->minor);
+ dprintf("%x:%x already in array\n",
+ dl->major, dl->minor);
break;
}
if (d)
continue;
+ while (additional_test_list) {
+ if (additional_test_list->disk.major == dl->major &&
+ additional_test_list->disk.minor == dl->minor) {
+ dprintf("%x:%x already in additional test list\n",
+ dl->major, dl->minor);
+ break;
+ }
+ additional_test_list = additional_test_list->next;
+ }
+ if (additional_test_list)
+ continue;
/* skip in use or failed drives */
if (is_failed(&dl->disk) || idx == dl->index ||
*/
dl = imsm_readd(super, i, a);
if (!dl)
- dl = imsm_add_spare(super, i, a, 0);
+ dl = imsm_add_spare(super, i, a, 0, NULL);
if (!dl)
- dl = imsm_add_spare(super, i, a, 1);
+ dl = imsm_add_spare(super, i, a, 1, NULL);
if (!dl)
continue;
}
#endif /* MDASSEMBLE */
+static char disk_by_path[] = "/dev/disk/by-path/";
+
+/* Report the controller domain of the disk named by 'path', where
+ * 'path' is an entry name below /dev/disk/by-path/.
+ *
+ * Returns "ahci" when an ahci PCI device with vendor id 0x8086 exists
+ * and path_attached_to_hba() reports the disk as attached to it;
+ * returns NULL in every other case.
+ */
+static const char *imsm_get_disk_controller_domain(const char *path)
+{
+	struct sys_dev *list, *hba = NULL;
+	char disk_path[PATH_MAX];
+	int ahci = 0;
+	char *dpath = NULL;
+
+	list = find_driver_devices("pci", "ahci");
+	for (hba = list; hba; hba = hba->next)
+		if (devpath_to_vendor(hba->path) == 0x8086)
+			break;
+
+	if (hba) {
+		struct stat st;
+
+		/* one bounded, always NUL-terminated format; an over-long
+		 * 'path' is truncated exactly as the strncat() did before
+		 */
+		snprintf(disk_path, sizeof(disk_path), "%s%s",
+			 disk_by_path, path);
+		if (stat(disk_path, &st) == 0) {
+			dpath = devt_to_devpath(st.st_rdev);
+			if (dpath)
+				ahci = path_attached_to_hba(dpath, hba->path);
+		}
+	}
+	/* dpath stays NULL when no hba was found or the lookup failed;
+	 * never hand a NULL pointer to a %s conversion
+	 */
+	dprintf("path: %s(%s) hba: %s attached: %d\n",
+		path, dpath ? dpath : "NULL",
+		(hba) ? hba->path : "NULL", ahci);
+	/* NOTE(review): dpath is not released here; if devt_to_devpath()
+	 * returns allocated memory this leaks - confirm ownership.
+	 */
+	free_sys_dev(&list);
+
+	return ahci ? "ahci" : NULL;
+}
+
+
struct superswitch super_imsm = {
#ifndef MDASSEMBLE
.examine_super = examine_super_imsm,
.brief_detail_super = brief_detail_super_imsm,
.write_init_super = write_init_super_imsm,
.validate_geometry = validate_geometry_imsm,
- .default_chunk = default_chunk_imsm,
.add_to_super = add_to_super_imsm,
.detail_platform = detail_platform_imsm,
.kill_subarray = kill_subarray_imsm,
.update_subarray = update_subarray_imsm,
+ .load_container = load_container_imsm,
#endif
.match_home = match_home_imsm,
.uuid_from_super= uuid_from_super_imsm,
.getinfo_super = getinfo_super_imsm,
+ .getinfo_super_disks = getinfo_super_disks_imsm,
.update_super = update_super_imsm,
.avail_size = avail_size_imsm,
+ .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
.compare_super = compare_super_imsm,
.free_super = free_super_imsm,
.match_metadata_desc = match_metadata_desc_imsm,
.container_content = container_content_imsm,
- .default_layout = imsm_level_to_layout,
+ .default_geometry = default_geometry_imsm,
+ .get_disk_controller_domain = imsm_get_disk_controller_domain,
.external = 1,
.name = "imsm",
--- /dev/null
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2010 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neil@brown.name>
+ *
+ */
+
+/*
+ * 'mbr' is a pseudo metadata type for devices which have a
+ * partition table in the Master Boot Record (mbr) also known
+ * as a dos partition table.
+ *
+ * Obviously arrays cannot be created or assembled for this type.
+ * It is used to allow a new bare device to have a partition table
+ * added so the member partitions can then be included in other
+ * arrays as relevant.
+ *
+ * The meaningful operations are:
+ * examine_super, but not brief_examine_super or export_examine
+ * load_super
+ * store_super
+ */
+
+#include "mdadm.h"
+#include "part.h"
+
+/* Release the MBR buffer held in st->sb and leave st->sb NULL. */
+static void free_mbr(struct supertype *st)
+{
+	void *old = st->sb;
+
+	st->sb = NULL;
+	free(old);
+}
+
+#ifndef MDASSEMBLE
+
+/* Print the MBR magic followed by one line for every partition table
+ * slot whose sector count is non-zero.  'homehost' is not used.
+ */
+static void examine_mbr(struct supertype *st, char *homehost)
+{
+	struct MBR *rec = st->sb;
+	int slot;
+
+	printf(" MBR Magic : %04x\n", rec->magic);
+
+	for (slot = 0; slot < MBR_PARTITIONS; slot++) {
+		unsigned long nsect, start;
+
+		/* empty slot: nothing to report */
+		if (!rec->parts[slot].blocks_num)
+			continue;
+
+		nsect = (unsigned long)__le32_to_cpu(rec->parts[slot].blocks_num);
+		start = (unsigned long)__le32_to_cpu(rec->parts[slot].first_sect_lba);
+		printf("Partition[%d] : %12lu sectors at %12lu (type %02x)\n",
+		       slot, nsect, start, rec->parts[slot].part_type);
+	}
+}
+
+#endif /*MDASSEMBLE */
+
+static int load_super_mbr(struct supertype *st, int fd, char *devname)
+{
+ /* Try to read an mbr from the start of 'fd' into st->sb.
+ * Whatever st->sb held before is freed first.
+ * 'devname', when not NULL, is named in the read/signature
+ * error messages.
+ * Return
+ * 0 on success
+ * 1 on any failure: the buffer cannot be allocated, the
+ * record cannot be read, or the signature is not
+ * MBR_SIGNATURE_MAGIC
+ */
+ struct MBR *super;
+
+ free_mbr(st);
+
+ /* 512-byte aligned, 512-byte buffer for the record */
+ if (posix_memalign((void**)&super, 512, 512) != 0) {
+ fprintf(stderr, Name ": %s could not allocate superblock\n",
+ __func__);
+ return 1;
+ }
+
+ ioctl(fd, BLKFLSBUF, 0); /* make sure we read current data */
+
+ lseek(fd, 0, 0);
+ /* a short read is treated the same as a failed read */
+ if (read(fd, super, sizeof(*super)) != sizeof(*super)) {
+ if (devname)
+ fprintf(stderr, Name ": Cannot read partition table on %s\n",
+ devname);
+ free(super);
+ return 1;
+ }
+
+ if (super->magic != MBR_SIGNATURE_MAGIC) {
+ if (devname)
+ fprintf(stderr, Name ": No partition table found on %s\n",
+ devname);
+ free(super);
+ return 1;
+ }
+
+ st->sb = super;
+
+ /* only fill in the handler details when none was chosen yet */
+ if (st->ss == NULL) {
+ st->ss = &mbr;
+ st->minor_version = 0;
+ st->max_devs = 1;
+ st->info = NULL;
+ }
+ return 0;
+}
+
+/* Write the MBR held in st->sb to the start of 'fd'.
+ * The 'pad' area of the record currently on the device is read first
+ * and copied into st->sb, so that area is written back unchanged.
+ * Returns 0 on success, 1 if the scratch buffer cannot be allocated
+ * or the existing record cannot be read in full, and 4 if the write
+ * does not transfer the whole record.
+ */
+static int store_mbr(struct supertype *st, int fd)
+{
+ struct MBR *old, *super;
+
+ if (posix_memalign((void**)&old, 512, 512) != 0) {
+ fprintf(stderr, Name ": %s could not allocate superblock\n",
+ __func__);
+ return 1;
+ }
+
+ ioctl(fd, BLKFLSBUF, 0); /* make sure we read current data */
+
+ lseek(fd, 0, 0);
+ if (read(fd, old, sizeof(*old)) != sizeof(*old)) {
+ free(old);
+ return 1;
+ }
+
+ super = st->sb;
+ /* keep the on-disk 'pad' bytes; this modifies st->sb in place */
+ memcpy(super->pad, old->pad, sizeof(super->pad));
+ free(old);
+ lseek(fd, 0, 0);
+ if (write(fd, super, sizeof(*super)) != sizeof(*super))
+ return 4;
+ fsync(fd);
+ /* issue BLKRRPART on the device; the result is not checked */
+ ioctl(fd, BLKRRPART, 0);
+ return 0;
+}
+
+/* Fill 'info' from the MBR in st->sb.
+ * info->array and info->disk are zeroed, text_version and name are
+ * both set to "mbr", and component_size is set to the highest end
+ * sector (first_sect_lba + blocks_num) over all non-empty partition
+ * slots, or 0 when every slot is empty.  'map' is not used.
+ */
+static void getinfo_mbr(struct supertype *st, struct mdinfo *info, char *map)
+{
+	struct MBR *sb = st->sb;
+	int i;
+
+	memset(&info->array, 0, sizeof(info->array));
+	memset(&info->disk, 0, sizeof(info->disk));
+	strcpy(info->text_version, "mbr");
+	strcpy(info->name, "mbr");
+	info->component_size = 0;
+
+	for (i = 0; i < MBR_PARTITIONS ; i++) {
+		unsigned long long last;
+
+		if (!sb->parts[i].blocks_num)
+			continue;
+		/* both operands are 32-bit on-disk values, so their sum
+		 * needs more than 32 bits; 'unsigned long' would wrap on
+		 * targets where it is only 32 bits wide
+		 */
+		last = (unsigned long long)__le32_to_cpu(sb->parts[i].blocks_num)
+			+ (unsigned long long)__le32_to_cpu(sb->parts[i].first_sect_lba);
+		if (last > info->component_size)
+			info->component_size = last;
+	}
+}
+
+/* Return a newly allocated supertype bound to the 'mbr' handler when
+ * 'arg' is exactly "mbr".  Returns NULL when 'arg' names some other
+ * metadata type or when the allocation fails.
+ */
+static struct supertype *match_metadata_desc(char *arg)
+{
+	struct supertype *st;
+
+	/* check the name before allocating, so a mismatch cannot leak */
+	if (strcmp(arg, "mbr") != 0)
+		return NULL;
+
+	st = malloc(sizeof(*st));
+	if (!st)
+		return st;
+
+	/* start from a fully defined structure, as the 0.90 and 1.x
+	 * handlers do, rather than leaving unlisted fields as garbage
+	 */
+	memset(st, 0, sizeof(*st));
+	st->container_dev = NoMdDev;
+	st->ss = &mbr;
+	st->info = NULL;
+	st->minor_version = 0;
+	st->max_devs = 1;
+	st->sb = NULL;
+	return st;
+}
+
+#ifndef MDASSEMBLE
+/* 'mbr' cannot describe an array: print an error and return 0
+ * whatever geometry is requested.  None of the arguments is examined.
+ */
+static int validate_geometry(struct supertype *st, int level,
+			     int layout, int raiddisks,
+			     int chunk, unsigned long long size,
+			     char *subdev, unsigned long long *freesize,
+			     int verbose)
+{
+	static const char msg[] = Name ": mbr metadata cannot be used this way\n";
+
+	fputs(msg, stderr);
+	return 0;
+}
+#endif
+
+/* Handler table for the 'mbr' pseudo metadata type.
+ * examine_mbr() and validate_geometry() are only defined when
+ * MDASSEMBLE is not set, so both entries sit inside the same guard;
+ * referencing either one outside it would not compile in an
+ * MDASSEMBLE build.
+ */
+struct superswitch mbr = {
+#ifndef MDASSEMBLE
+	.examine_super = examine_mbr,
+	.validate_geometry = validate_geometry,
+#endif
+	.match_metadata_desc = match_metadata_desc,
+	.load_super = load_super_mbr,
+	.store_super = store_mbr,
+	.getinfo_super = getinfo_mbr,
+	.free_super = free_mbr,
+	.name = "mbr",
+};
}
}
-static void getinfo_super0(struct supertype *st, struct mdinfo *info)
+static void getinfo_super0(struct supertype *st, struct mdinfo *info, char *map)
{
mdp_super_t *sb = st->sb;
int working = 0;
int i;
+ int map_disks = info->array.raid_disks;
info->array.major_version = sb->major_version;
info->array.minor_version = sb->minor_version;
if ((sb->disks[i].state & (1<<MD_DISK_SYNC)) &&
(sb->disks[i].raid_disk < (unsigned)info->array.raid_disks) &&
(sb->disks[i].state & (1<<MD_DISK_ACTIVE)) &&
- !(sb->disks[i].state & (1<<MD_DISK_FAULTY)))
+ !(sb->disks[i].state & (1<<MD_DISK_FAULTY))) {
working ++;
+ if (map && i < map_disks)
+ map[i] = 1;
+ } else if (map && i < map_disks)
+ map[i] = 0;
info->array.working_disks = working;
}
+/* ->container_content hook for 0.90 metadata.
+ * A request for a specific 'subarray' yields NULL.  Otherwise a
+ * malloc()ed mdinfo filled in by getinfo_super0() is returned, or
+ * NULL if the allocation fails.
+ */
+static struct mdinfo *container_content0(struct supertype *st, char *subarray)
+{
+	struct mdinfo *info;
+
+	if (subarray)
+		return NULL;
+
+	info = malloc(sizeof(*info));
+	if (!info)
+		return NULL;
+	/* getinfo_super0() reads info->array.raid_disks before it has
+	 * written anything, so hand it a defined structure
+	 */
+	memset(info, 0, sizeof(*info));
+	getinfo_super0(st, info, NULL);
+	return info;
+}
static int update_super0(struct supertype *st, struct mdinfo *info,
char *update,
char *devname, int verbose,
int uuid_set, char *homehost)
{
- /* NOTE: for 'assemble' and 'force' we need to return non-zero if any change was made.
- * For others, the return value is ignored.
+ /* NOTE: for 'assemble' and 'force' we need to return non-zero
+ * if any change was made. For others, the return value is
+ * ignored.
*/
int rv = 0;
mdp_super_t *sb = st->sb;
if (verbose >= 0)
fprintf (stderr, Name ": adjusting superblock of %s for 2.2/sparc compatability.\n",
devname);
- }
- if (strcmp(update, "super-minor") ==0) {
+ } else if (strcmp(update, "super-minor") ==0) {
sb->md_minor = info->array.md_minor;
if (verbose > 0)
fprintf(stderr, Name ": updating superblock of %s with minor number %d\n",
devname, info->array.md_minor);
- }
- if (strcmp(update, "summaries") == 0) {
+ } else if (strcmp(update, "summaries") == 0) {
unsigned int i;
/* set nr_disks, active_disks, working_disks,
* failed_disks, spare_disks based on disks[]
sb->spare_disks++;
} else if (i >= sb->raid_disks && sb->disks[i].number == 0)
sb->disks[i].state = 0;
- }
- if (strcmp(update, "force-one")==0) {
+ } else if (strcmp(update, "force-one")==0) {
/* Not enough devices for a working array, so
* bring this one up-to-date.
*/
if (sb->events_hi != ehi ||
sb->events_lo != elo)
rv = 1;
- }
- if (strcmp(update, "force-array")==0) {
+ } else if (strcmp(update, "force-array")==0) {
/* degraded array and 'force' requested, so
* maybe need to mark it 'clean'
*/
sb->state |= (1 << MD_SB_CLEAN);
rv = 1;
}
- }
- if (strcmp(update, "assemble")==0) {
+ } else if (strcmp(update, "assemble")==0) {
int d = info->disk.number;
int wonly = sb->disks[d].state & (1<<MD_DISK_WRITEMOSTLY);
int mask = (1<<MD_DISK_WRITEMOSTLY);
sb->disks[d].state = info->disk.state | wonly;
rv = 1;
}
- }
- if (strcmp(update, "linear-grow-new") == 0) {
+ } else if (strcmp(update, "linear-grow-new") == 0) {
memset(&sb->disks[info->disk.number], 0, sizeof(sb->disks[0]));
sb->disks[info->disk.number].number = info->disk.number;
sb->disks[info->disk.number].major = info->disk.major;
sb->disks[info->disk.number].raid_disk = info->disk.raid_disk;
sb->disks[info->disk.number].state = info->disk.state;
sb->this_disk = sb->disks[info->disk.number];
- }
- if (strcmp(update, "linear-grow-update") == 0) {
+ } else if (strcmp(update, "linear-grow-update") == 0) {
sb->raid_disks = info->array.raid_disks;
sb->nr_disks = info->array.nr_disks;
sb->active_disks = info->array.active_disks;
sb->disks[info->disk.number].minor = info->disk.minor;
sb->disks[info->disk.number].raid_disk = info->disk.raid_disk;
sb->disks[info->disk.number].state = info->disk.state;
- }
- if (strcmp(update, "resync") == 0) {
+ } else if (strcmp(update, "resync") == 0) {
/* make sure resync happens */
sb->state &= ~(1<<MD_SB_CLEAN);
sb->recovery_cp = 0;
- }
- if (strcmp(update, "homehost") == 0 &&
+ } else if (strcmp(update, "homehost") == 0 &&
homehost) {
uuid_set = 0;
update = "uuid";
info->uuid[0] = sb->set_uuid0;
info->uuid[1] = sb->set_uuid1;
- }
- if (strcmp(update, "uuid") == 0) {
+ } else if (strcmp(update, "uuid") == 0) {
if (!uuid_set && homehost) {
char buf[20];
char *hash = sha1_buffer(homehost,
bm = (struct bitmap_super_s*)(sb+1);
uuid_from_super0(st, (int*)bm->uuid);
}
- }
- if (strcmp(update, "_reshape_progress")==0)
+ } else if (strcmp(update, "no-bitmap") == 0) {
+ sb->state &= ~(1<<MD_SB_BITMAP_PRESENT);
+ } else if (strcmp(update, "_reshape_progress")==0)
sb->reshape_position = info->reshape_progress;
+ else
+ rv = -1;
sb->sb_csum = calc_sb0_csum(sb);
return rv;
free_super0(st);
- if (st->subarray[0])
- return 1;
-
if (!get_dev_size(fd, devname, &dsize))
return 1;
if (!st) return st;
memset(st, 0, sizeof(*st));
+ st->container_dev = NoMdDev;
st->ss = &super0;
st->info = NULL;
st->minor_version = 90;
.match_home = match_home0,
.uuid_from_super = uuid_from_super0,
.getinfo_super = getinfo_super0,
+ .container_content = container_content0,
.update_super = update_super0,
.init_super = init_super0,
.store_super = store_super0,
cuuid[i] = super->set_uuid[i];
}
-static void getinfo_super1(struct supertype *st, struct mdinfo *info)
+static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
{
struct mdp_superblock_1 *sb = st->sb;
int working = 0;
unsigned int i;
- int role;
+ unsigned int role;
+ unsigned int map_disks = info->array.raid_disks;
info->array.major_version = 1;
info->array.minor_version = st->minor_version;
} else
info->reshape_active = 0;
+ if (map)
+ for (i=0; i<map_disks; i++)
+ map[i] = 0;
for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) {
role = __le16_to_cpu(sb->dev_roles[i]);
- if (/*role == 0xFFFF || */role < info->array.raid_disks)
+ if (/*role == 0xFFFF || */role < (unsigned) info->array.raid_disks) {
working++;
+ if (map && role < map_disks)
+ map[role] = 1;
+ }
}
info->array.working_disks = working;
}
+/* ->container_content hook for 1.x metadata.
+ * A request for a specific 'subarray' yields NULL.  Otherwise a
+ * malloc()ed mdinfo filled in by getinfo_super1() is returned, or
+ * NULL if the allocation fails.
+ */
+static struct mdinfo *container_content1(struct supertype *st, char *subarray)
+{
+	struct mdinfo *info;
+
+	if (subarray)
+		return NULL;
+
+	info = malloc(sizeof(*info));
+	if (!info)
+		return NULL;
+	/* getinfo_super1() reads info->array.raid_disks before it has
+	 * written anything, so hand it a defined structure
+	 */
+	memset(info, 0, sizeof(*info));
+	getinfo_super1(st, info, NULL);
+	return info;
+}
+
static int update_super1(struct supertype *st, struct mdinfo *info,
char *update,
char *devname, int verbose,
int uuid_set, char *homehost)
{
- /* NOTE: for 'assemble' and 'force' we need to return non-zero if any change was made.
- * For others, the return value is ignored.
+ /* NOTE: for 'assemble' and 'force' we need to return non-zero
+ * if any change was made. For others, the return value is
+ * ignored.
*/
int rv = 0;
struct mdp_superblock_1 *sb = st->sb;
if (sb->events != __cpu_to_le64(info->events))
rv = 1;
sb->events = __cpu_to_le64(info->events);
- }
- if (strcmp(update, "force-array")==0) {
+ } else if (strcmp(update, "force-array")==0) {
/* Degraded array and 'force' requests to
* maybe need to mark it 'clean'.
*/
rv = 1;
sb->resync_offset = MaxSector;
}
- }
- if (strcmp(update, "assemble")==0) {
+ } else if (strcmp(update, "assemble")==0) {
int d = info->disk.number;
int want;
if (info->disk.state == 6)
sb->dev_roles[d] = __cpu_to_le16(want);
rv = 1;
}
- }
- if (strcmp(update, "linear-grow-new") == 0) {
+ } else if (strcmp(update, "linear-grow-new") == 0) {
unsigned int i;
int rfd, fd;
unsigned int max = __le32_to_cpu(sb->max_dev);
ds - __le64_to_cpu(sb->data_offset));
}
}
- }
- if (strcmp(update, "linear-grow-update") == 0) {
+ } else if (strcmp(update, "linear-grow-update") == 0) {
sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
sb->dev_roles[info->disk.number] =
__cpu_to_le16(info->disk.raid_disk);
- }
- if (strcmp(update, "resync") == 0) {
+ } else if (strcmp(update, "resync") == 0) {
/* make sure resync happens */
sb->resync_offset = 0ULL;
- }
- if (strcmp(update, "uuid") == 0) {
+ } else if (strcmp(update, "uuid") == 0) {
copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
if (__le32_to_cpu(sb->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
bm = (struct bitmap_super_s*)(st->sb+1024);
memcpy(bm->uuid, sb->set_uuid, 16);
}
- }
- if (strcmp(update, "homehost") == 0 &&
+ } else if (strcmp(update, "no-bitmap") == 0) {
+ sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
+ } else if (strcmp(update, "homehost") == 0 &&
homehost) {
char *c;
update = "name";
else
strncpy(info->name, sb->set_name, 32);
info->name[32] = 0;
- }
- if (strcmp(update, "name") == 0) {
+ } else if (strcmp(update, "name") == 0) {
if (info->name[0] == 0)
sprintf(info->name, "%d", info->array.md_minor);
memset(sb->set_name, 0, sizeof(sb->set_name));
strcat(sb->set_name, info->name);
} else
strcpy(sb->set_name, info->name);
- }
- if (strcmp(update, "devicesize") == 0 &&
+ } else if (strcmp(update, "devicesize") == 0 &&
__le64_to_cpu(sb->super_offset) <
__le64_to_cpu(sb->data_offset)) {
/* set data_size to device size less data_offset */
misc->device_size - __le64_to_cpu(sb->data_offset));
printf("Size is %llu\n", (unsigned long long)
__le64_to_cpu(sb->data_size));
- }
- if (strcmp(update, "_reshape_progress")==0)
+ } else if (strcmp(update, "_reshape_progress")==0)
sb->reshape_position = __cpu_to_le64(info->reshape_progress);
+ else
+ rv = -1;
sb->sb_csum = calc_sb_1_csum(sb);
return rv;
free_super1(st);
- if (st->subarray[0])
- return 1;
-
if (st->ss == NULL || st->minor_version == -1) {
int bestvers = -1;
struct supertype tst;
if (!st) return st;
memset(st, 0, sizeof(*st));
+ st->container_dev = NoMdDev;
st->ss = &super1;
st->max_devs = 384;
st->sb = NULL;
.match_home = match_home1,
.uuid_from_super = uuid_from_super1,
.getinfo_super = getinfo_super1,
+ .container_content = container_content1,
.update_super = update_super1,
.init_super = init_super1,
.store_super = store_super1,
return 0;
}
+/* Return non-zero when the sysfs attribute 'name' exists for the array
+ * 'sra' (dev == NULL) or for its component 'dev', i.e. when
+ * /sys/block/<sra>/md/<dev>/<name> can be stat()ed.
+ * Returns 0 when it cannot, including when the path would not fit in
+ * the local buffer.
+ */
+int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, char *name)
+{
+	char fname[200];
+	struct stat st;
+	int len;
+
+	/* nothing bounds the combined length of the three components,
+	 * so format with an explicit limit instead of sprintf()
+	 */
+	len = snprintf(fname, sizeof(fname), "/sys/block/%s/md/%s/%s",
+		       sra->sys_name, dev ? dev->sys_name : "", name);
+	if (len < 0 || (size_t)len >= sizeof(fname))
+		return 0;
+
+	return stat(fname, &st) == 0;
+}
+
int sysfs_get_fd(struct mdinfo *sra, struct mdinfo *dev,
char *name)
{
* yet, so just ignore status for now.
*/
sysfs_set_str(sra, sd, "state", "insync");
- rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
+ if (sd->disk.raid_disk >= 0)
+ rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
if (resume)
sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
}
return found;
}
+/* Try to freeze resync/rebuild on the array or container 'sra' by
+ * writing "frozen" to its sync_action attribute.
+ *
+ * Returns, as implemented here:
+ *   1 - sync_action was set to "frozen", or the attribute does not
+ *       exist at all (treated as already frozen)
+ *   0 - sync_action could not be read or written
+ *  -1 - the array is busy: sync_action is neither idle nor frozen
+ */
+int sysfs_freeze_array(struct mdinfo *sra)
+{
+	char action[20];
+	int quiescent;
+
+	if (!sysfs_attribute_available(sra, NULL, "sync_action"))
+		return 1; /* no sync_action == frozen */
+
+	if (sysfs_get_str(sra, NULL, "sync_action", action, 20) <= 0)
+		return 0;
+
+	/* the comparison includes the trailing newline */
+	quiescent = strcmp(action, "idle\n") == 0 ||
+		    strcmp(action, "frozen\n") == 0;
+	if (!quiescent)
+		return -1;
+
+	return sysfs_set_str(sra, NULL, "sync_action", "frozen") < 0 ? 0 : 1;
+}
+
#ifndef MDASSEMBLE
static char *clean_states[] = {
$mdadm -Ssq
for d in 0 1 2 3 4 5 6 7 8 9 10 11 12
do
- losetup -d /dev/loop$d ; # rm -f $targetdir/mdtest$d
+ losetup -d /dev/loop$d ; # rm -f $targetdir/mdtest$d
+ rm -f /dev/disk/by-path/loop*
done
}
check nosync
check state UUU_
+mdadm --zero-superblock $dev2
mdadm $md0 -a $dev2
check recovery
check wait
#
# add some data, tear down the array, reassemble
# and make sure it is still there.
+set -e
mdadm -CR /dev/md/ddf0 -e ddf -n 5 $dev8 $dev9 $dev10 $dev11 $dev12
mdadm -CR r0 -l0 -n5 /dev/md/ddf0 -z 5000
+if mdadm -CR r0 -l1 -n2 /dev/md/ddf0 -z 5000
+then echo >&2 create with same name should fail ; exit 1
+fi
mdadm -CR r1 -l1 -n2 /dev/md/ddf0
mdadm -CR r5 -l5 -n3 /dev/md/ddf0
testdev /dev/md/r0 5 5000 512
--- /dev/null
+# Set of tests for autorebuild functionality using mdadm -F
+# To be able to test ddf one must have all loop devices of bigger size, with the ones
+# above number 7 bigger again by any amount (this is not changed for now as it
+# could affect other tests)
+
+. tests/utils
+set -ex
+verbose="yes"
+sleeptime=10
+
+# if listfailed=yes then don't exit if test failed due to wrong
+# spare-migration and just print a list at the end. Other errors still
+# stop the test.
+# if listfailed=no then exit on first failure
+listfailed="yes"
+
+# start Monitor, set monitorpid
+# uses global scan variable
+# all parameters are numbers of devices to be monitored. only used when $scan="no"
+# eg. monitor 0 1 will start monitoring of containers c0, c1 and subarrays v0, v1
+monitor(){
+	# A monitor is already running: succeed explicitly.  A bare "return"
+	# would hand back the failed test's non-zero status, which aborts the
+	# calling script when it runs under "set -e".
+	[ -z "$monitorpid" ] || return 0
+	if [ "$scan" == "yes" ]; then
+		# scan mode: let mdadm find the arrays through the config file
+		$mdadm -F -d 1 --scan --mail root@localhost &
+		monitorpid=$!
+		return
+	fi
+	# explicit mode: build the device list from the c<N>/v<N> globals
+	unset mddevs
+	while [ -n "$1" ]
+	do
+		eval container=\$c$1
+		eval volumes=\$v$1
+		mddevs="$mddevs /dev/$container"
+		# when container and volume are the same device, list it once
+		if [ "$container" != "$volumes" ]; then
+			for vol in $volumes; do
+				mddevs="$mddevs /dev/$vol"
+			done
+		fi
+		shift
+	done
+	if [ -n "$mddevs" ]; then
+		if [ "$verbose" != "yes" ]; then
+			$mdadm -F -d 1 $mddevs >&2 &
+			monitorpid=$!
+		else
+			$mdadm -F -t -d 1 $mddevs &
+			monitorpid=$!
+		fi
+	fi
+	[ "$verbose" != "yes" ] || echo $mddevs $monitorpid
+}
+
+# Array 0 is built on devices 0 and 1, array 1 on devices 3 and 4; devices
+# 0-4 are all put in one policy domain with action "spare".
+test1()
+{
+dsc "Test 1: Common domain, add disk to one container and fail first one in another container, spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+# create config file with arrays and common domain
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4
+monitor 0 1
+# add the spare to array 1 first, then degrade array 0
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v0 $dev0
+# check that spare loop2 was moved from container c1 to container c0
+chksparemoved $c1 $c0 $dev2
+tidyup
+}
+
+# Same layout as test1, but the second member (device 1) of array 0 is
+# the one that gets failed.
+test1a()
+{
+dsc "Test 1a: Common domain, add disk to one container and fail second one in another container, spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v0 $dev1
+# check that spare loop2 was moved from container c1 to container c0
+chksparemoved $c1 $c0 $dev2
+tidyup
+}
+
+# Reverse order of test1a: array 0 is degraded first and only then is the
+# spare added to array 1.
+test2()
+{
+dsc "Test 2: Common domain, fail disk in one container and add one to another container, spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4
+monitor 0 1
+mdadm --fail /dev/$v0 $dev1
+mdadm -a /dev/$c1 $dev2
+chksparemoved $c1 $c0 $dev2
+tidyup
+}
+
+# Devices 0-2 and devices 3-5 are placed in two different domains, so the
+# spare (device 5) added to array 1 must stay where it is.
+test3()
+{
+dsc "Test 3: Two domains, fail a disk in one domain, add a disk to another domain, the spare should not be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+# create config file with 2 domains
+createconfig a
+createconfig domain-$platform"1" $platform spare 0 1 2
+createconfig domain-$platform"2" $platform spare 3 4 5
+monitor 0 1
+mdadm --fail /dev/$v0 $dev1
+mdadm -a /dev/$c1 $dev5
+# trailing "n": the spare is expected to remain in its original container
+chksparemoved $c1 $c0 $dev5 n
+tidyup
+}
+
+# Only devices 0-2 are listed in a domain; array 1 (devices 3, 4) and the
+# spare (device 5) have no policy line at all.
+test4()
+{
+dsc "Test 4: One domain holds one container, fail a disk in domain, and add disk to a container not described by domain, spare loop5 should not be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2
+monitor 0 1
+mdadm --fail /dev/$v0 $dev1
+mdadm -a /dev/$c1 $dev5
+# trailing "n": the spare is expected to remain in its original container
+chksparemoved $c1 $c0 $dev5 n
+tidyup
+}
+
+# test5 builds four arrays in two domains once, then runs test5a, test5b and
+# test5c in sequence on that same setup; the sub-tests do no setup or tidyup
+# of their own, so each one starts from the state the previous one left.
+test5()
+{
+dsc "Test 5: Two domains, two containers in each domain"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+setupdevs 2 5 6 $platform
+setupdevs 3 7 8 $platform
+# 2 and 9 for spares
+createconfig a
+createconfig domain-$platform"1" $platform spare 0 1 2 3 4
+createconfig domain-$platform"2" $platform spare 5 6 7 8 9
+monitor 0 1 2 3
+test5a
+test5b
+test5c
+tidyup
+}
+
+# sub-step of test5: relies on the arrays and monitor started there
+test5a()
+{
+dsc "Test 5a: Two containers in each domain, add spare loop2 to domain1 and fail disk in the other domain, the spare should not be moved"
+mdadm -a /dev/$c0 $dev2
+mdadm --fail /dev/$v2 $dev5
+chksparemoved $c0 $c2 $dev2 n
+}
+
+# sub-step of test5: reuses the spare that test5a added to container c0
+test5b()
+{
+dsc "Test 5b: Fail disk in the same domain but different container, spare loop2 should be moved"
+mdadm --fail /dev/$v1 $dev3
+chksparemoved $c0 $c1 $dev2
+}
+
+# sub-step of test5: array 2 is still degraded from the failure in test5a
+test5c()
+{
+dsc "Test 5c: Add spare loop9 to different container in domain with degraded array, spare should be moved"
+mdadm -a /dev/$c3 $dev9
+chksparemoved $c3 $c2 $dev9
+}
+
+# Array 1 is built on devices 8 and 9; per the note at the top of this file
+# the devices above number 7 are expected to be bigger than the others, so
+# device 2 is too small to replace device 8.
+test6()
+{
+dsc "Test 6: One domain has two containers, fail a disk in one container, there is a spare in other container too small to use for rebuild"
+setupdevs 0 0 1 $platform
+setupdevs 1 8 9 $platform
+# all devices in one domain
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 8 9
+monitor 0 1
+mdadm -a /dev/$c0 $dev2
+mdadm --fail /dev/$v1 $dev8
+# trailing "n": the spare is expected to remain in its original container
+chksparemoved $c0 $c1 $dev2 n
+tidyup
+}
+
+# Device 2 (small) is added directly to the container of the array that is
+# about to degrade; device 10 (one of the bigger devices) is then added to
+# the other container and is the one expected to migrate.
+test7()
+{
+dsc "Test 7: One domain, add small spare to container, fail disk in array, spare not used, add suitable spare to other container, spare should be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 8 9 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 8 9 10
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v1 $dev8
+mdadm -a /dev/$c0 $dev10
+chksparemoved $c0 $c1 $dev10
+tidyup
+}
+
+
+# Variant of test7 in which the small spare (device 2) is configured in a
+# domain of its own, separate from the domain holding both arrays.
+test7a()
+{
+dsc "Test 7a: Small spare in parent, suitable one in other container, $dev2 in $c1 is not in common domain"
+setupdevs 0 0 1 $platform
+setupdevs 1 8 9 $platform
+#all $platform devices in one domain
+createconfig a
+createconfig domain-$platform"1" $platform spare 0 1 8 9 10
+createconfig domain-$platform"2" $platform spare 2
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+# confirm the small spare really landed in container c1 before going on
+chkspare $c1 $dev2
+mdadm --fail /dev/$v1 $dev8
+mdadm -a /dev/$c0 $dev10
+chksparemoved $c0 $c1 $dev10
+tidyup
+}
+
+# Currently disabled: the unconditional "return" below means none of the
+# commands after it are executed.
+test8()
+{
+# ddf does not have getinfo_super_disks implemented so skip this test
+return
+dsc "Test 8: imsm and ddf - spare should not be migrated"
+setupdevs 0 10 11 imsm
+setupdevs 1 8 9 ddf
+createconfig a
+# "noplatform" makes createconfig omit the metadata= key from the policy
+createconfig domain0 noplatform spare 8 9 10 11 12
+monitor 0 1
+mdadm -a /dev/$c1 $dev12
+mdadm --fail /dev/$v0 $dev10
+chksparemoved $c1 $c0 $dev12 n
+tidyup
+}
+
+# The metadata types are fixed here (imsm and 1.2) rather than taken from
+# $platform, so this test runs the same way for every platform value.
+test9()
+{
+dsc "Test 9: imsm and native 1.2 - spare should not be shared"
+setupdevs 0 10 11 imsm
+setupdevs 1 8 9 1.2
+createconfig a
+# "noplatform" makes createconfig omit the metadata= key from the policy
+createconfig domain0 noplatform spare 8 9 10 11 12
+monitor 0 1
+mdadm -a /dev/$c1 $dev12
+mdadm --fail /dev/$v0 $dev10
+# trailing "n": the spare is expected to remain in its original container
+chksparemoved $c1 $c0 $dev12 n
+tidyup
+}
+
+# The extra size argument (10000) asks setupdevs for a first subarray of
+# that size, named sub0_, plus a second one on the remaining space.
+test10()
+{
+dsc "Test 10: Two arrays on the same devices in container"
+setupdevs 0 0 1 $platform 10000
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4 5
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/md/sub0_ $dev0
+chksparemoved $c1 $c0 $dev2
+# $failed is reset by dsc and set to 1 by err when a check fails
+if [ $failed -eq 0 ]; then
+# now fail the spare and see if we get another one
+ mdadm --fail /dev/md/sub0_ $dev2
+ mdadm -a /dev/$c1 $dev5
+ chksparemoved $c1 $c0 $dev5
+fi
+tidyup
+}
+
+# Device 3 is failed in array 1 while a spare (device 2) is available there;
+# once array 0 degrades, the failed device 3 must not be handed over to it.
+test11()
+{
+dsc "Test 11: Failed spare from other container should not be used"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v1 $dev3
+#wait until recovery finishes so no degraded array in c1
+check wait
+mdadm --fail /dev/$v0 $dev0
+# trailing "n": device 3 is expected to remain in container c1
+chksparemoved $c1 $c0 $dev3 n
+tidyup
+}
+
+# Two spares are added to array 1 and one device is failed in array 0.
+# The test passes when the two spares end up in different containers.
+test12()
+{
+dsc "Test 12: Only one spare should be taken for rebuild, second not needed"
+setupdevs 0 0 1 $platform
+setupdevs 1 3 4 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 3 4 5
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm -a /dev/$c1 $dev5
+mdadm --fail /dev/$v0 $dev0
+# give the monitor time to act (chksparemoved, which normally sleeps, is not used here)
+sleep $sleeptime
+# chkarray leaves the container of the given device in global $c;
+# the second argument restricts the check to the container only
+chkarray $dev2 n
+sc1=$c
+chkarray $dev5 n
+sc2=$c
+[ "$sc1" != "$sc2" ] || err "both spares in the same container $sc1"
+tidyup
+}
+
+# Devices 0 and 1 get action "spare" while devices 4-6 of the same domain get
+# action "include", so the spare added to array 1 must stay where it is.
+test13()
+{
+dsc "Test 13: Common domain, two containers, fail a disk in container, action is below spare, the spare should not be moved"
+setupdevs 0 0 1 $platform
+setupdevs 1 4 5 $platform
+# same domain but different action on 4 5 6
+createconfig a
+createconfig domain-$platform $platform spare 0 1
+createconfig domain-$platform $platform include 4 5 6
+monitor 0 1
+mdadm -a /dev/$c1 $dev6
+mdadm --fail /dev/$v0 $dev0
+# name the spare through $dev6, as every other test does, so the check
+# follows whatever device the harness actually assigned
+chksparemoved $c1 $c0 $dev6 n
+tidyup
+}
+
+test14()
+{
+dsc "Test 14: One domain, small array on big disks, check if small spare is accepted"
+setupdevs 0 8 9 $platform 10000 1
+setupdevs 1 0 1 $platform
+createconfig a
+createconfig domain-$platform $platform spare 0 1 2 8 9
+monitor 0 1
+mdadm -a /dev/$c1 $dev2
+mdadm --fail /dev/$v0 $dev9
+chksparemoved $c1 $c0 $d2
+tidyup
+}
+
+# Run the whole test list once for the current values of the globals
+# $scan and $platform.
+try()
+{
+test1
+test1a
+test2
+test3
+test4
+test5
+test6
+if [ "$platform" != "1.2" ]; then
+# this is because we can't have a small spare added to native array
+ test7
+ test7a
+fi
+test8
+test9
+if [ "$platform" != "1.2" ]; then
+# we can't create two subarrays on the same devices for native (without
+# partitions)
+ test10
+fi
+test11
+test12
+test13
+test14
+}
+
+# Re-run a hand-picked subset of tests with platform 1.2.
+# Its only call site in this script is commented out.
+try_failed()
+{
+platform="1.2"
+scan="no"
+test5
+test9
+test13
+scan="yes"
+test9
+}
+
+#try_failed
+
+# run every test for each monitor mode and each metadata platform
+for scan in no yes; do
+	for platform in 1.2 imsm; do
+		try
+	done
+done
+
+# $flist holds several words per failed test, so it must be quoted inside
+# [ ]; unquoted it makes the test itself fail with "too many arguments"
+[ "$listfailed" == "no" ] || [ -z "$flist" ] || echo -e "\n FAILED TESTS: $flist"
+
+#cat $targetdir/log
+rm -f /dev/disk/by-path/loop*
--- /dev/null
+# Export the two IMSM_* variables with value 1 for the processes started
+# afterwards.  NOTE(review): their exact effect on mdadm is not visible
+# here - confirm against the imsm code.
+setup_env() {
+ export IMSM_DEVNAME_AS_SERIAL=1
+ export IMSM_TEST_OROM=1
+ }
+
+# Undo setup_env: remove both IMSM_* variables from the environment.
+reset_env() {
+ unset IMSM_DEVNAME_AS_SERIAL
+ unset IMSM_TEST_OROM
+}
--- /dev/null
+# set of functions used to test policy framework with assemble, incremental and Monitor
+
+set +e
+#create links to be able to use domains
+for d in 0 1 2 3 4 5 6 7 8 9 10 11 12
+do
+ eval ln -s \$dev$d /dev/disk/by-path/loop$d
+ eval d$d="loop$d"
+ eval mdadm --zero-superblock \$dev$d
+done
+
+devices="/dev/loop[0-9] /dev/loop10 /dev/loop11 /dev/loop12"
+
+# on failure print out few things before exit
+# uses testdsc and platform global variables
+err(){
+	echo >&2 "ERROR: $*"
+	# dump state that helps diagnose the failure; missing files are not fatal
+	cat /etc/mdadm.conf >&2 || true
+	cat /proc/mdstat >&2
+	[ -z "$testdsc" ] || { echo >&2 $platform: $testdsc "- failed"; }
+	ps -e | grep mdadm >&2 || true
+	# quoted so that an unset or multi-word $listfailed is a clean "not yes"
+	# instead of a syntax error inside [ ]
+	if [ "$listfailed" == "yes" ]; then
+		# record the failure and let the caller carry on
+		[ "$verbose" != "yes" ] || echo ---FAILED---
+		flist="$flist \n $platform $testdsc"
+		failed=1
+	else
+		exit 1
+	fi
+}
+
+# set test description
+dsc(){
+ failed=0
+ testdsc="$*"
+ [ "$verbose" != "yes" ] || echo $testdsc
+}
+
+killmonitor(){
+ [ -z "$monitorpid" ] || { kill -9 $monitorpid; unset monitorpid; }
+}
+
+# Return to a clean state: stop the monitor, stop all arrays, wipe the
+# metadata from the test devices and remove the generated config file.
+tidyup(){
+ killmonitor
+ # stop is issued twice; only the first attempt has its failure ignored
+ mdadm -Ss || true
+ mdadm -Ss
+ # $devices is left unquoted on purpose so that its glob patterns expand
+ mdadm --zero-superblock $devices || true
+ udevadm settle
+ rm -f /etc/mdadm.conf
+}
+
+trap tidyup 0 1 2 3 15
+
+# create a RAID 1 array or container and subarray(s) on 2 disks
+# if platform not specified imsm is used
+# if subsize is given, first subarray is created with given size and second one on remaining space
+ccv(){
+ # mddevno used to name created array
+ local mddevno="$1"
+ # numbers of devices to be used in array
+ local devno1="$2"
+ local devno2="$3"
+ local platform="$4"
+ local subsize="$5"
+ local onearray="$6"
+ [ -n "$platform" ] || platform="imsm"
+ if [ "$platform" == "imsm" ] || [ "$platform" == "ddf" ]; then
+ # external metadata: create container con<N>, then the subarray(s) in it
+ eval mdadm -CR /dev/md/con$mddevno -e $platform -n 2 \$dev$devno1 \$dev$devno2
+ udevadm settle
+ # sized subarray sub<N>_ only when a size was given
+ [ -z "$subsize" ] || eval mdadm -CR sub$mddevno"_" -l 1 -n 2 /dev/md/con$mddevno -z $subsize
+ # unsized subarray sub<N> unless the caller asked for a single array
+ [ -n "$onearray" ] || eval mdadm -CR sub$mddevno -l 1 -n 2 /dev/md/con$mddevno
+ else
+ # any other metadata: a single array arr<N> directly on the two devices
+ [ -z "$subsize" ] || sizepar="-z $subsize"
+ eval mdadm -CR arr$mddevno -e $platform -l 1 -n 2 \$dev$devno1 \$dev$devno2 $sizepar
+ # sizepar is not local, so clear it for the next call
+ unset sizepar
+ fi
+}
+
+# get container and subarray using given device from mdstat
+# sets global variables c and v
+getarray(){
+ local devname=`basename $1`
+ # from the line after an "active" mdstat entry naming the device, take
+ # field 4 up to the first ':'
+ # NOTE(review): presumably this is the "super external:..." marker - confirm
+ local platformtype=`grep -A 1 $devname /proc/mdstat | awk '/active/ {getline; print $4 }' | awk -F ":" 'END {print $1}'`
+ # c: first field of the "inactive" mdstat line(s) listing the device
+ c=`grep "inactive.*$devname" /proc/mdstat | awk -F " " '{print $1}'`
+ # v: first field of the " active" mdstat line(s) listing the device
+ v=`grep " active.*$devname" /proc/mdstat | awk -F " " '{print $1}'`
+ # unless the type read above is "external", the array is its own container
+ [ "$platformtype" == "external" ] || c=$v
+}
+
+# check if given device belongs to any container and subarray
+# if $2 given then only container checked
+chkarray(){
+ local devname="$1"
+ local subcheck="$2"
+ getarray $devname
+ [ -n "$c" ] || err "$devname not in any container"
+ [ -n "$subcheck" ] || [ -n "$v" ] || err " $devname not in subarray"
+}
+
+# test if two devices in the same container/subarray
+# $1 $2 - devices
+# $3 don't check subarrays, only containers
+tst(){
+ local device1=`basename $1`
+ local device2=`basename $2`
+ local subcheck="$3"
+ chkarray $device1 $subcheck
+ local x="$c"
+ local y="$v"
+ chkarray $device2 $subcheck
+ [ "$c" == "$x" ] || err "$device1 and $device2 not in the same container"
+ [ -n "$subcheck" ] || [ "$v" == "$y" ] || err "$device1 and $device2 not in the same subarray"
+}
+
+# same as tst, just use numbers of devices instead of names as parameters
+dtst(){
+ local devno1="$1"
+ local devno2="$2"
+ local subcheck="$3"
+ # eval turns the numbers into the values of $dev<N> before calling tst
+ eval tst \$dev$devno1 \$dev$devno2 $subcheck
+}
+
+# create containers/subarrays, check if created properly,
+# set global variables c$mddevno v$mddevno, usually c0=md127, v0=md126 , etc.
+setupdevs(){
+ local mddevno="$1"
+ local devno1="$2"
+ local devno2="$3"
+ local p="$4"
+ local subsize="$5"
+ local onearray="$6"
+ # fall back to the global $platform when no metadata type was passed
+ [ -n "$p" ] || p=$platform
+ ccv $mddevno $devno1 $devno2 $p $subsize $onearray
+ # both devices must now sit in the same container and subarray;
+ # this also leaves the names found in globals $c and $v
+ dtst $devno1 $devno2
+ # publish them as c<mddevno> and v<mddevno>
+ eval c$mddevno=\"$c\"
+ eval v$mddevno=\"$v\"
+}
+
+# check if given spare in container
+# usage: chkspare container spare [n] (n if spare shouldn't be in container)
+chkspare(){
+	local container=`basename $1`
+	local spare=$2
+	# third argument defaults to "y": the spare is expected in the container
+	local expected="${3:-y}"
+	# getarray leaves the container currently holding the spare in $c
+	getarray $spare
+	if [ "$expected" == "y" ]; then
+		if [ "$c" != "$container" ]; then
+			err "$spare not in container $container"
+		fi
+	else
+		if [ "$c" == "$container" ]; then
+			err "$spare in container $container"
+		fi
+	fi
+}
+
+#check if spare was moved from one container to another
+# args: from_container to_container spare [yn]
+# n when spare should remain in original container
+chksparemoved(){
+ sleep $sleeptime
+ from_container="$1"
+ to_container="$2"
+ spare="$3"
+ expected="$4"
+ [ -n "$expected" ] || expected="y"
+ notexpected="n"; [ "$expected" == "y" ] || notexpected="y"
+ chkspare $from_container $spare $notexpected
+ [ $failed -eq 1 ] || chkspare $to_container $spare $expected
+}
+
+
+# for domains defined through policy
+# Two calling forms:
+#   createconfig a
+#       overwrite the config file with a DEVICES line and "mdadm -Ebs" output
+#   createconfig <domain> <metadata> <action> <devno>...
+#       append one policy line per device number; a metadata of "noplatform"
+#       leaves the metadata= key out of the line
+createconfig(){
+conf=/etc/mdadm.conf
+if [ "$1" != "a" ]; then
+{
+ domain=$1
+ metadata=$2
+ action=$3
+ # $4 is the next device number; shift moves the following one into place
+ while [ -n "$4" ]; do
+ # "echo" here is an ordinary variable accumulating the policy line
+ echo="policy domain=$domain"
+ [ "$metadata" == "noplatform" ] || echo="$echo metadata=$metadata"
+ echo="$echo path=loop$4"
+ echo="$echo action=$action"
+ echo "$echo"
+ shift
+ done
+} >> $conf
+else
+{
+ echo "DEVICES $devlist /dev/md1*"
+ mdadm -Ebs
+} > $conf
+fi
+#[ "$verbose" != "yes" ] || cat /etc/mdadm.conf | grep policy || true
+}
SUBSYSTEM!="block", GOTO="md_end"
# handle potential components of arrays
-ENV{ID_FS_TYPE}=="linux_raid_member", ACTION=="remove", RUN+="/sbin/mdadm -If $name"
+ENV{ID_FS_TYPE}=="linux_raid_member", ACTION=="remove", RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}"
ENV{ID_FS_TYPE}=="linux_raid_member", ACTION=="add", RUN+="/sbin/mdadm --incremental $env{DEVNAME}"
+ENV{ID_FS_TYPE}=="isw_raid_member", ACTION=="remove", RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}"
+ENV{ID_FS_TYPE}=="isw_raid_member", ACTION=="add", RUN+="/sbin/mdadm --incremental $env{DEVNAME}"
# handle md arrays
ACTION!="add|change", GOTO="md_end"
char volname[BLKPG_VOLNAMELTH]; /* volume label */
};
-/* partition table structures so we can check metadata position
- * against the end of the last partition.
- * Only handle MBR ant GPT partition tables.
- */
-struct MBR_part_record {
- __u8 bootable;
- __u8 first_head;
- __u8 first_sector;
- __u8 first_cyl;
- __u8 part_type;
- __u8 last_head;
- __u8 last_sector;
- __u8 last_cyl;
- __u32 first_sect_lba;
- __u32 blocks_num;
-};
-
-struct MBR {
- __u8 pad[446];
- struct MBR_part_record parts[4];
- __u16 magic;
-} __attribute__((packed));
-
-struct GPT_part_entry {
- unsigned char type_guid[16];
- unsigned char partition_guid[16];
- __u64 starting_lba;
- __u64 ending_lba;
- unsigned char attr_bits[8];
- unsigned char name[72];
-} __attribute__((packed));
-
-struct GPT {
- __u64 magic;
- __u32 revision;
- __u32 header_size;
- __u32 crc;
- __u32 pad1;
- __u64 current_lba;
- __u64 backup_lba;
- __u64 first_lba;
- __u64 last_lba;
- __u8 guid[16];
- __u64 part_start;
- __u32 part_cnt;
- __u32 part_size;
- __u32 part_crc;
- __u8 pad2[420];
-} __attribute__((packed));
+#include "part.h"
/* Force a compilation error if condition is true */
#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
aren't permitted). */
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
-
-/* MBR/GPT magic numbers */
-#define MBR_SIGNATURE_MAGIC __cpu_to_le16(0xAA55)
-#define GPT_SIGNATURE_MAGIC __cpu_to_le64(0x5452415020494645ULL)
-
-#define MBR_PARTITIONS 4
-#define MBR_GPT_PARTITION_TYPE 0xEE
-
/*
* Parse a 128 bit uuid in 4 integers
* format is 32 hexx nibbles with options :.<space> separator
return (a*1000000)+(b*1000)+c;
}
+/* Turn a version string into a single comparable number.
+ * The part that is parsed starts at the first '-', which must be followed
+ * by " v", then "<a>.<b>" with an optional ".<c>", then a space or '-'.
+ * When 'version' is NULL the global Version string is used.
+ * Returns a*1000000 + b*1000 + c, or -1 if the string does not match.
+ */
+int mdadm_version(char *version)
+{
+	int v_major, v_minor, v_patch = 0;
+	char *p;
+
+	if (version == NULL)
+		version = Version;
+
+	p = strchr(version, '-');
+	if (p == NULL || p[1] != ' ' || p[2] != 'v')
+		return -1;
+
+	/* each strtoul leaves p on the first character it did not consume */
+	v_major = strtoul(p + 3, &p, 10);
+	if (*p != '.')
+		return -1;
+	v_minor = strtoul(p + 1, &p, 10);
+	if (*p == '.')
+		v_patch = strtoul(p + 1, &p, 10);
+	if (*p != ' ' && *p != '-')
+		return -1;
+
+	return v_major * 1000000 + v_minor * 1000 + v_patch;
+}
+
#ifndef MDASSEMBLE
long long parse_size(char *size)
{
}
}
+/* Ask enough() whether the array open on 'fd' has enough in-sync devices.
+ * The availability map is built from GET_DISK_INFO: a device counts when
+ * it has MD_DISK_SYNC set and a raid_disk slot inside the array.
+ * Returns the result of enough(), or 0 when the array information cannot
+ * be read, raid_disks is not positive, or the map cannot be allocated.
+ */
+int enough_fd(int fd)
+{
+	struct mdu_array_info_s array;
+	struct mdu_disk_info_s disk;
+	int avail_disks = 0;
+	int i;
+	int rv;
+	char *avail;
+
+	if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 ||
+	    array.raid_disks <= 0)
+		return 0;
+	avail = calloc(array.raid_disks, 1);
+	if (avail == NULL)
+		return 0;
+	for (i = 0; i < array.raid_disks + array.nr_disks; i++) {
+		disk.number = i;
+		if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
+			continue;
+		if (!(disk.state & (1 << MD_DISK_SYNC)))
+			continue;
+		if (disk.raid_disk < 0 || disk.raid_disk >= array.raid_disks)
+			continue;
+		avail_disks++;
+		avail[disk.raid_disk] = 1;
+	}
+	/* This is used on an active array, so assume it is clean */
+	rv = enough(array.level, array.raid_disks, array.layout,
+		    1,
+		    avail, avail_disks);
+	/* the map is only needed for the call above */
+	free(avail);
+	return rv;
+}
+
+
const int uuid_match_any[4] = { ~0, ~0, ~0, ~0 };
int same_uuid(int a[4], int b[4], int swapuuid)
{
/* Looks like a raid array .. */
fprintf(stderr, Name ": %s appears to be part of a raid array:\n",
name);
- st->ss->getinfo_super(st, &info);
+ st->ss->getinfo_super(st, &info, NULL);
st->ss->free_super(st);
crtime = info.array.ctime;
level = map_num(pers, info.array.level);
dprintf("%s: timeout waiting for %s\n", __func__, dev);
}
-struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, NULL };
+struct superswitch *superlist[] =
+{
+ &super0, &super1,
+ &super_ddf, &super_imsm,
+ &mbr, &gpt,
+ NULL };
#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
-struct supertype *super_by_fd(int fd)
+struct supertype *super_by_fd(int fd, char **subarrayp)
{
mdu_array_info_t array;
int vers;
char version[20];
int i;
char *subarray = NULL;
+ int container = NoMdDev;
sra = sysfs_read(fd, 0, GET_VERSION);
}
if (minor == -2 && is_subarray(verstr)) {
char *dev = verstr+1;
+
subarray = strchr(dev, '/');
- int devnum;
if (subarray)
*subarray++ = '\0';
- devnum = devname2devnum(dev);
subarray = strdup(subarray);
+ container = devname2devnum(dev);
if (sra)
sysfs_free(sra);
- sra = sysfs_read(-1, devnum, GET_VERSION);
+ sra = sysfs_read(-1, container, GET_VERSION);
if (sra && sra->text_version[0])
verstr = sra->text_version;
else
sysfs_free(sra);
if (st) {
st->sb = NULL;
- if (subarray) {
- strncpy(st->subarray, subarray, 32);
- st->subarray[31] = 0;
- free(subarray);
- } else
- st->subarray[0] = 0;
- }
+ if (subarrayp)
+ *subarrayp = subarray;
+ st->container_dev = container;
+ st->devnum = fd2devnum(fd);
+ } else
+ free(subarray);
+
return st;
}
#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
+/* Look up the size of the block device identified by 'id'.
+ * The device is opened through dev_open() using its "major:minor" name.
+ * Returns 1 and fills in *size when get_dev_size() reports success,
+ * 0 when the device cannot be opened or its size cannot be read.
+ */
+int dev_size_from_id(dev_t id, unsigned long long *size)
+{
+	char buf[32];
+	int fd;
+	int len;
+	int found;
+
+	/* major()/minor() are formatted as unsigned, with a bounded write */
+	len = snprintf(buf, sizeof(buf), "%u:%u",
+		       (unsigned int)major(id), (unsigned int)minor(id));
+	if (len < 0 || (size_t)len >= sizeof(buf))
+		return 0;
+
+	fd = dev_open(buf, O_RDONLY);
+	if (fd < 0)
+		return 0;
+	found = get_dev_size(fd, NULL, size) ? 1 : 0;
+	close(fd);
+	return found;
+}
struct supertype *dup_super(struct supertype *orig)
{
st->ss = orig->ss;
st->max_devs = orig->max_devs;
st->minor_version = orig->minor_version;
- strcpy(st->subarray, orig->subarray);
st->sb = NULL;
st->info = NULL;
return st;
}
-struct supertype *guess_super(int fd)
+struct supertype *guess_super_type(int fd, enum guess_types guess_type)
{
/* try each load_super to find the best match,
* and return the best superswitch
int i;
st = malloc(sizeof(*st));
+ memset(st, 0, sizeof(*st));
+ st->container_dev = NoMdDev;
+
for (i=0 ; superlist[i]; i++) {
int rv;
ss = superlist[i];
+ if (guess_type == guess_array && ss->add_to_super == NULL)
+ continue;
+ if (guess_type == guess_partitions && ss->add_to_super != NULL)
+ continue;
memset(st, 0, sizeof(*st));
rv = ss->load_super(st, fd, NULL);
if (rv == 0) {
struct mdinfo info;
- st->ss->getinfo_super(st, &info);
+ st->ss->getinfo_super(st, &info, NULL);
if (bestsuper == -1 ||
besttime < info.array.ctime) {
bestsuper = i;
return 1;
}
+/* Return true if this can only be a container, not a member device,
+ * i.e. it is an md device and its size is zero or cannot be obtained.
+ */
+int must_be_container(int fd)
+{
+	unsigned long long size;
+
+	if (md_get_version(fd) < 0)
+		return 0;
+	/* 'size' is only looked at when get_dev_size() reported success */
+	return get_dev_size(fd, NULL, &size) == 0 || size == 0;
+}
/* Sets endofpart parameter to the last block used by the last GPT partition on the device.
* Returns: 1 if successful
struct mdstat_ent *mdstat = mdstat_read(0, 0);
struct mdstat_ent *ent;
- for (ent = mdstat; ent; ent = ent->next) {
- if (is_container_member(ent, container)) {
- char *inst = &ent->metadata_version[10+strlen(container)+1];
-
- if (!subarray || strcmp(inst, subarray) == 0)
+ for (ent = mdstat; ent; ent = ent->next)
+ if (is_container_member(ent, container))
+ if (!subarray ||
+ strcmp(to_subarray(ent, container), subarray) == 0)
break;
- }
- }
free_mdstat(mdstat);
/* open_subarray - opens a subarray in a container
* @dev: container device name
- * @st: supertype with only ->subarray set
+ * @st: empty supertype
* @quiet: block reporting errors flag
*
* On success returns an fd to a container and fills in *st
*/
-int open_subarray(char *dev, struct supertype *st, int quiet)
+int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
{
struct mdinfo *mdi;
+ struct mdinfo *info;
int fd, err = 1;
fd = open(dev, O_RDWR|O_EXCL);
goto free_sysfs;
}
- if (st->ss->load_super(st, fd, NULL)) {
+ if (!st->ss->load_container) {
if (!quiet)
- fprintf(stderr, Name ": Failed to find subarray-%s in %s\n",
- st->subarray, dev);
+ fprintf(stderr, Name ": %s is not a container\n", dev);
goto free_name;
}
- if (!st->loaded_container) {
+ if (st->ss->load_container(st, fd, NULL)) {
if (!quiet)
- fprintf(stderr, Name ": %s is not a container\n", dev);
+ fprintf(stderr, Name ": Failed to load metadata for %s\n",
+ dev);
+ goto free_name;
+ }
+
+ info = st->ss->container_content(st, subarray);
+ if (!info) {
+ if (!quiet)
+ fprintf(stderr, Name ": Failed to find subarray-%s in %s\n",
+ subarray, dev);
goto free_super;
}
+ free(info);
err = 0;
return rv;
}
+/* Remove the disk given by 'info' from the array.
+ * When built without MDASSEMBLE and the metadata handler is marked
+ * external, the disk's sysfs "slot" is set to "none"; in every other case
+ * the HOT_REMOVE_DISK ioctl is issued on 'mdfd'.
+ * Returns whatever the chosen call returns.
+ */
+int remove_disk(int mdfd, struct supertype *st,
+		struct mdinfo *sra, struct mdinfo *info)
+{
+#ifndef MDASSEMBLE
+	if (st->ss->external)
+		return sysfs_set_str(sra, info, "slot", "none");
+#endif
+	return ioctl(mdfd, HOT_REMOVE_DISK,
+		     makedev(info->disk.major, info->disk.minor));
+}
+
int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
{
/* Initialise kernel's knowledge of array.
unsigned int __invalid_size_argument_for_IOC = 0;
#endif
+/* Gate for features that must be enabled explicitly.
+ * Returns 1 when check_env("MDADM_EXPERIMENTAL") is true; otherwise prints
+ * a hint on stderr and returns 0.
+ */
+int experimental(void)
+{
+	if (!check_env("MDADM_EXPERIMENTAL")) {
+		fprintf(stderr, Name ": To use this feature MDADM_EXPERIMENTAL enviroment variable has to defined.\n");
+		return 0;
+	}
+	return 1;
+}
+