X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=Assemble.c;h=05ace561fb507bb6a557b3d15e6d35f18517400b;hp=bfc879c7f4433550aa25e80dc1fd39156544582f;hb=fdcd157a8034fe5c896e49c4764d8ceef72e3741;hpb=ca6529edf6f7216003863a9ea847e6689693bd96 diff --git a/Assemble.c b/Assemble.c index bfc879c7..05ace561 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2009 Neil Brown + * Copyright (C) 2001-2013 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -48,7 +48,7 @@ static int name_matches(char *found, char *required, char *homehost) static int is_member_busy(char *metadata_version) { /* check if the given member array is active */ - struct mdstat_ent *mdstat = mdstat_read(1, 0); + struct mdstat_ent *mdstat = mdstat_read(0, 0); struct mdstat_ent *ent; int busy = 0; @@ -81,36 +81,35 @@ static int ident_matches(struct mddev_ident *ident, same_uuid(content->uuid, ident->uuid, tst->ss->swapuuid)==0 && memcmp(content->uuid, uuid_zero, sizeof(int[4])) != 0) { if (devname) - fprintf(stderr, Name ": %s has wrong uuid.\n", - devname); + pr_err("%s has wrong uuid.\n", devname); return 0; } if (ident->name[0] && (!update || strcmp(update, "name")!= 0) && name_matches(content->name, ident->name, homehost)==0) { if (devname) - fprintf(stderr, Name ": %s has wrong name.\n", - devname); + pr_err("%s has wrong name.\n", devname); return 0; } if (ident->super_minor != UnSet && ident->super_minor != content->array.md_minor) { if (devname) - fprintf(stderr, Name ": %s has wrong super-minor.\n", - devname); + pr_err("%s has wrong super-minor.\n", + devname); return 0; } if (ident->level != UnSet && ident->level != content->array.level) { if (devname) - fprintf(stderr, Name ": %s has wrong raid level.\n", - devname); + pr_err("%s has wrong raid level.\n", + devname); return 0; } if (ident->raid_disks != UnSet && + content->array.raid_disks != 0 && /* metadata doesn't know how many to expect */ ident->raid_disks!= content->array.raid_disks) { if (devname) - fprintf(stderr, Name ": %s requires wrong number of drives.\n", - devname); + pr_err("%s requires wrong number of drives.\n", + devname); return 0; } if (ident->member && ident->member[0]) { @@ -118,171 +117,43 @@ static int ident_matches(struct mddev_ident *ident, char *s = strchr(content->text_version+1, '/'); if (s == NULL) { if (devname) - fprintf(stderr, Name ": %s is not a container and one is required.\n", - devname); + pr_err("%s is not a container and one is required.\n", + devname); return 0; } else if (strcmp(ident->member, s+1) != 0) { if (devname) - fprintf(stderr, Name ": skipping wrong member %s is %s\n", - content->text_version, devname); + pr_err("skipping wrong member %s is %s\n", + content->text_version, devname); return 0; } } return 1; } - -int Assemble(struct supertype *st, char *mddev, - struct mddev_ident *ident, - struct mddev_dev *devlist, - char *backup_file, int invalid_backup, - int readonly, int runstop, - char *update, char *homehost, int require_homehost, - int verbose, int force) +static int select_devices(struct mddev_dev *devlist, + struct mddev_ident *ident, + struct supertype **stp, + struct mdinfo **contentp, + struct context *c, + int inargv, int auto_assem) { - /* - * The task of Assemble is to find a collection of - * devices that should (according to their superblocks) - * form an array, and to give this collection to the MD driver. - * In Linux-2.4 and later, this involves submitting a - * SET_ARRAY_INFO ioctl with no arg - to prepare - * the array - and then submit a number of - * ADD_NEW_DISK ioctls to add disks into - * the array. Finally RUN_ARRAY might - * be submitted to start the array. - * - * Much of the work of Assemble is in finding and/or - * checking the disks to make sure they look right. - * - * If mddev is not set, then scan must be set and we - * read through the config file for dev+uuid mapping - * We recurse, setting mddev, for each device that - * - isn't running - * - has a valid uuid (or any uuid if !uuidset) - * - * If mddev is set, we try to determine state of md. - * check version - must be at least 0.90.0 - * check kernel version. must be at least 2.4. - * If not, we can possibly fall back on START_ARRAY - * Try to GET_ARRAY_INFO. - * If possible, give up - * If not, try to STOP_ARRAY just to make sure - * - * If !uuidset and scan, look in conf-file for uuid - * If not found, give up - * If !devlist and scan and uuidset, get list of devs from conf-file - * - * For each device: - * Check superblock - discard if bad - * Check uuid (set if we don't have one) - discard if no match - * Check superblock similarity if we have a superblock - discard if different - * Record events, devicenum - * This should give us a list of devices for the array - * We should collect the most recent event number - * - * Count disks with recent enough event count - * While force && !enough disks - * Choose newest rejected disks, update event count - * mark clean and rewrite superblock - * If recent kernel: - * SET_ARRAY_INFO - * foreach device with recent events : ADD_NEW_DISK - * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY - * If old kernel: - * Check the device numbers in superblock are right - * update superblock if any changes - * START_ARRAY - * - */ - int mdfd; - int clean; - int auto_assem = (mddev == NULL && !ident->uuid_set && - ident->super_minor == UnSet && ident->name[0] == 0 - && (ident->container == NULL || ident->member == NULL)); - int old_linux = 0; - int vers = vers; /* Keep gcc quite - it really is initialised */ - struct { - char *devname; - int uptodate; /* set once we decide that this device is as - * recent as everything else in the array. - */ - struct mdinfo i; - } *devices; - char *devmap; - int *best = NULL; /* indexed by raid_disk */ - int bestcnt = 0; - int devcnt = 0; - unsigned int okcnt, sparecnt, rebuilding_cnt; - unsigned int req_cnt; - int i; - int most_recent = 0; - int chosen_drive; - int change = 0; - int inargv = 0; - int report_missmatch; - int bitmap_done; - int start_partial_ok = (runstop >= 0) && - (force || devlist==NULL || auto_assem); - unsigned int num_devs; struct mddev_dev *tmpdev; - struct mdinfo info; + int num_devs; + struct supertype *st = *stp; struct mdinfo *content = NULL; - char *avail; - int nextspare = 0; - char *name = NULL; - int trustworthy; - char chosen_name[1024]; + int report_mismatch = ((inargv && c->verbose >= 0) || c->verbose > 0); struct domainlist *domains = NULL; - if (get_linux_version() < 2004000) - old_linux = 1; - - /* - * If any subdevs are listed, then any that don't - * match ident are discarded. Remainder must all match and - * become the array. - * If no subdevs, then we scan all devices in the config file, but - * there must be something in the identity - */ - - if (!devlist && - ident->uuid_set == 0 && - (ident->super_minor < 0 || ident->super_minor == UnSet) && - ident->name[0] == 0 && - (ident->container == NULL || ident->member == NULL) && - ident->devices == NULL) { - fprintf(stderr, Name ": No identity information available for %s - cannot assemble.\n", - mddev ? mddev : "further assembly"); - return 1; - } - - if (devlist == NULL) - devlist = conf_get_devs(); - else if (mddev) - inargv = 1; - - report_missmatch = ((inargv && verbose >= 0) || verbose > 0); - try_again: - /* We come back here when doing auto-assembly and attempting some - * set of devices failed. Those are now marked as ->used==2 and - * we ignore them and try again - */ - tmpdev = devlist; num_devs = 0; while (tmpdev) { if (tmpdev->used) tmpdev->used = 2; else num_devs++; + tmpdev->disposition = 0; tmpdev = tmpdev->next; } - if (!st && ident->st) st = ident->st; - - if (verbose>0) - fprintf(stderr, Name ": looking for devices for %s\n", - mddev ? mddev : "further assembly"); - /* first walk the list of devices to find a consistent set * that match the criterea, if that is possible. * We flag the ones we like with 'used'. @@ -293,89 +164,104 @@ int Assemble(struct supertype *st, char *mddev, char *devname = tmpdev->devname; int dfd; struct stat stb; - struct supertype *tst = dup_super(st); + struct supertype *tst; struct dev_policy *pol = NULL; int found_container = 0; - if (tmpdev->used > 1) continue; + if (tmpdev->used > 1) + continue; - if (ident->devices && - !match_oneof(ident->devices, devname)) { - if (report_missmatch) - fprintf(stderr, Name ": %s is not one of %s\n", devname, ident->devices); + if (ident->container) { + if (ident->container[0] == '/' && + !same_dev(ident->container, devname)) { + if (report_mismatch) + pr_err("%s is not the container required (%s)\n", + devname, ident->container); + continue; + } + } else if (ident->devices && + !match_oneof(ident->devices, devname)) { + /* Note that we ignore the "device=" identifier if a + * "container=" is given. Checking both is unnecessarily + * complicated. + */ + if (report_mismatch) + pr_err("%s is not one of %s\n", devname, ident->devices); continue; } - dfd = dev_open(devname, O_RDONLY|O_EXCL); + tst = dup_super(st); + + dfd = dev_open(devname, O_RDONLY); if (dfd < 0) { - if (report_missmatch) - fprintf(stderr, Name ": cannot open device %s: %s\n", - devname, strerror(errno)); + if (report_mismatch) + pr_err("cannot open device %s: %s\n", + devname, strerror(errno)); tmpdev->used = 2; } else if (fstat(dfd, &stb)< 0) { /* Impossible! */ - fprintf(stderr, Name ": fstat failed for %s: %s\n", - devname, strerror(errno)); + pr_err("fstat failed for %s: %s\n", + devname, strerror(errno)); tmpdev->used = 2; } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { - fprintf(stderr, Name ": %s is not a block device.\n", - devname); + pr_err("%s is not a block device.\n", + devname); tmpdev->used = 2; } else if (must_be_container(dfd)) { if (st) { /* already found some components, this cannot * be another one. */ - if (report_missmatch) - fprintf(stderr, Name ": %s is a container, but we are looking for components\n", - devname); + if (report_mismatch) + pr_err("%s is a container, but we are looking for components\n", + devname); tmpdev->used = 2; #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) } if (!tst && (tst = super_by_fd(dfd, NULL)) == NULL) { - if (report_missmatch) - fprintf(stderr, Name ": not a recognisable container: %s\n", - devname); + if (report_mismatch) + pr_err("not a recognisable container: %s\n", + devname); tmpdev->used = 2; #endif } else if (!tst->ss->load_container || tst->ss->load_container(tst, dfd, NULL)) { - if (report_missmatch) - fprintf(stderr, Name ": no correct container type: %s\n", - devname); + if (report_mismatch) + pr_err("no correct container type: %s\n", + devname); tmpdev->used = 2; } else if (auto_assem && - !conf_test_metadata(tst->ss->name, (pol = devnum_policy(stb.st_rdev)), - tst->ss->match_home(tst, homehost) == 1)) { - if (report_missmatch) - fprintf(stderr, Name ": %s has metadata type %s for which " - "auto-assembly is disabled\n", - devname, tst->ss->name); + !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)), + tst->ss->match_home(tst, c->homehost) == 1)) { + if (report_mismatch) + pr_err("%s has metadata type %s for which " + "auto-assembly is disabled\n", + devname, tst->ss->name); tmpdev->used = 2; } else found_container = 1; } else { if (!tst && (tst = guess_super(dfd)) == NULL) { - if (report_missmatch) - fprintf(stderr, Name ": no recogniseable superblock on %s\n", - devname); + if (report_mismatch) + pr_err("no recogniseable superblock on %s\n", + devname); tmpdev->used = 2; } else if (tst->ss->load_super(tst,dfd, NULL)) { - if (report_missmatch) - fprintf(stderr, Name ": no RAID superblock on %s\n", - devname); + if (report_mismatch) + pr_err("no RAID superblock on %s\n", + devname); tmpdev->used = 2; } else if (tst->ss->compare_super == NULL) { - if (report_missmatch) - fprintf(stderr, Name ": Cannot assemble %s metadata on %s\n", - tst->ss->name, devname); + if (report_mismatch) + pr_err("Cannot assemble %s metadata on %s\n", + tst->ss->name, devname); tmpdev->used = 2; } else if (auto_assem && st == NULL && - !conf_test_metadata(tst->ss->name, (pol = devnum_policy(stb.st_rdev)), - tst->ss->match_home(tst, homehost) == 1)) { - if (report_missmatch) - fprintf(stderr, Name ": %s has metadata type %s for which " - "auto-assembly is disabled\n", - devname, tst->ss->name); + !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)), + tst->ss->match_home(tst, c->homehost) == 1)) { + if (report_mismatch) + pr_err("%s has metadata type %s for which " + "auto-assembly is disabled\n", + devname, tst->ss->name); tmpdev->used = 2; } } @@ -389,73 +275,71 @@ int Assemble(struct supertype *st, char *mddev, /* Ignore unrecognised device if looking for * specific array */ goto loop; - - fprintf(stderr, Name ": %s has no superblock - assembly aborted\n", - devname); + pr_err("%s has no superblock - assembly aborted\n", + devname); if (st) st->ss->free_super(st); dev_policy_free(pol); domain_free(domains); - return 1; + return -1; } if (found_container) { /* tmpdev is a container. We need to be either * looking for a member, or auto-assembling */ + /* should be safe to try an exclusive open now, we + * have rejected anything that some other mdadm might + * be looking at + */ + dfd = dev_open(devname, O_RDONLY | O_EXCL); + if (dfd < 0) { + if (report_mismatch) + pr_err("%s is busy - skipping\n", devname); + goto loop; + } + close(dfd); + + if (ident->container && ident->container[0] != '/') { + /* we have a uuid */ + int uuid[4]; - if (ident->container) { - if (ident->container[0] == '/' && - !same_dev(ident->container, devname)) { - if (report_missmatch) - fprintf(stderr, Name ": %s is not the container required (%s)\n", - devname, ident->container); + content = *contentp; + tst->ss->getinfo_super(tst, content, NULL); + + if (!parse_uuid(ident->container, uuid) || + !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) { + if (report_mismatch) + pr_err("%s has wrong UUID to be required container\n", + devname); goto loop; } - if (ident->container[0] != '/') { - /* we have a uuid */ - int uuid[4]; - - content = &info; - memset(content, 0, sizeof(*content)); - tst->ss->getinfo_super(tst, content, NULL); - - if (!parse_uuid(ident->container, uuid) || - !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) { - if (report_missmatch) - fprintf(stderr, Name ": %s has wrong UUID to be required container\n", - devname); - goto loop; - } - } } /* It is worth looking inside this container. */ - if (verbose > 0) - fprintf(stderr, Name ": looking in container %s\n", - devname); + if (c->verbose > 0) + pr_err("looking in container %s\n", + devname); for (content = tst->ss->container_content(tst, NULL); content; content = content->next) { - /* do not assemble arrays that might have bad blocks */ - if (content->array.state & (1<used = 2; - goto loop; - } if (!ident_matches(ident, content, tst, - homehost, update, - report_missmatch ? devname : NULL)) + c->homehost, c->update, + report_mismatch ? devname : NULL)) /* message already printed */; else if (is_member_busy(content->text_version)) { - if (report_missmatch) - fprintf(stderr, Name ": member %s in %s is already assembled\n", - content->text_version, - devname); + if (report_mismatch) + pr_err("member %s in %s is already assembled\n", + content->text_version, + devname); + } else if (content->array.state & (1<text_version, + devname); } else break; } @@ -466,32 +350,65 @@ int Assemble(struct supertype *st, char *mddev, st = tst; tst = NULL; if (!auto_assem && inargv && tmpdev->next != NULL) { - fprintf(stderr, Name ": %s is a container, but is not " - "only device given: confused and aborting\n", - devname); + pr_err("%s is a container, but is not " + "only device given: confused and aborting\n", + devname); st->ss->free_super(st); dev_policy_free(pol); domain_free(domains); - return 1; + return -1; } - if (verbose > 0) - fprintf(stderr, Name ": found match on member %s in %s\n", - content->text_version, devname); + if (c->verbose > 0) + pr_err("found match on member %s in %s\n", + content->text_version, devname); /* make sure we finished the loop */ tmpdev = NULL; goto loop; } else { + int rv = 0; + struct mddev_ident *match; - content = &info; - memset(content, 0, sizeof(*content)); + content = *contentp; tst->ss->getinfo_super(tst, content, NULL); if (!ident_matches(ident, content, tst, - homehost, update, - report_missmatch ? devname : NULL)) + c->homehost, c->update, + report_mismatch ? devname : NULL)) + goto loop; + + match = conf_match(tst, content, devname, + report_mismatch ? c->verbose : -1, + &rv); + if (!match && rv == 2) + goto loop; + if (match && match->devname && + strcasecmp(match->devname, "") == 0) { + if (report_mismatch) + pr_err("%s is a member of an explicitly ignored array\n", + devname); + goto loop; + } + if (match && !ident_matches(match, content, tst, + c->homehost, c->update, + report_mismatch ? devname : NULL)) + /* Array exists in mdadm.conf but some + * details don't match, so reject it + */ + goto loop; + + /* should be safe to try an exclusive open now, we + * have rejected anything that some other mdadm might + * be looking at + */ + dfd = dev_open(devname, O_RDONLY | O_EXCL); + if (dfd < 0) { + if (report_mismatch) + pr_err("%s is busy - skipping\n", devname); goto loop; - + } + close(dfd); + if (st == NULL) st = dup_super(tst); if (st->minor_version == -1) @@ -518,22 +435,22 @@ int Assemble(struct supertype *st, char *mddev, */ if (auto_assem) goto loop; - if (homehost) { - int first = st->ss->match_home(st, homehost); - int last = tst->ss->match_home(tst, homehost); + if (c->homehost) { + int first = st->ss->match_home(st, c->homehost); + int last = tst->ss->match_home(tst, c->homehost); if (first != last && (first == 1 || last == 1)) { /* We can do something */ if (first) {/* just ignore this one */ - if (report_missmatch) - fprintf(stderr, Name ": %s misses out due to wrong homehost\n", - devname); + if (report_mismatch) + pr_err("%s misses out due to wrong homehost\n", + devname); goto loop; } else { /* reject all those sofar */ struct mddev_dev *td; - if (report_missmatch) - fprintf(stderr, Name ": %s overrides previous devices due to good homehost\n", - devname); + if (report_mismatch) + pr_err("%s overrides previous devices due to good homehost\n", + devname); for (td=devlist; td != tmpdev; td=td->next) if (td->used == 1) td->used = 0; @@ -542,13 +459,13 @@ int Assemble(struct supertype *st, char *mddev, } } } - fprintf(stderr, Name ": superblock on %s doesn't match others - assembly aborted\n", - devname); + pr_err("superblock on %s doesn't match others - assembly aborted\n", + devname); tst->ss->free_super(tst); st->ss->free_super(st); dev_policy_free(pol); domain_free(domains); - return 1; + return -1; } tmpdev->used = 1; } @@ -556,7 +473,7 @@ int Assemble(struct supertype *st, char *mddev, /* Collect domain information from members only */ if (tmpdev && tmpdev->used == 1) { if (!pol) - pol = devnum_policy(stb.st_rdev); + pol = devid_policy(stb.st_rdev); domain_merge(&domains, pol, tst?tst->ss->name:NULL); } dev_policy_free(pol); @@ -574,7 +491,7 @@ int Assemble(struct supertype *st, char *mddev, if (tmpdev->used != 3) continue; tmpdev->used = 1; - content = &info; + content = *contentp; if (!st->sb) { /* we need sb from one of the spares */ @@ -593,11 +510,11 @@ int Assemble(struct supertype *st, char *mddev, if (tmpdev->used != 3) continue; if (stat(tmpdev->devname, &stb)< 0) { - fprintf(stderr, Name ": fstat failed for %s: %s\n", - tmpdev->devname, strerror(errno)); + pr_err("fstat failed for %s: %s\n", + tmpdev->devname, strerror(errno)); tmpdev->used = 2; } else { - struct dev_policy *pol = devnum_policy(stb.st_rdev); + struct dev_policy *pol = devid_policy(stb.st_rdev); int dt = domain_test(domains, pol, NULL); if (inargv && dt != 0) /* take this spare as domains match @@ -614,115 +531,62 @@ int Assemble(struct supertype *st, char *mddev, } } domain_free(domains); - - if (!st || !st->sb || !content) - return 2; - - /* Now need to open the array device. Use create_mddev */ - if (content == &info) + *stp = st; + if (st && st->sb && content == *contentp) st->ss->getinfo_super(st, content, NULL); + *contentp = content; - trustworthy = FOREIGN; - name = content->name; - switch (st->ss->match_home(st, homehost) - ?: st->ss->match_home(st, "any")) { - case 1: - trustworthy = LOCAL; - name = strchr(content->name, ':'); - if (name) - name++; - else - name = content->name; - break; - } - if (!auto_assem) - /* If the array is listed in mdadm.conf or on - * command line, then we trust the name - * even if the array doesn't look local - */ - trustworthy = LOCAL; - - if (name[0] == 0 && - content->array.level == LEVEL_CONTAINER) { - name = content->text_version; - trustworthy = METADATA; - } - - if (name[0] && trustworthy != LOCAL && - ! require_homehost && - conf_name_is_free(name)) - trustworthy = LOCAL; - - if (trustworthy == LOCAL && - strchr(name, ':')) - /* Ignore 'host:' prefix of name */ - name = strchr(name, ':')+1; - - mdfd = create_mddev(mddev, name, ident->autof, trustworthy, - chosen_name); - if (mdfd < 0) { - st->ss->free_super(st); - if (auto_assem) - goto try_again; - return 1; - } - mddev = chosen_name; - vers = md_get_version(mdfd); - if (vers < 9000) { - fprintf(stderr, Name ": Assemble requires driver version 0.90.0 or later.\n" - " Upgrade your kernel or try --build\n"); - close(mdfd); - return 1; - } - if (mddev_busy(fd2devnum(mdfd))) { - fprintf(stderr, Name ": %s already active, cannot restart it!\n", - mddev); - for (tmpdev = devlist ; - tmpdev && tmpdev->used != 1; - tmpdev = tmpdev->next) - ; - if (tmpdev && auto_assem) - fprintf(stderr, Name ": %s needed for %s...\n", - mddev, tmpdev->devname); - close(mdfd); - mdfd = -3; - st->ss->free_super(st); - if (auto_assem) - goto try_again; - return 1; - } - ioctl(mdfd, STOP_ARRAY, NULL); /* just incase it was started but has no content */ + return num_devs; +} +struct devs { + char *devname; + int uptodate; /* set once we decide that this device is as + * recent as everything else in the array. + */ + int included; /* set if the device is already in the array + * due to a previous '-I' + */ + struct mdinfo i; +}; + +static int load_devices(struct devs *devices, char *devmap, + struct mddev_ident *ident, struct supertype **stp, + struct mddev_dev *devlist, struct context *c, + struct mdinfo *content, + int mdfd, char *mddev, + int *most_recentp, int *bestcntp, int **bestp, + int inargv) +{ + struct mddev_dev *tmpdev; + int devcnt = 0; + int nextspare = 0; #ifndef MDASSEMBLE - if (content != &info) { - /* This is a member of a container. Try starting the array. */ - int err; - err = assemble_container_content(st, mdfd, content, runstop, - chosen_name, verbose, - backup_file); - close(mdfd); - return err; - } + int bitmap_done = 0; #endif - /* Ok, no bad inconsistancy, we can try updating etc */ - bitmap_done = 0; - content->update_private = NULL; - devices = malloc(num_devs * sizeof(*devices)); - devmap = calloc(num_devs * content->array.raid_disks, 1); - for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) { + int most_recent = -1; + int bestcnt = 0; + int *best = *bestp; + struct supertype *st = *stp; + + for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) { char *devname = tmpdev->devname; struct stat stb; + struct supertype *tst; + int i; + + if (tmpdev->used != 1) + continue; /* looks like a good enough match to update the super block if needed */ #ifndef MDASSEMBLE - if (update) { + if (c->update) { int dfd; /* prepare useful information in info structures */ struct stat stb2; - struct supertype *tst; int err; fstat(mdfd, &stb2); - if (strcmp(update, "uuid")==0 && + if (strcmp(c->update, "uuid")==0 && !ident->uuid_set) { int rfd; if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || @@ -734,18 +598,23 @@ int Assemble(struct supertype *st, char *mddev, } if (rfd >= 0) close(rfd); } - dfd = dev_open(devname, O_RDWR|O_EXCL); + dfd = dev_open(devname, + tmpdev->disposition == 'I' + ? O_RDWR : (O_RDWR|O_EXCL)); tst = dup_super(st); if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) { - fprintf(stderr, Name ": cannot re-read metadata from %s - aborting\n", - devname); + pr_err("cannot re-read metadata from %s - aborting\n", + devname); if (dfd >= 0) close(dfd); close(mdfd); free(devices); free(devmap); - return 1; + tst->ss->free_super(tst); + free(tst); + *stp = st; + return -1; } tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks); @@ -753,101 +622,120 @@ int Assemble(struct supertype *st, char *mddev, strcpy(content->name, ident->name); content->array.md_minor = minor(stb2.st_rdev); - if (strcmp(update, "byteorder") == 0) + if (strcmp(c->update, "byteorder") == 0) err = 0; else - err = tst->ss->update_super(tst, content, update, - devname, verbose, + err = tst->ss->update_super(tst, content, c->update, + devname, c->verbose, ident->uuid_set, - homehost); + c->homehost); if (err < 0) { - fprintf(stderr, - Name ": --update=%s not understood" - " for %s metadata\n", - update, tst->ss->name); + if (err == -1) + pr_err("--update=%s not understood" + " for %s metadata\n", + c->update, tst->ss->name); tst->ss->free_super(tst); free(tst); close(mdfd); close(dfd); free(devices); free(devmap); - return 1; + *stp = st; + return -1; } - if (strcmp(update, "uuid")==0 && + if (strcmp(c->update, "uuid")==0 && !ident->uuid_set) { ident->uuid_set = 1; memcpy(ident->uuid, content->uuid, 16); } if (tst->ss->store_super(tst, dfd)) - fprintf(stderr, Name ": Could not re-write superblock on %s.\n", - devname); + pr_err("Could not re-write superblock on %s.\n", + devname); close(dfd); - if (strcmp(update, "uuid")==0 && + if (strcmp(c->update, "uuid")==0 && ident->bitmap_fd >= 0 && !bitmap_done) { if (bitmap_update_uuid(ident->bitmap_fd, content->uuid, tst->ss->swapuuid) != 0) - fprintf(stderr, Name ": Could not update uuid on external bitmap.\n"); + pr_err("Could not update uuid on external bitmap.\n"); else bitmap_done = 1; } - tst->ss->free_super(tst); } else #endif { - struct supertype *tst = dup_super(st); - int dfd; - dfd = dev_open(devname, O_RDWR|O_EXCL); + int dfd = dev_open(devname, + tmpdev->disposition == 'I' + ? O_RDWR : (O_RDWR|O_EXCL)); + tst = dup_super(st); if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) { - fprintf(stderr, Name ": cannot re-read metadata from %s - aborting\n", - devname); + pr_err("cannot re-read metadata from %s - aborting\n", + devname); if (dfd >= 0) close(dfd); close(mdfd); free(devices); free(devmap); - return 1; + tst->ss->free_super(tst); + free(tst); + *stp = st; + return -1; } tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks); - tst->ss->free_super(tst); close(dfd); } stat(devname, &stb); - if (verbose > 0) - fprintf(stderr, Name ": %s is identified as a member of %s, slot %d.\n", - devname, mddev, content->disk.raid_disk); + if (c->verbose > 0) + pr_err("%s is identified as a member of %s, slot %d%s.\n", + devname, mddev, content->disk.raid_disk, + (content->disk.state & (1<disposition == 'I'); devices[devcnt].i = *content; devices[devcnt].i.disk.major = major(stb.st_rdev); devices[devcnt].i.disk.minor = minor(stb.st_rdev); - if (most_recent < devcnt) { - if (devices[devcnt].i.events - > devices[most_recent].i.events) + + if (devices[devcnt].i.disk.state == 6) { + if (most_recent < 0 || + devices[devcnt].i.events + > devices[most_recent].i.events) { + struct supertype *tmp = tst; + tst = st; + st = tmp; most_recent = devcnt; + } } + tst->ss->free_super(tst); + free(tst); + if (content->array.level == LEVEL_MULTIPATH) /* with multipath, the raid_disk from the superblock is meaningless */ i = devcnt; else i = devices[devcnt].i.disk.raid_disk; if (i+1 == 0) { - if (nextspare < content->array.raid_disks) - nextspare = content->array.raid_disks; + if (nextspare < content->array.raid_disks*2) + nextspare = content->array.raid_disks*2; i = nextspare++; } else { - if (i >= content->array.raid_disks && + /* i is raid_disk - double it so there is room for + * replacements */ + i *= 2; + if (devices[devcnt].i.disk.state & (1<= content->array.raid_disks*2 && i >= nextspare) nextspare = i+1; } if (i < 10000) { if (i >= bestcnt) { int newbestcnt = i+10; - int *newbest = malloc(sizeof(int)*newbestcnt); + int *newbest = xmalloc(sizeof(int)*newbestcnt); int c; for (c=0; c < newbestcnt; c++) if (c < bestcnt) @@ -869,20 +757,21 @@ int Assemble(struct supertype *st, char *mddev, * Could be a mis-detection caused by overlapping * partitions. fail-safe. */ - fprintf(stderr, Name ": WARNING %s and %s appear" - " to have very similar superblocks.\n" - " If they are really different, " - "please --zero the superblock on one\n" - " If they are the same or overlap," - " please remove one from %s.\n", - devices[best[i]].devname, devname, - inargv ? "the list" : - "the\n DEVICE list in mdadm.conf" + pr_err("WARNING %s and %s appear" + " to have very similar superblocks.\n" + " If they are really different, " + "please --zero the superblock on one\n" + " If they are the same or overlap," + " please remove one from %s.\n", + devices[best[i]].devname, devname, + inargv ? "the list" : + "the\n DEVICE list in mdadm.conf" ); close(mdfd); free(devices); free(devmap); - return 1; + *stp = st; + return -1; } if (best[i] == -1 || (devices[best[i]].i.events @@ -891,38 +780,706 @@ int Assemble(struct supertype *st, char *mddev, } devcnt++; } - free(content->update_private); - content->update_private = NULL; - - if (devcnt == 0) { - fprintf(stderr, Name ": no devices found for %s\n", - mddev); - if (st) - st->ss->free_super(st); - close(mdfd); - free(devices); - free(devmap); - return 1; - } + if (most_recent >= 0) + *most_recentp = most_recent; + *bestcntp = bestcnt; + *bestp = best; + *stp = st; + return devcnt; +} - if (update && strcmp(update, "byteorder")==0) - st->minor_version = 90; +static int force_array(struct mdinfo *content, + struct devs *devices, + int *best, int bestcnt, char *avail, + int most_recent, + struct supertype *st, + struct context *c) +{ + int okcnt = 0; + while (!enough(content->array.level, content->array.raid_disks, + content->array.layout, 1, + avail) + || + (content->reshape_active && content->delta_disks > 0 && + !enough(content->array.level, (content->array.raid_disks + - content->delta_disks), + content->new_layout, 1, + avail) + )) { + /* Choose the newest best drive which is + * not up-to-date, update the superblock + * and add it. + */ + int fd; + struct supertype *tst; + unsigned long long current_events; + int chosen_drive = -1; + int i; - st->ss->getinfo_super(st, content, NULL); - clean = content->array.state & 1; + for (i = 0; + i < content->array.raid_disks * 2 && i < bestcnt; + i += 2) { + int j = best[i]; + if (j>=0 && + !devices[j].uptodate && + devices[j].i.recovery_start == MaxSector && + (chosen_drive < 0 || + devices[j].i.events + > devices[chosen_drive].i.events)) + chosen_drive = j; + } + if (chosen_drive < 0) + break; + current_events = devices[chosen_drive].i.events; + add_another: + if (c->verbose >= 0) + pr_err("forcing event count in %s(%d) from %d upto %d\n", + devices[chosen_drive].devname, + devices[chosen_drive].i.disk.raid_disk, + (int)(devices[chosen_drive].i.events), + (int)(devices[most_recent].i.events)); + fd = dev_open(devices[chosen_drive].devname, + devices[chosen_drive].included ? O_RDWR + : (O_RDWR|O_EXCL)); + if (fd < 0) { + pr_err("Couldn't open %s for write - not updating\n", + devices[chosen_drive].devname); + devices[chosen_drive].i.events = 0; + continue; + } + tst = dup_super(st); + if (tst->ss->load_super(tst,fd, NULL)) { + close(fd); + pr_err("RAID superblock disappeared from %s - not updating.\n", + devices[chosen_drive].devname); + devices[chosen_drive].i.events = 0; + continue; + } + content->events = devices[most_recent].i.events; + tst->ss->update_super(tst, content, "force-one", + devices[chosen_drive].devname, c->verbose, + 0, NULL); - /* now we have some devices that might be suitable. - * I wonder how many - */ - avail = malloc(content->array.raid_disks); - memset(avail, 0, content->array.raid_disks); - okcnt = 0; - sparecnt=0; - rebuilding_cnt=0; - for (i=0; i< bestcnt; i++) { - int j = best[i]; - int event_margin = 1; /* always allow a difference of '1' - * like the kernel does + if (tst->ss->store_super(tst, fd)) { + close(fd); + pr_err("Could not re-write superblock on %s\n", + devices[chosen_drive].devname); + devices[chosen_drive].i.events = 0; + tst->ss->free_super(tst); + continue; + } + close(fd); + devices[chosen_drive].i.events = devices[most_recent].i.events; + devices[chosen_drive].uptodate = 1; + avail[chosen_drive] = 1; + okcnt++; + tst->ss->free_super(tst); + + /* If there are any other drives of the same vintage, + * add them in as well. We can't lose and we might gain + */ + for (i = 0; + i < content->array.raid_disks * 2 && i < bestcnt ; + i += 2) { + int j = best[i]; + if (j >= 0 && + !devices[j].uptodate && + devices[j].i.recovery_start == MaxSector && + devices[j].i.events == current_events) { + chosen_drive = j; + goto add_another; + } + } + } + return okcnt; +} + +static int start_array(int mdfd, + char *mddev, + struct mdinfo *content, + struct supertype *st, + struct mddev_ident *ident, + int *best, int bestcnt, + int chosen_drive, + struct devs *devices, + unsigned int okcnt, + unsigned int sparecnt, + unsigned int rebuilding_cnt, + struct context *c, + int clean, char *avail, + int start_partial_ok, + int err_ok, + int was_forced + ) +{ + int rv; + int i; + unsigned int req_cnt; + + rv = set_array_info(mdfd, st, content); + if (rv && !err_ok) { + pr_err("failed to set array info for %s: %s\n", + mddev, strerror(errno)); + return 1; + } + if (ident->bitmap_fd >= 0) { + if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) { + pr_err("SET_BITMAP_FILE failed.\n"); + return 1; + } + } else if (ident->bitmap_file) { + /* From config file */ + int bmfd = open(ident->bitmap_file, O_RDWR); + if (bmfd < 0) { + pr_err("Could not open bitmap file %s\n", + ident->bitmap_file); + return 1; + } + if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) { + pr_err("Failed to set bitmapfile for %s\n", mddev); + close(bmfd); + return 1; + } + close(bmfd); + } + + /* First, add the raid disks, but add the chosen one last */ + for (i=0; i<= bestcnt; i++) { + int j; + if (i < bestcnt) { + j = best[i]; + if (j == chosen_drive) + continue; + } else + j = chosen_drive; + + if (j >= 0 && !devices[j].included) { + int dfd = dev_open(devices[j].devname, + O_RDWR|O_EXCL); + if (dfd >= 0) { + remove_partitions(dfd); + close(dfd); + } + rv = add_disk(mdfd, st, content, &devices[j].i); + + if (rv) { + pr_err("failed to add " + "%s to %s: %s\n", + devices[j].devname, + mddev, + strerror(errno)); + if (i < content->array.raid_disks * 2 + || i == bestcnt) + okcnt--; + else + sparecnt--; + } else if (c->verbose > 0) + pr_err("added %s to %s as %d%s%s\n", + devices[j].devname, mddev, + devices[j].i.disk.raid_disk, + devices[j].uptodate?"": + " (possibly out of date)", + (devices[j].i.disk.state & (1<= 0) { + if (c->verbose > 0) + pr_err("%s is already in %s as %d\n", + devices[j].devname, mddev, + devices[j].i.disk.raid_disk); + } else if (c->verbose > 0 && i < content->array.raid_disks*2 + && (i&1) == 0) + pr_err("no uptodate device for slot %d of %s\n", + i, mddev); + } + + if (content->array.level == LEVEL_CONTAINER) { + if (c->verbose >= 0) { + pr_err("Container %s has been " + "assembled with %d drive%s", + mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s"); + if (okcnt < (unsigned)content->array.raid_disks) + fprintf(stderr, " (out of %d)", + content->array.raid_disks); + fprintf(stderr, "\n"); + } + st->ss->free_super(st); + sysfs_uevent(content, "change"); + if (err_ok && okcnt < (unsigned)content->array.raid_disks) + /* Was partial, is still partial, so signal an error + * to ensure we don't retry */ + return 1; + return 0; + } + + /* Get number of in-sync devices according to the superblock. + * We must have this number to start the array without -s or -R + */ + req_cnt = content->array.working_disks; + + if (c->runstop == 1 || + (c->runstop <= 0 && + ( enough(content->array.level, content->array.raid_disks, + content->array.layout, clean, avail) && + (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok) + ))) { + /* This array is good-to-go. + * If a reshape is in progress then we might need to + * continue monitoring it. In that case we start + * it read-only and let the grow code make it writable. + */ + int rv; +#ifndef MDASSEMBLE + if (content->reshape_active && + !(content->reshape_active & RESHAPE_NO_BACKUP) && + content->delta_disks <= 0) { + if (!c->backup_file) { + pr_err("%s: Need a backup file to complete reshape of this array.\n", + mddev); + pr_err("Please provided one with \"--backup-file=...\"\n"); + if (c->update && + strcmp(c->update, "revert-reshape") == 0) + pr_err("(Don't specify --update=revert-reshape again, that part succeeded.)\n"); + return 1; + } + rv = sysfs_set_str(content, NULL, + "array_state", "readonly"); + if (rv == 0) + rv = Grow_continue(mdfd, st, content, + c->backup_file, + c->freeze_reshape); + } else if (c->readonly && + sysfs_attribute_available( + content, NULL, "array_state")) { + rv = sysfs_set_str(content, NULL, + "array_state", "readonly"); + } else +#endif + rv = ioctl(mdfd, RUN_ARRAY, NULL); + reopen_mddev(mdfd); /* drop O_EXCL */ + if (rv == 0) { + if (c->verbose >= 0) { + pr_err("%s has been started with %d drive%s", + mddev, okcnt, okcnt==1?"":"s"); + if (okcnt < (unsigned)content->array.raid_disks) + fprintf(stderr, " (out of %d)", content->array.raid_disks); + if (rebuilding_cnt) + fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); + if (sparecnt) + fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + fprintf(stderr, ".\n"); + } + if (content->reshape_active && + content->array.level >= 4 && + content->array.level <= 6) { + /* might need to increase the size + * of the stripe cache - default is 256 + */ + if (256 < 4 * (content->array.chunk_size/4096)) { + struct mdinfo *sra = sysfs_read(mdfd, NULL, 0); + if (sra) + sysfs_set_num(sra, NULL, + "stripe_cache_size", + (4 * content->array.chunk_size / 4096) + 1); + sysfs_free(sra); + } + } + if (okcnt < (unsigned)content->array.raid_disks) { + /* If any devices did not get added + * because the kernel rejected them based + * on event count, try adding them + * again providing the action policy is + * 're-add' or greater. The bitmap + * might allow them to be included, or + * they will become spares. + */ + for (i = 0; i < bestcnt; i++) { + int j = best[i]; + if (j >= 0 && !devices[j].uptodate) { + if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add)) + continue; + rv = add_disk(mdfd, st, content, + &devices[j].i); + if (rv == 0 && c->verbose >= 0) + pr_err("%s has been re-added.\n", + devices[j].devname); + } + } + } + if (content->array.level == 6 && + okcnt + 1 == (unsigned)content->array.raid_disks && + was_forced) { + struct mdinfo *sra = sysfs_read(mdfd, NULL, 0); + if (sra) + sysfs_set_str(sra, NULL, + "sync_action", "repair"); + sysfs_free(sra); + } + return 0; + } + pr_err("failed to RUN_ARRAY %s: %s\n", + mddev, strerror(errno)); + + if (!enough(content->array.level, content->array.raid_disks, + content->array.layout, 1, avail)) + pr_err("Not enough devices to " + "start the array.\n"); + else if (!enough(content->array.level, + content->array.raid_disks, + content->array.layout, clean, + avail)) + pr_err("Not enough devices to " + "start the array while not clean " + "- consider --force.\n"); + + return 1; + } + if (c->runstop == -1) { + pr_err("%s assembled from %d drive%s", + mddev, okcnt, okcnt==1?"":"s"); + if (okcnt != (unsigned)content->array.raid_disks) + fprintf(stderr, " (out of %d)", content->array.raid_disks); + fprintf(stderr, ", but not started.\n"); + return 2; + } + if (c->verbose >= -1) { + pr_err("%s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s"); + if (rebuilding_cnt) + fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); + if (sparecnt) + fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + if (!enough(content->array.level, content->array.raid_disks, + content->array.layout, 1, avail)) + fprintf(stderr, " - not enough to start the array.\n"); + else if (!enough(content->array.level, + content->array.raid_disks, + content->array.layout, clean, + avail)) + fprintf(stderr, " - not enough to start the " + "array while not clean - consider " + "--force.\n"); + else { + if (req_cnt == (unsigned)content->array.raid_disks) + fprintf(stderr, " - need all %d to start it", req_cnt); + else + fprintf(stderr, " - need %d to start", req_cnt); + fprintf(stderr, " (use --run to insist).\n"); + } + } + return 1; +} + +int Assemble(struct supertype *st, char *mddev, + struct mddev_ident *ident, + struct mddev_dev *devlist, + struct context *c) +{ + /* + * The task of Assemble is to find a collection of + * devices that should (according to their superblocks) + * form an array, and to give this collection to the MD driver. + * In Linux-2.4 and later, this involves submitting a + * SET_ARRAY_INFO ioctl with no arg - to prepare + * the array - and then submit a number of + * ADD_NEW_DISK ioctls to add disks into + * the array. Finally RUN_ARRAY might + * be submitted to start the array. + * + * Much of the work of Assemble is in finding and/or + * checking the disks to make sure they look right. + * + * If mddev is not set, then scan must be set and we + * read through the config file for dev+uuid mapping + * We recurse, setting mddev, for each device that + * - isn't running + * - has a valid uuid (or any uuid if !uuidset) + * + * If mddev is set, we try to determine state of md. + * check version - must be at least 0.90.0 + * check kernel version. must be at least 2.4. + * If not, we can possibly fall back on START_ARRAY + * Try to GET_ARRAY_INFO. + * If possible, give up + * If not, try to STOP_ARRAY just to make sure + * + * If !uuidset and scan, look in conf-file for uuid + * If not found, give up + * If !devlist and scan and uuidset, get list of devs from conf-file + * + * For each device: + * Check superblock - discard if bad + * Check uuid (set if we don't have one) - discard if no match + * Check superblock similarity if we have a superblock - discard if different + * Record events, devicenum + * This should give us a list of devices for the array + * We should collect the most recent event number + * + * Count disks with recent enough event count + * While force && !enough disks + * Choose newest rejected disks, update event count + * mark clean and rewrite superblock + * If recent kernel: + * SET_ARRAY_INFO + * foreach device with recent events : ADD_NEW_DISK + * if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY + * If old kernel: + * Check the device numbers in superblock are right + * update superblock if any changes + * START_ARRAY + * + */ + int rv; + int mdfd; + int clean; + int auto_assem = (mddev == NULL && !ident->uuid_set && + ident->super_minor == UnSet && ident->name[0] == 0 + && (ident->container == NULL || ident->member == NULL)); + struct devs *devices; + char *devmap; + int *best = NULL; /* indexed by raid_disk */ + int bestcnt = 0; + int devcnt; + unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt; + int i; + int was_forced = 0; + int most_recent = 0; + int chosen_drive; + int change = 0; + int inargv = 0; + int start_partial_ok = (c->runstop >= 0) && + (c->force || devlist==NULL || auto_assem); + int num_devs; + struct mddev_dev *tmpdev; + struct mdinfo info; + struct mdinfo *content = NULL; + struct mdinfo *pre_exist = NULL; + char *avail; + char *name = NULL; + char chosen_name[1024]; + struct map_ent *map = NULL; + struct map_ent *mp; + + /* + * If any subdevs are listed, then any that don't + * match ident are discarded. Remainder must all match and + * become the array. + * If no subdevs, then we scan all devices in the config file, but + * there must be something in the identity + */ + + if (!devlist && + ident->uuid_set == 0 && + (ident->super_minor < 0 || ident->super_minor == UnSet) && + ident->name[0] == 0 && + (ident->container == NULL || ident->member == NULL) && + ident->devices == NULL) { + pr_err("No identity information available for %s - cannot assemble.\n", + mddev ? mddev : "further assembly"); + return 1; + } + + if (devlist == NULL) + devlist = conf_get_devs(); + else if (mddev) + inargv = 1; + +try_again: + /* We come back here when doing auto-assembly and attempting some + * set of devices failed. Those are now marked as ->used==2 and + * we ignore them and try again + */ + if (!st && ident->st) + st = ident->st; + if (c->verbose>0) + pr_err("looking for devices for %s\n", + mddev ? mddev : "further assembly"); + + content = &info; + if (st) + st->ignore_hw_compat = 1; + num_devs = select_devices(devlist, ident, &st, &content, c, + inargv, auto_assem); + if (num_devs < 0) + return 1; + + if (!st || !st->sb || !content) + return 2; + + /* We have a full set of devices - we now need to find the + * array device. + * However there is a risk that we are racing with "mdadm -I" + * and the array is already partially assembled - we will have + * rejected any devices already in this address. + * So we take a lock on the map file - to prevent further races - + * and look for the uuid in there. If found and the array is + * active, we abort. If found and the array is not active + * we commit to that md device and add all the contained devices + * to our list. We flag them so that we don't try to re-add, + * but can remove if they turn out to not be wanted. + */ + if (map_lock(&map)) + pr_err("failed to get exclusive lock on mapfile - continue anyway...\n"); + mp = map_by_uuid(&map, content->uuid); + if (mp) { + struct mdinfo *dv; + /* array already exists. */ + pre_exist = sysfs_read(-1, mp->devnm, GET_LEVEL|GET_DEVS); + if (pre_exist->array.level != UnSet) { + pr_err("Found some drive for an array that is already active: %s\n", + mp->path); + pr_err("giving up.\n"); + return 1; + } + for (dv = pre_exist->devs; dv; dv = dv->next) { + /* We want to add this device to our list, + * but it could already be there if "mdadm -I" + * started *after* we checked for O_EXCL. + * If we add it to the top of the list + * it will be preferred over later copies. + */ + struct mddev_dev *newdev; + char *devname = map_dev(dv->disk.major, + dv->disk.minor, + 0); + if (!devname) + continue; + newdev = xmalloc(sizeof(*newdev)); + newdev->devname = devname; + newdev->disposition = 'I'; + newdev->used = 1; + newdev->next = devlist; + devlist = newdev; + num_devs++; + } + strcpy(chosen_name, mp->path); + if (c->verbose > 0 || mddev == NULL || + strcmp(mddev, chosen_name) != 0) + pr_err("Merging with already-assembled %s\n", + chosen_name); + mdfd = open_dev_excl(mp->devnm); + } else { + int trustworthy = FOREIGN; + name = content->name; + switch (st->ss->match_home(st, c->homehost) + ?: st->ss->match_home(st, "any")) { + case 1: + trustworthy = LOCAL; + name = strchr(content->name, ':'); + if (name) + name++; + else + name = content->name; + break; + } + if (!auto_assem) + /* If the array is listed in mdadm.conf or on + * command line, then we trust the name + * even if the array doesn't look local + */ + trustworthy = LOCAL; + + if (name[0] == 0 && + content->array.level == LEVEL_CONTAINER) { + name = content->text_version; + trustworthy = METADATA; + } + + if (name[0] && trustworthy != LOCAL && + ! c->require_homehost && + conf_name_is_free(name)) + trustworthy = LOCAL; + + if (trustworthy == LOCAL && + strchr(name, ':')) + /* Ignore 'host:' prefix of name */ + name = strchr(name, ':')+1; + + mdfd = create_mddev(mddev, name, ident->autof, trustworthy, + chosen_name); + } + if (mdfd < 0) { + st->ss->free_super(st); + if (auto_assem) + goto try_again; + return 1; + } + mddev = chosen_name; + if (get_linux_version() < 2004000 || + md_get_version(mdfd) < 9000) { + pr_err("Assemble requires Linux 2.4 or later, and\n" + " md driver version 0.90.0 or later.\n" + " Upgrade your kernel or try --build\n"); + close(mdfd); + return 1; + } + if (pre_exist == NULL) { + if (mddev_busy(fd2devnm(mdfd))) { + pr_err("%s already active, cannot restart it!\n", + mddev); + for (tmpdev = devlist ; + tmpdev && tmpdev->used != 1; + tmpdev = tmpdev->next) + ; + if (tmpdev && auto_assem) + pr_err("%s needed for %s...\n", + mddev, tmpdev->devname); + close(mdfd); + mdfd = -3; + st->ss->free_super(st); + if (auto_assem) + goto try_again; + return 1; + } + /* just incase it was started but has no content */ + ioctl(mdfd, STOP_ARRAY, NULL); + } + +#ifndef MDASSEMBLE + if (content != &info) { + /* This is a member of a container. Try starting the array. */ + int err; + err = assemble_container_content(st, mdfd, content, c, + chosen_name, NULL); + close(mdfd); + return err; + } +#endif + /* Ok, no bad inconsistancy, we can try updating etc */ + devices = xcalloc(num_devs, sizeof(*devices)); + devmap = xcalloc(num_devs, content->array.raid_disks); + devcnt = load_devices(devices, devmap, ident, &st, devlist, + c, content, mdfd, mddev, + &most_recent, &bestcnt, &best, inargv); + if (devcnt < 0) + return 1; + + if (devcnt == 0) { + pr_err("no devices found for %s\n", + mddev); + if (st) + st->ss->free_super(st); + close(mdfd); + free(devices); + free(devmap); + return 1; + } + + if (c->update && strcmp(c->update, "byteorder")==0) + st->minor_version = 90; + + st->ss->getinfo_super(st, content, NULL); + clean = content->array.state & 1; + + /* now we have some devices that might be suitable. + * I wonder how many + */ + avail = xcalloc(content->array.raid_disks, 1); + okcnt = 0; + replcnt = 0; + sparecnt=0; + rebuilding_cnt=0; + for (i=0; i< bestcnt; i++) { + int j = best[i]; + int event_margin = 1; /* always allow a difference of '1' + * like the kernel does */ if (j < 0) continue; /* note: we ignore error flags in multipath arrays @@ -937,28 +1494,40 @@ int Assemble(struct supertype *st, char *mddev, } continue; } - /* If this devices thinks that 'most_recent' has failed, then + /* If this device thinks that 'most_recent' has failed, then * we must reject this device. */ - if (j != most_recent && + if (j != most_recent && !c->force && content->array.raid_disks > 0 && devices[most_recent].i.disk.raid_disk >= 0 && devmap[j * content->array.raid_disks + devices[most_recent].i.disk.raid_disk] == 0) { - if (verbose > -1) - fprintf(stderr, Name ": ignoring %s as it reports %s as failed\n", - devices[j].devname, devices[most_recent].devname); + if (c->verbose > -1) + pr_err("ignoring %s as it reports %s as failed\n", + devices[j].devname, devices[most_recent].devname); best[i] = -1; continue; } + /* Require event counter to be same as, or just less than, + * most recent. If it is bigger, it must be a stray spare and + * should be ignored. + */ if (devices[j].i.events+event_margin >= - devices[most_recent].i.events) { + devices[most_recent].i.events && + devices[j].i.events <= + devices[most_recent].i.events + ) { devices[j].uptodate = 1; - if (i < content->array.raid_disks) { + if (i < content->array.raid_disks * 2) { if (devices[j].i.recovery_start == MaxSector || (content->reshape_active && - j >= content->array.raid_disks - content->delta_disks)) { - okcnt++; - avail[i]=1; + ((i >= content->array.raid_disks - content->delta_disks) || + (i >= content->array.raid_disks - content->delta_disks - 1 + && content->array.level == 4)))) { + if (!avail[i/2]) { + okcnt++; + avail[i/2]=1; + } else + replcnt++; } else rebuilding_cnt++; } else @@ -966,86 +1535,13 @@ int Assemble(struct supertype *st, char *mddev, } } free(devmap); - while (force && !enough(content->array.level, content->array.raid_disks, - content->array.layout, 1, - avail, okcnt)) { - /* Choose the newest best drive which is - * not up-to-date, update the superblock - * and add it. - */ - int fd; - struct supertype *tst; - unsigned long long current_events; - chosen_drive = -1; - for (i = 0; i < content->array.raid_disks && i < bestcnt; i++) { - int j = best[i]; - if (j>=0 && - !devices[j].uptodate && - devices[j].i.recovery_start == MaxSector && - (chosen_drive < 0 || - devices[j].i.events - > devices[chosen_drive].i.events)) - chosen_drive = j; - } - if (chosen_drive < 0) - break; - current_events = devices[chosen_drive].i.events; - add_another: - if (verbose >= 0) - fprintf(stderr, Name ": forcing event count in %s(%d) from %d upto %d\n", - devices[chosen_drive].devname, - devices[chosen_drive].i.disk.raid_disk, - (int)(devices[chosen_drive].i.events), - (int)(devices[most_recent].i.events)); - fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL); - if (fd < 0) { - fprintf(stderr, Name ": Couldn't open %s for write - not updating\n", - devices[chosen_drive].devname); - devices[chosen_drive].i.events = 0; - continue; - } - tst = dup_super(st); - if (tst->ss->load_super(tst,fd, NULL)) { - close(fd); - fprintf(stderr, Name ": RAID superblock disappeared from %s - not updating.\n", - devices[chosen_drive].devname); - devices[chosen_drive].i.events = 0; - continue; - } - content->events = devices[most_recent].i.events; - tst->ss->update_super(tst, content, "force-one", - devices[chosen_drive].devname, verbose, - 0, NULL); - - if (tst->ss->store_super(tst, fd)) { - close(fd); - fprintf(stderr, Name ": Could not re-write superblock on %s\n", - devices[chosen_drive].devname); - devices[chosen_drive].i.events = 0; - tst->ss->free_super(tst); - continue; - } - close(fd); - devices[chosen_drive].i.events = devices[most_recent].i.events; - devices[chosen_drive].uptodate = 1; - avail[chosen_drive] = 1; - okcnt++; - tst->ss->free_super(tst); - - /* If there are any other drives of the same vintage, - * add them in as well. We can't lose and we might gain - */ - for (i = 0; i < content->array.raid_disks && i < bestcnt ; i++) { - int j = best[i]; - if (j >= 0 && - !devices[j].uptodate && - devices[j].i.events == current_events) { - chosen_drive = j; - goto add_another; - } - } + if (c->force) { + int force_ok = force_array(content, devices, best, bestcnt, + avail, most_recent, st, c); + okcnt += force_ok; + if (force_ok) + was_forced = 1; } - /* Now we want to look at the superblock which the kernel will base things on * and compare the devices that we think are working with the devices that the * superblock thinks are working. @@ -1054,7 +1550,7 @@ int Assemble(struct supertype *st, char *mddev, */ chosen_drive = -1; st->ss->free_super(st); - for (i=0; chosen_drive < 0 && iss->load_super(st,fd, NULL)) { close(fd); - fprintf(stderr, Name ": RAID superblock has disappeared from %s\n", - devices[j].devname); + pr_err("RAID superblock has disappeared from %s\n", + devices[j].devname); close(mdfd); free(devices); return 1; @@ -1081,23 +1581,25 @@ int Assemble(struct supertype *st, char *mddev, close(fd); } if (st->sb == NULL) { - fprintf(stderr, Name ": No suitable drives found for %s\n", mddev); + pr_err("No suitable drives found for %s\n", mddev); close(mdfd); free(devices); return 1; } st->ss->getinfo_super(st, content, NULL); #ifndef MDASSEMBLE - sysfs_init(content, mdfd, 0); + sysfs_init(content, mdfd, NULL); #endif for (i=0; iarray.raid_disks) - desired_state = (1<= content->array.raid_disks * 2) desired_state = 0; + else if (i & 1) + desired_state = (1<ss->update_super(st, &devices[j].i, "assemble", NULL, - verbose, 0, NULL)) { - if (force) { - if (verbose >= 0) - fprintf(stderr, Name ": " - "clearing FAULTY flag for device %d in %s for %s\n", - j, mddev, devices[j].devname); + c->verbose, 0, NULL)) { + if (c->force) { + if (c->verbose >= 0) + pr_err("clearing FAULTY flag for device %d in %s for %s\n", + j, mddev, devices[j].devname); change = 1; } else { - if (verbose >= -1) - fprintf(stderr, Name ": " - "device %d in %s has wrong state in superblock, but %s seems ok\n", - i, mddev, devices[j].devname); + if (c->verbose >= -1) + pr_err("device %d in %s has wrong state in superblock, but %s seems ok\n", + i, mddev, devices[j].devname); } } #if 0 if (!(super.disks[i].i.disk.state & (1 << MD_DISK_FAULTY))) { - fprintf(stderr, Name ": devices %d of %s is not marked FAULTY in superblock, but cannot be found\n", - i, mddev); + pr_err("devices %d of %s is not marked FAULTY in superblock, but cannot be found\n", + i, mddev); } #endif } - if (force && !clean && + if (c->force && !clean && !enough(content->array.level, content->array.raid_disks, content->array.layout, clean, - avail, okcnt)) { + avail)) { change += st->ss->update_super(st, content, "force-array", - devices[chosen_drive].devname, verbose, + devices[chosen_drive].devname, c->verbose, 0, NULL); + was_forced = 1; clean = 1; } if (change) { int fd; - fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL); + fd = dev_open(devices[chosen_drive].devname, + devices[chosen_drive].included ? + O_RDWR : (O_RDWR|O_EXCL)); if (fd < 0) { - fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n", - devices[chosen_drive].devname); + pr_err("Could not open %s for write - cannot Assemble array.\n", + devices[chosen_drive].devname); close(mdfd); free(devices); return 1; } if (st->ss->store_super(st, fd)) { close(fd); - fprintf(stderr, Name ": Could not re-write superblock on %s\n", - devices[chosen_drive].devname); + pr_err("Could not re-write superblock on %s\n", + devices[chosen_drive].devname); close(mdfd); free(devices); return 1; } + if (c->verbose >= 0) + pr_err("Marking array %s as 'clean'\n", + mddev); close(fd); } @@ -1166,20 +1672,24 @@ int Assemble(struct supertype *st, char *mddev, * The code of doing this lives in Grow.c */ #ifndef MDASSEMBLE - if (content->reshape_active) { + if (content->reshape_active && + !(content->reshape_active & RESHAPE_NO_BACKUP)) { int err = 0; - int *fdlist = malloc(sizeof(int)* bestcnt); - if (verbose > 0) - fprintf(stderr, Name ":%s has an active reshape - checking " - "if critical section needs to be restored\n", - chosen_name); - for (i=0; iverbose > 0) + pr_err(":%s has an active reshape - checking " + "if critical section needs to be restored\n", + chosen_name); + enable_fds(bestcnt/2); + for (i = 0; i < bestcnt/2; i++) { + int j = best[i*2]; if (j >= 0) { - fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL); + fdlist[i] = dev_open(devices[j].devname, + devices[j].included + ? O_RDWR : (O_RDWR|O_EXCL)); if (fdlist[i] < 0) { - fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n", - devices[j].devname); + pr_err("Could not open %s for write - cannot Assemble array.\n", + devices[j].devname); err = 1; break; } @@ -1187,12 +1697,15 @@ int Assemble(struct supertype *st, char *mddev, fdlist[i] = -1; } if (!err) { - err = Grow_restart(st, content, fdlist, bestcnt, - backup_file, verbose > 0); - if (err && invalid_backup) { - if (verbose > 0) - fprintf(stderr, Name ": continuing" - " without restoring backup\n"); + if (st->ss->external && st->ss->recover_backup) + err = st->ss->recover_backup(st, content); + else + err = Grow_restart(st, content, fdlist, bestcnt/2, + c->backup_file, c->verbose > 0); + if (err && c->invalid_backup) { + if (c->verbose > 0) + pr_err("continuing" + " without restoring backup\n"); err = 0; } } @@ -1200,438 +1713,257 @@ int Assemble(struct supertype *st, char *mddev, i--; if (fdlist[i]>=0) close(fdlist[i]); } + free(fdlist); if (err) { - fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n"); - if (backup_file == NULL) - fprintf(stderr," Possibly you needed to specify the --backup-file\n"); + pr_err("Failed to restore critical section for reshape, sorry.\n"); + if (c->backup_file == NULL) + cont_err("Possibly you needed to specify the --backup-file\n"); close(mdfd); free(devices); return err; } } #endif - /* count number of in-sync devices according to the superblock. - * We must have this number to start the array without -s or -R - */ - req_cnt = content->array.working_disks; /* Almost ready to actually *do* something */ - if (!old_linux) { - int rv; - - /* First, fill in the map, so that udev can find our name - * as soon as we become active. - */ - map_update(NULL, fd2devnum(mdfd), content->text_version, - content->uuid, chosen_name); - - rv = set_array_info(mdfd, st, content); - if (rv) { - fprintf(stderr, Name ": failed to set array info for %s: %s\n", - mddev, strerror(errno)); - ioctl(mdfd, STOP_ARRAY, NULL); - close(mdfd); - free(devices); - return 1; - } - if (ident->bitmap_fd >= 0) { - if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) { - fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n"); - ioctl(mdfd, STOP_ARRAY, NULL); - close(mdfd); - free(devices); - return 1; - } - } else if (ident->bitmap_file) { - /* From config file */ - int bmfd = open(ident->bitmap_file, O_RDWR); - if (bmfd < 0) { - fprintf(stderr, Name ": Could not open bitmap file %s\n", - ident->bitmap_file); - ioctl(mdfd, STOP_ARRAY, NULL); - close(mdfd); - free(devices); - return 1; - } - if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) { - fprintf(stderr, Name ": Failed to set bitmapfile for %s\n", mddev); - close(bmfd); - ioctl(mdfd, STOP_ARRAY, NULL); - close(mdfd); - free(devices); - return 1; - } - close(bmfd); - } - - /* First, add the raid disks, but add the chosen one last */ - for (i=0; i<= bestcnt; i++) { - int j; - if (i < bestcnt) { - j = best[i]; - if (j == chosen_drive) - continue; - } else - j = chosen_drive; - - if (j >= 0 /* && devices[j].uptodate */) { - int dfd = dev_open(devices[j].devname, - O_RDWR|O_EXCL); - if (dfd >= 0) { - remove_partitions(dfd); - close(dfd); - } - rv = add_disk(mdfd, st, content, &devices[j].i); - - if (rv) { - fprintf(stderr, Name ": failed to add " - "%s to %s: %s\n", - devices[j].devname, - mddev, - strerror(errno)); - if (i < content->array.raid_disks - || i == bestcnt) - okcnt--; - else - sparecnt--; - } else if (verbose > 0) - fprintf(stderr, Name ": added %s " - "to %s as %d\n", - devices[j].devname, mddev, - devices[j].i.disk.raid_disk); - } else if (verbose > 0 && i < content->array.raid_disks) - fprintf(stderr, Name ": no uptodate device for " - "slot %d of %s\n", - i, mddev); - } + /* First, fill in the map, so that udev can find our name + * as soon as we become active. + */ + if (c->update && strcmp(c->update, "metadata")==0) { + content->array.major_version = 1; + content->array.minor_version = 0; + strcpy(content->text_version, "1.0"); + } - if (content->array.level == LEVEL_CONTAINER) { - if (verbose >= 0) { - fprintf(stderr, Name ": Container %s has been " - "assembled with %d drive%s", - mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s"); - if (okcnt < (unsigned)content->array.raid_disks) - fprintf(stderr, " (out of %d)", - content->array.raid_disks); - fprintf(stderr, "\n"); - } - st->ss->free_super(st); - sysfs_uevent(content, "change"); - wait_for(chosen_name, mdfd); - close(mdfd); - free(devices); - return 0; - } + map_update(&map, fd2devnm(mdfd), content->text_version, + content->uuid, chosen_name); - if (runstop == 1 || - (runstop <= 0 && - ( enough(content->array.level, content->array.raid_disks, - content->array.layout, clean, avail, okcnt) && - (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok) - ))) { - /* This array is good-to-go. - * If a reshape is in progress then we might need to - * continue monitoring it. In that case we start - * it read-only and let the grow code make it writable. + rv = start_array(mdfd, mddev, content, + st, ident, best, bestcnt, + chosen_drive, devices, okcnt, sparecnt, + rebuilding_cnt, + c, + clean, avail, start_partial_ok, + pre_exist != NULL, + was_forced); + if (rv == 1 && !pre_exist) + ioctl(mdfd, STOP_ARRAY, NULL); + free(devices); + map_unlock(&map); + if (rv == 0) { + wait_for(chosen_name, mdfd); + close(mdfd); + if (auto_assem) { + int usecs = 1; + /* There is a nasty race with 'mdadm --monitor'. + * If it opens this device before we close it, + * it gets an incomplete open on which IO + * doesn't work and the capacity is + * wrong. + * If we reopen (to check for layered devices) + * before --monitor closes, we loose. + * + * So: wait upto 1 second for there to be + * a non-zero capacity. */ - int rv; -#ifndef MDASSEMBLE - if (content->reshape_active && - content->delta_disks <= 0) - rv = Grow_continue(mdfd, st, content, backup_file); - else -#endif - rv = ioctl(mdfd, RUN_ARRAY, NULL); - if (rv == 0) { - if (verbose >= 0) { - fprintf(stderr, Name ": %s has been started with %d drive%s", - mddev, okcnt, okcnt==1?"":"s"); - if (okcnt < (unsigned)content->array.raid_disks) - fprintf(stderr, " (out of %d)", content->array.raid_disks); - if (rebuilding_cnt) - fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); - if (sparecnt) - fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); - fprintf(stderr, ".\n"); - } - if (content->reshape_active && - content->array.level >= 4 && - content->array.level <= 6) { - /* might need to increase the size - * of the stripe cache - default is 256 - */ - if (256 < 4 * (content->array.chunk_size/4096)) { - struct mdinfo *sra = sysfs_read(mdfd, 0, 0); - if (sra) - sysfs_set_num(sra, NULL, - "stripe_cache_size", - (4 * content->array.chunk_size / 4096) + 1); - } - } - if (okcnt < (unsigned)content->array.raid_disks) { - /* If any devices did not get added - * because the kernel rejected them based - * on event count, try adding them - * again providing the action policy is - * 're-add' or greater. The bitmap - * might allow them to be included, or - * they will become spares. - */ - for (i = 0; i <= bestcnt; i++) { - int j = best[i]; - if (j >= 0 && !devices[j].uptodate) { - if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add)) - continue; - rv = add_disk(mdfd, st, content, - &devices[j].i); - if (rv == 0 && verbose >= 0) - fprintf(stderr, - Name ": %s has been re-added.\n", - devices[j].devname); - } - } - } - wait_for(mddev, mdfd); - close(mdfd); - if (auto_assem) { - int usecs = 1; - /* There is a nasty race with 'mdadm --monitor'. - * If it opens this device before we close it, - * it gets an incomplete open on which IO - * doesn't work and the capacity is - * wrong. - * If we reopen (to check for layered devices) - * before --monitor closes, we loose. - * - * So: wait upto 1 second for there to be - * a non-zero capacity. - */ - while (usecs < 1000) { - mdfd = open(mddev, O_RDONLY); - if (mdfd >= 0) { - unsigned long long size; - if (get_dev_size(mdfd, NULL, &size) && - size > 0) - break; - close(mdfd); - } - usleep(usecs); - usecs <<= 1; - } + while (usecs < 1000) { + mdfd = open(mddev, O_RDONLY); + if (mdfd >= 0) { + unsigned long long size; + if (get_dev_size(mdfd, NULL, &size) && + size > 0) + break; + close(mdfd); } - free(devices); - return 0; - } - fprintf(stderr, Name ": failed to RUN_ARRAY %s: %s\n", - mddev, strerror(errno)); - - if (!enough(content->array.level, content->array.raid_disks, - content->array.layout, 1, avail, okcnt)) - fprintf(stderr, Name ": Not enough devices to " - "start the array.\n"); - else if (!enough(content->array.level, - content->array.raid_disks, - content->array.layout, clean, - avail, okcnt)) - fprintf(stderr, Name ": Not enough devices to " - "start the array while not clean " - "- consider --force.\n"); - - if (auto_assem) - ioctl(mdfd, STOP_ARRAY, NULL); - close(mdfd); - free(devices); - return 1; - } - if (runstop == -1) { - fprintf(stderr, Name ": %s assembled from %d drive%s", - mddev, okcnt, okcnt==1?"":"s"); - if (okcnt != (unsigned)content->array.raid_disks) - fprintf(stderr, " (out of %d)", content->array.raid_disks); - fprintf(stderr, ", but not started.\n"); - close(mdfd); - free(devices); - return 0; - } - if (verbose >= -1) { - fprintf(stderr, Name ": %s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s"); - if (rebuilding_cnt) - fprintf(stderr, "%s %d rebuilding", sparecnt?", ":" and ", rebuilding_cnt); - if (sparecnt) - fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); - if (!enough(content->array.level, content->array.raid_disks, - content->array.layout, 1, avail, okcnt)) - fprintf(stderr, " - not enough to start the array.\n"); - else if (!enough(content->array.level, - content->array.raid_disks, - content->array.layout, clean, - avail, okcnt)) - fprintf(stderr, " - not enough to start the " - "array while not clean - consider " - "--force.\n"); - else { - if (req_cnt == (unsigned)content->array.raid_disks) - fprintf(stderr, " - need all %d to start it", req_cnt); - else - fprintf(stderr, " - need %d of %d to start", req_cnt, content->array.raid_disks); - fprintf(stderr, " (use --run to insist).\n"); + usleep(usecs); + usecs <<= 1; } } - if (auto_assem) - ioctl(mdfd, STOP_ARRAY, NULL); + } else close(mdfd); - free(devices); - return 1; - } else { - /* The "chosen_drive" is a good choice, and if necessary, the superblock has - * been updated to point to the current locations of devices. - * so we can just start the array - */ - unsigned long dev; - dev = makedev(devices[chosen_drive].i.disk.major, - devices[chosen_drive].i.disk.minor); - if (ioctl(mdfd, START_ARRAY, dev)) { - fprintf(stderr, Name ": Cannot start array: %s\n", - strerror(errno)); - } - } - close(mdfd); - free(devices); - return 0; + /* '2' means 'OK, but not started yet' */ + return rv == 2 ? 0 : rv; } #ifndef MDASSEMBLE int assemble_container_content(struct supertype *st, int mdfd, - struct mdinfo *content, int runstop, - char *chosen_name, int verbose, - char *backup_file) + struct mdinfo *content, struct context *c, + char *chosen_name, int *result) { struct mdinfo *dev, *sra; int working = 0, preexist = 0; int expansion = 0; struct map_ent *map = NULL; - - sysfs_init(content, mdfd, 0); - - sra = sysfs_read(mdfd, 0, GET_VERSION); - if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) - if (sysfs_set_array(content, md_get_version(mdfd)) != 0) + int old_raid_disks; + int start_reshape; + char *avail = NULL; + int err; + + sysfs_init(content, mdfd, NULL); + + sra = sysfs_read(mdfd, NULL, GET_VERSION); + if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) { + if (content->array.major_version == -1 && + content->array.minor_version == -2 && + c->readonly && + content->text_version[0] == '/') + content->text_version[0] = '-'; + if (sysfs_set_array(content, md_get_version(mdfd)) != 0) { + if (sra) + sysfs_free(sra); return 1; + } + } - if (content->reshape_active) + /* There are two types of reshape: container wide or sub-array specific + * Check if metadata requests blocking container wide reshapes + */ + start_reshape = (content->reshape_active && + !((content->reshape_active == CONTAINER_RESHAPE) && + (content->array.state & (1<ss->external && content->recovery_blocked && start_reshape) block_subarray(content); if (sra) sysfs_free(sra); - - for (dev = content->devs; dev; dev = dev->next) + old_raid_disks = content->array.raid_disks - content->delta_disks; + avail = xcalloc(content->array.raid_disks, 1); + for (dev = content->devs; dev; dev = dev->next) { + if (dev->disk.raid_disk >= 0) + avail[dev->disk.raid_disk] = 1; if (sysfs_add_disk(content, dev, 1) == 0) { - if (dev->disk.raid_disk >= content->array.raid_disks && + if (dev->disk.raid_disk >= old_raid_disks && content->reshape_active) expansion++; else working++; } else if (errno == EEXIST) preexist++; - if (working == 0) + } + if (working + expansion == 0 && c->runstop <= 0) { + free(avail); return 1;/* Nothing new, don't try to start */ - - map_update(&map, fd2devnum(mdfd), + } + map_update(&map, fd2devnm(mdfd), content->text_version, content->uuid, chosen_name); - if (runstop > 0 || - (working + preexist + expansion) >= - content->array.working_disks) { - int err; - if (content->reshape_active) { - int spare = content->array.raid_disks + expansion; - int i; - int *fdlist = malloc(sizeof(int) * - (working + expansion - + content->array.raid_disks)); - for (i=0; idevs; dev; dev = dev->next) { - char buf[20]; - int fd; - sprintf(buf, "%d:%d", - dev->disk.major, - dev->disk.minor); - fd = dev_open(buf, O_RDWR); - - if (dev->disk.raid_disk >= 0) - fdlist[dev->disk.raid_disk] = fd; - else - fdlist[spare++] = fd; - } - err = Grow_restart(st, content, fdlist, spare, - backup_file, verbose > 0); - while (spare > 0) { - spare--; - if (fdlist[spare] >= 0) - close(fdlist[spare]); - } - if (err) { - fprintf(stderr, Name ": Failed to restore critical" - " section for reshape - sorry.\n"); - if (!backup_file) - fprintf(stderr, Name ": Possibly you need" - " to specify a --backup-file\n"); - return 1; - } + if (enough(content->array.level, content->array.raid_disks, + content->array.layout, content->array.state & 1, avail) == 0) { + if (c->export && result) + *result |= INCR_NO; + else if (c->verbose >= 0) { + pr_err("%s assembled with %d device%s", + chosen_name, preexist + working, + preexist + working == 1 ? "":"s"); + if (preexist) + fprintf(stderr, " (%d new)", working); + fprintf(stderr, " but not started\n"); + } + free(avail); + return 1; + } + free(avail); + + if (c->runstop <= 0 && + (working + preexist + expansion) < + content->array.working_disks) { + if (c->export && result) + *result |= INCR_UNSAFE; + else if (c->verbose >= 0) { + pr_err("%s assembled with %d device%s", + chosen_name, preexist + working, + preexist + working == 1 ? "":"s"); + if (preexist) + fprintf(stderr, " (%d new)", working); + fprintf(stderr, " but not safe to start\n"); + } + return 1; + } + - err = Grow_continue(mdfd, st, content, backup_file); - } else switch(content->array.level) { + if (start_reshape) { + int spare = content->array.raid_disks + expansion; + if (restore_backup(st, content, + working, + spare, c->backup_file, c->verbose) == 1) + return 1; + + err = sysfs_set_str(content, NULL, + "array_state", "readonly"); + if (err) + return 1; + + if (st->ss->external) { + if (!mdmon_running(st->container_devnm)) + start_mdmon(st->container_devnm); + ping_monitor(st->container_devnm); + if (mdmon_running(st->container_devnm) && + st->update_tail == NULL) + st->update_tail = &st->updates; + } + + err = Grow_continue(mdfd, st, content, c->backup_file, + c->freeze_reshape); + } else switch(content->array.level) { case LEVEL_LINEAR: case LEVEL_MULTIPATH: case 0: err = sysfs_set_str(content, NULL, "array_state", - "active"); + c->readonly ? "readonly" : "active"); break; default: err = sysfs_set_str(content, NULL, "array_state", - "readonly"); + "readonly"); /* start mdmon if needed. */ if (!err) { - if (!mdmon_running(st->container_dev)) - start_mdmon(st->container_dev); - ping_monitor(devnum2devname(st->container_dev)); + if (!mdmon_running(st->container_devnm)) + start_mdmon(st->container_devnm); + ping_monitor(st->container_devnm); } break; } - if (!err) - sysfs_set_safemode(content, content->safe_mode_delay); - if (verbose >= 0) { - if (err) - fprintf(stderr, Name - ": array %s now has %d devices", - chosen_name, working + preexist); - else - fprintf(stderr, Name - ": Started %s with %d devices", - chosen_name, working + preexist); - if (preexist) - fprintf(stderr, " (%d new)", working); - if (expansion) - fprintf(stderr, " ( + %d for expansion)", - expansion); - fprintf(stderr, "\n"); - } - if (!err) - wait_for(chosen_name, mdfd); - return err; - /* FIXME should have an O_EXCL and wait for read-auto */ - } else { - if (verbose >= 0) - fprintf(stderr, Name - ": %s assembled with %d devices but " - "not started\n", - chosen_name, working); - return 1; + if (!err) + sysfs_set_safemode(content, content->safe_mode_delay); + + /* Block subarray here if it is not reshaped now + * It has be blocked a little later to allow mdmon to switch in + * in to R/W state + */ + if (st->ss->external && content->recovery_blocked && + !start_reshape) + block_subarray(content); + + if (c->export && result) { + if (err) + *result |= INCR_NO; + else + *result |= INCR_YES; + } else if (c->verbose >= 0) { + if (err) + pr_err("array %s now has %d device%s", + chosen_name, working + preexist, + working + preexist == 1 ? "":"s"); + else + pr_err("Started %s with %d device%s", + chosen_name, working + preexist, + working + preexist == 1 ? "":"s"); + if (preexist) + fprintf(stderr, " (%d new)", working); + if (expansion) + fprintf(stderr, " ( + %d for expansion)", + expansion); + fprintf(stderr, "\n"); } + if (!err) + wait_for(chosen_name, mdfd); + return err; + /* FIXME should have an O_EXCL and wait for read-auto */ } #endif -