X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Assemble.c;h=2ed588498430e89abcee31289063c6c5ca3ba15f;hb=5cfb79dea26d9d7266f79c7c196a1a9f70c16a28;hp=eed1c9b098f3a81e4be3c1a57f5d271dcbfcb968;hpb=8cf2eb96b2330b1507af2fa55d99f338eeff5ab5;p=thirdparty%2Fmdadm.git diff --git a/Assemble.c b/Assemble.c index eed1c9b0..2ed58849 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2012 Neil Brown + * Copyright (C) 2001-2016 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -25,21 +25,27 @@ #include "mdadm.h" #include -static int name_matches(char *found, char *required, char *homehost) +static int name_matches(char *found, char *required, char *homehost, int require_homehost) { /* See if the name found matches the required name, possibly * prefixed with 'homehost' */ - char fnd[33]; + char *sep; + unsigned int l; - strncpy(fnd, found, 32); - fnd[32] = 0; if (strcmp(found, required)==0) return 1; - if (homehost) { - int l = strlen(homehost); - if (l < 32 && fnd[l] == ':' && - strcmp(fnd+l+1, required)==0) + sep = strchr(found, ':'); + if (!sep) + return 0; + l = sep - found; + if (strncmp(found, "any:", 4) == 0 || + (homehost && strcmp(homehost, "any") == 0) || + !require_homehost || + (homehost && strlen(homehost) == l && + strncmp(found, homehost, l) == 0)) { + /* matching homehost */ + if (strcmp(sep+1, required) == 0) return 1; } return 0; @@ -48,7 +54,7 @@ static int name_matches(char *found, char *required, char *homehost) static int is_member_busy(char *metadata_version) { /* check if the given member array is active */ - struct mdstat_ent *mdstat = mdstat_read(1, 0); + struct mdstat_ent *mdstat = mdstat_read(0, 0); struct mdstat_ent *ent; int busy = 0; @@ -73,7 +79,7 @@ static int is_member_busy(char *metadata_version) static int ident_matches(struct mddev_ident *ident, struct mdinfo *content, struct supertype *tst, - char *homehost, + char *homehost, int require_homehost, char *update, char *devname) { @@ -85,7 +91,7 @@ static int ident_matches(struct mddev_ident *ident, return 0; } if (ident->name[0] && (!update || strcmp(update, "name")!= 0) && - name_matches(content->name, ident->name, homehost)==0) { + name_matches(content->name, ident->name, homehost, require_homehost)==0) { if (devname) pr_err("%s has wrong name.\n", devname); return 0; @@ -105,6 +111,7 @@ static int ident_matches(struct mddev_ident *ident, return 0; } if (ident->raid_disks != UnSet && + content->array.raid_disks != 0 && /* metadata doesn't know how many to expect */ ident->raid_disks!= content->array.raid_disks) { if (devname) pr_err("%s requires wrong number of drives.\n", @@ -142,6 +149,7 @@ static int select_devices(struct mddev_dev *devlist, struct mdinfo *content = NULL; int report_mismatch = ((inargv && c->verbose >= 0) || c->verbose > 0); struct domainlist *domains = NULL; + dev_t rdev; tmpdev = devlist; num_devs = 0; while (tmpdev) { @@ -162,7 +170,6 @@ static int select_devices(struct mddev_dev *devlist, tmpdev = tmpdev ? tmpdev->next : NULL) { char *devname = tmpdev->devname; int dfd; - struct stat stb; struct supertype *tst; struct dev_policy *pol = NULL; int found_container = 0; @@ -170,8 +177,20 @@ static int select_devices(struct mddev_dev *devlist, if (tmpdev->used > 1) continue; - if (ident->devices && - !match_oneof(ident->devices, devname)) { + if (ident->container) { + if (ident->container[0] == '/' && + !same_dev(ident->container, devname)) { + if (report_mismatch) + pr_err("%s is not the container required (%s)\n", + devname, ident->container); + continue; + } + } else if (ident->devices && + !match_oneof(ident->devices, devname)) { + /* Note that we ignore the "device=" identifier if a + * "container=" is given. Checking both is unnecessarily + * complicated. + */ if (report_mismatch) pr_err("%s is not one of %s\n", devname, ident->devices); continue; @@ -185,14 +204,7 @@ static int select_devices(struct mddev_dev *devlist, pr_err("cannot open device %s: %s\n", devname, strerror(errno)); tmpdev->used = 2; - } else if (fstat(dfd, &stb)< 0) { - /* Impossible! */ - pr_err("fstat failed for %s: %s\n", - devname, strerror(errno)); - tmpdev->used = 2; - } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { - pr_err("%s is not a block device.\n", - devname); + } else if (!fstat_is_blkdev(dfd, devname, &rdev)) { tmpdev->used = 2; } else if (must_be_container(dfd)) { if (st) { @@ -203,25 +215,23 @@ static int select_devices(struct mddev_dev *devlist, pr_err("%s is a container, but we are looking for components\n", devname); tmpdev->used = 2; -#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) } if (!tst && (tst = super_by_fd(dfd, NULL)) == NULL) { if (report_mismatch) pr_err("not a recognisable container: %s\n", devname); tmpdev->used = 2; -#endif - } else if (!tst->ss->load_container - || tst->ss->load_container(tst, dfd, NULL)) { + } else if (!tst->ss->load_container || + tst->ss->load_container(tst, dfd, NULL)) { if (report_mismatch) pr_err("no correct container type: %s\n", devname); tmpdev->used = 2; } else if (auto_assem && - !conf_test_metadata(tst->ss->name, (pol = devnum_policy(stb.st_rdev)), + !conf_test_metadata(tst->ss->name, + (pol = devid_policy(rdev)), tst->ss->match_home(tst, c->homehost) == 1)) { if (report_mismatch) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, tst->ss->name); tmpdev->used = 2; } else @@ -232,7 +242,9 @@ static int select_devices(struct mddev_dev *devlist, pr_err("no recogniseable superblock on %s\n", devname); tmpdev->used = 2; - } else if (tst->ss->load_super(tst,dfd, NULL)) { + } else if ((tst->ignore_hw_compat = 0), + tst->ss->load_super(tst, dfd, + report_mismatch ? devname : NULL)) { if (report_mismatch) pr_err("no RAID superblock on %s\n", devname); @@ -243,11 +255,11 @@ static int select_devices(struct mddev_dev *devlist, tst->ss->name, devname); tmpdev->used = 2; } else if (auto_assem && st == NULL && - !conf_test_metadata(tst->ss->name, (pol = devnum_policy(stb.st_rdev)), + !conf_test_metadata(tst->ss->name, + (pol = devid_policy(rdev)), tst->ss->match_home(tst, c->homehost) == 1)) { if (report_mismatch) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, tst->ss->name); tmpdev->used = 2; } @@ -257,7 +269,7 @@ static int select_devices(struct mddev_dev *devlist, if (auto_assem || !inargv) /* Ignore unrecognised devices during auto-assembly */ goto loop; - if (ident->uuid_set || ident->name[0] || + if (ident->name[0] || ident->super_minor != UnSet) /* Ignore unrecognised device if looking for * specific array */ @@ -269,6 +281,8 @@ static int select_devices(struct mddev_dev *devlist, st->ss->free_super(st); dev_policy_free(pol); domain_free(domains); + if (tst) + tst->ss->free_super(tst); return -1; } @@ -288,29 +302,20 @@ static int select_devices(struct mddev_dev *devlist, } close(dfd); - if (ident->container) { - if (ident->container[0] == '/' && - !same_dev(ident->container, devname)) { + if (ident->container && ident->container[0] != '/') { + /* we have a uuid */ + int uuid[4]; + + content = *contentp; + tst->ss->getinfo_super(tst, content, NULL); + + if (!parse_uuid(ident->container, uuid) || + !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) { if (report_mismatch) - pr_err("%s is not the container required (%s)\n", - devname, ident->container); + pr_err("%s has wrong UUID to be required container\n", + devname); goto loop; } - if (ident->container[0] != '/') { - /* we have a uuid */ - int uuid[4]; - - content = *contentp; - tst->ss->getinfo_super(tst, content, NULL); - - if (!parse_uuid(ident->container, uuid) || - !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) { - if (report_mismatch) - pr_err("%s has wrong UUID to be required container\n", - devname); - goto loop; - } - } } /* It is worth looking inside this container. */ @@ -323,7 +328,8 @@ static int select_devices(struct mddev_dev *devlist, content = content->next) { if (!ident_matches(ident, content, tst, - c->homehost, c->update, + c->homehost, c->require_homehost, + c->update, report_mismatch ? devname : NULL)) /* message already printed */; else if (is_member_busy(content->text_version)) { @@ -346,8 +352,7 @@ static int select_devices(struct mddev_dev *devlist, st = tst; tst = NULL; if (!auto_assem && inargv && tmpdev->next != NULL) { - pr_err("%s is a container, but is not " - "only device given: confused and aborting\n", + pr_err("%s is a container, but is not only device given: confused and aborting\n", devname); st->ss->free_super(st); dev_policy_free(pol); @@ -362,36 +367,43 @@ static int select_devices(struct mddev_dev *devlist, tmpdev = NULL; goto loop; } else { - int rv = 0; - struct mddev_ident *match; - content = *contentp; tst->ss->getinfo_super(tst, content, NULL); if (!ident_matches(ident, content, tst, - c->homehost, c->update, + c->homehost, c->require_homehost, + c->update, report_mismatch ? devname : NULL)) goto loop; - match = conf_match(tst, content, devname, - report_mismatch ? c->verbose : -1, - &rv); - if (!match && rv == 2) - goto loop; - if (match && match->devname && - strcasecmp(match->devname, "") == 0) { - if (report_mismatch) - pr_err("%s is a member of an explicitly ignored array\n", - devname); - goto loop; - } - if (match && !ident_matches(match, content, tst, - c->homehost, c->update, - report_mismatch ? devname : NULL)) - /* Array exists in mdadm.conf but some - * details don't match, so reject it + if (auto_assem) { + /* Never auto-assemble things that conflict + * with mdadm.conf in some way */ - goto loop; + struct mddev_ident *match; + int rv = 0; + + match = conf_match(tst, content, devname, + report_mismatch ? c->verbose : -1, + &rv); + if (!match && rv == 2) + goto loop; + if (match && match->devname && + strcasecmp(match->devname, "") == 0) { + if (report_mismatch) + pr_err("%s is a member of an explicitly ignored array\n", + devname); + goto loop; + } + if (match && !ident_matches(match, content, tst, + c->homehost, c->require_homehost, + c->update, + report_mismatch ? devname : NULL)) + /* Array exists in mdadm.conf but some + * details don't match, so reject it + */ + goto loop; + } /* should be safe to try an exclusive open now, we * have rejected anything that some other mdadm might @@ -469,7 +481,7 @@ static int select_devices(struct mddev_dev *devlist, /* Collect domain information from members only */ if (tmpdev && tmpdev->used == 1) { if (!pol) - pol = devnum_policy(stb.st_rdev); + pol = devid_policy(rdev); domain_merge(&domains, pol, tst?tst->ss->name:NULL); } dev_policy_free(pol); @@ -502,15 +514,12 @@ static int select_devices(struct mddev_dev *devlist, /* Now reject spares that don't match domains of identified members */ for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { - struct stat stb; if (tmpdev->used != 3) continue; - if (stat(tmpdev->devname, &stb)< 0) { - pr_err("fstat failed for %s: %s\n", - tmpdev->devname, strerror(errno)); + if (!stat_is_blkdev(tmpdev->devname, &rdev)) { tmpdev->used = 2; } else { - struct dev_policy *pol = devnum_policy(stb.st_rdev); + struct dev_policy *pol = devid_policy(rdev); int dt = domain_test(domains, pol, NULL); if (inargv && dt != 0) /* take this spare as domains match @@ -547,7 +556,7 @@ struct devs { }; static int load_devices(struct devs *devices, char *devmap, - struct mddev_ident *ident, struct supertype *st, + struct mddev_ident *ident, struct supertype **stp, struct mddev_dev *devlist, struct context *c, struct mdinfo *content, int mdfd, char *mddev, @@ -557,42 +566,41 @@ static int load_devices(struct devs *devices, char *devmap, struct mddev_dev *tmpdev; int devcnt = 0; int nextspare = 0; -#ifndef MDASSEMBLE int bitmap_done = 0; -#endif - int most_recent = 0; + int most_recent = -1; int bestcnt = 0; int *best = *bestp; + struct supertype *st = *stp; for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) { char *devname = tmpdev->devname; struct stat stb; + struct supertype *tst; int i; + int dfd; + int disk_state; if (tmpdev->used != 1) continue; /* looks like a good enough match to update the super block if needed */ -#ifndef MDASSEMBLE if (c->update) { - int dfd; /* prepare useful information in info structures */ struct stat stb2; - struct supertype *tst; int err; fstat(mdfd, &stb2); - if (strcmp(c->update, "uuid")==0 && - !ident->uuid_set) { - int rfd; - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, ident->uuid, 16) != 16) { - *(__u32*)(ident->uuid) = random(); - *(__u32*)(ident->uuid+1) = random(); - *(__u32*)(ident->uuid+2) = random(); - *(__u32*)(ident->uuid+3) = random(); - } - if (rfd >= 0) close(rfd); + if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set) + random_uuid((__u8 *)ident->uuid); + + if (strcmp(c->update, "ppl") == 0 && + ident->bitmap_fd >= 0) { + pr_err("PPL is not compatible with bitmap\n"); + close(mdfd); + free(devices); + free(devmap); + return -1; } + dfd = dev_open(devname, tmpdev->disposition == 'I' ? O_RDWR : (O_RDWR|O_EXCL)); @@ -606,6 +614,9 @@ static int load_devices(struct devs *devices, char *devmap, close(mdfd); free(devices); free(devmap); + tst->ss->free_super(tst); + free(tst); + *stp = st; return -1; } tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks); @@ -616,21 +627,35 @@ static int load_devices(struct devs *devices, char *devmap, if (strcmp(c->update, "byteorder") == 0) err = 0; + else if (strcmp(c->update, "home-cluster") == 0) { + tst->cluster_name = c->homecluster; + err = tst->ss->write_bitmap(tst, dfd, NameUpdate); + } else if (strcmp(c->update, "nodes") == 0) { + tst->nodes = c->nodes; + err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate); + } else if (strcmp(c->update, "revert-reshape") == 0 && + c->invalid_backup) + err = tst->ss->update_super(tst, content, + "revert-reshape-nobackup", + devname, c->verbose, + ident->uuid_set, + c->homehost); else err = tst->ss->update_super(tst, content, c->update, devname, c->verbose, ident->uuid_set, c->homehost); if (err < 0) { - pr_err("--update=%s not understood" - " for %s metadata\n", - c->update, tst->ss->name); + if (err == -1) + pr_err("--update=%s not understood for %s metadata\n", + c->update, tst->ss->name); tst->ss->free_super(tst); free(tst); close(mdfd); close(dfd); free(devices); free(devmap); + *stp = st; return -1; } if (strcmp(c->update, "uuid")==0 && @@ -641,7 +666,6 @@ static int load_devices(struct devs *devices, char *devmap, if (tst->ss->store_super(tst, dfd)) pr_err("Could not re-write superblock on %s.\n", devname); - close(dfd); if (strcmp(c->update, "uuid")==0 && ident->bitmap_fd >= 0 && !bitmap_done) { @@ -652,15 +676,11 @@ static int load_devices(struct devs *devices, char *devmap, else bitmap_done = 1; } - tst->ss->free_super(tst); - } else -#endif - { - struct supertype *tst = dup_super(st); - int dfd; + } else { dfd = dev_open(devname, tmpdev->disposition == 'I' ? O_RDWR : (O_RDWR|O_EXCL)); + tst = dup_super(st); if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) { pr_err("cannot re-read metadata from %s - aborting\n", @@ -670,14 +690,16 @@ static int load_devices(struct devs *devices, char *devmap, close(mdfd); free(devices); free(devmap); + tst->ss->free_super(tst); + free(tst); + *stp = st; return -1; } tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks); - tst->ss->free_super(tst); - close(dfd); } - stat(devname, &stb); + fstat(dfd, &stb); + close(dfd); if (c->verbose > 0) pr_err("%s is identified as a member of %s, slot %d%s.\n", @@ -689,17 +711,28 @@ static int load_devices(struct devs *devices, char *devmap, devices[devcnt].i = *content; devices[devcnt].i.disk.major = major(stb.st_rdev); devices[devcnt].i.disk.minor = minor(stb.st_rdev); - if (most_recent < devcnt) { - if (devices[devcnt].i.events - > devices[most_recent].i.events) + + disk_state = devices[devcnt].i.disk.state & ~((1< devices[most_recent].i.events) { + struct supertype *tmp = tst; + tst = st; + st = tmp; most_recent = devcnt; + } } + tst->ss->free_super(tst); + free(tst); + if (content->array.level == LEVEL_MULTIPATH) /* with multipath, the raid_disk from the superblock is meaningless */ i = devcnt; else i = devices[devcnt].i.disk.raid_disk; - if (i+1 == 0) { + if (i+1 == 0 || i == MD_DISK_ROLE_JOURNAL) { if (nextspare < content->array.raid_disks*2) nextspare = content->array.raid_disks*2; i = nextspare++; @@ -728,22 +761,19 @@ static int load_devices(struct devs *devices, char *devmap, bestcnt = newbestcnt; } if (best[i] >=0 && - devices[best[i]].i.events - == devices[devcnt].i.events - && (devices[best[i]].i.disk.minor - != devices[devcnt].i.disk.minor) - && st->ss == &super0 - && content->array.level != LEVEL_MULTIPATH) { + devices[best[i]].i.events == + devices[devcnt].i.events && + (devices[best[i]].i.disk.minor != + devices[devcnt].i.disk.minor) && + st->ss == &super0 && + content->array.level != LEVEL_MULTIPATH) { /* two different devices with identical superblock. * Could be a mis-detection caused by overlapping * partitions. fail-safe. */ - pr_err("WARNING %s and %s appear" - " to have very similar superblocks.\n" - " If they are really different, " - "please --zero the superblock on one\n" - " If they are the same or overlap," - " please remove one from %s.\n", + pr_err("WARNING %s and %s appear to have very similar superblocks.\n" + " If they are really different, please --zero the superblock on one\n" + " If they are the same or overlap, please remove one from %s.\n", devices[best[i]].devname, devname, inargv ? "the list" : "the\n DEVICE list in mdadm.conf" @@ -751,18 +781,22 @@ static int load_devices(struct devs *devices, char *devmap, close(mdfd); free(devices); free(devmap); + *stp = st; return -1; } - if (best[i] == -1 - || (devices[best[i]].i.events - < devices[devcnt].i.events)) + if (best[i] == -1 || (devices[best[i]].i.events + < devices[devcnt].i.events)) best[i] = devcnt; + else if (st->ss == &super_imsm) + best[i+1] = devcnt; } devcnt++; } - *most_recentp = most_recent; + if (most_recent >= 0) + *most_recentp = most_recent; *bestcntp = bestcnt; *bestp = best; + *stp = st; return devcnt; } @@ -776,14 +810,11 @@ static int force_array(struct mdinfo *content, int okcnt = 0; while (!enough(content->array.level, content->array.raid_disks, content->array.layout, 1, - avail) - || + avail) || (content->reshape_active && content->delta_disks > 0 && !enough(content->array.level, (content->array.raid_disks - content->delta_disks), - content->new_layout, 1, - avail) - )) { + content->new_layout, 1, avail))) { /* Choose the newest best drive which is * not up-to-date, update the superblock * and add it. @@ -794,14 +825,53 @@ static int force_array(struct mdinfo *content, int chosen_drive = -1; int i; - for (i = 0; i < content->array.raid_disks && i < bestcnt; i++) { + for (i = 0; + i < content->array.raid_disks * 2 && i < bestcnt; + i += 2) { int j = best[i]; - if (j>=0 && - !devices[j].uptodate && - devices[j].i.recovery_start == MaxSector && - (chosen_drive < 0 || + if (j < 0) + continue; + if (devices[j].uptodate) + continue; + if (devices[j].i.recovery_start != MaxSector) { + int delta; + if (!devices[j].i.reshape_active || + devices[j].i.delta_disks <= 0) + continue; + /* When increasing number of devices, an + * added device also appears to be + * recovering. It is safe to include it + * as long as it won't be a source of + * data. + * For now, just allow for last data + * devices in RAID4 or last devices in RAID4/5/6. + */ + delta = devices[j].i.delta_disks; + if (devices[j].i.array.level >= 4 && + devices[j].i.array.level <= 6 && + i/2 >= content->array.raid_disks - delta) + /* OK */; + else if (devices[j].i.array.level == 4 && + i/2 >= content->array.raid_disks - delta - 1) + /* OK */; + else + continue; + } else if (devices[j].i.reshape_active != + content->reshape_active || + (devices[j].i.reshape_active && + devices[j].i.reshape_progress != + content->reshape_progress)) + /* Here, it may be a source of data. If two + * devices claim different progresses, it + * means that reshape boundaries differ for + * their own devices. Kernel will only treat + * the first one as reshape progress and + * go on. It may cause disaster, so avoid it. + */ + continue; + if (chosen_drive < 0 || devices[j].i.events - > devices[chosen_drive].i.events)) + > devices[chosen_drive].i.events) chosen_drive = j; } if (chosen_drive < 0) @@ -809,7 +879,7 @@ static int force_array(struct mdinfo *content, current_events = devices[chosen_drive].i.events; add_another: if (c->verbose >= 0) - pr_err("forcing event count in %s(%d) from %d upto %d\n", + pr_err("forcing event count in %s(%d) from %d up to %d\n", devices[chosen_drive].devname, devices[chosen_drive].i.disk.raid_disk, (int)(devices[chosen_drive].i.events), @@ -850,16 +920,23 @@ static int force_array(struct mdinfo *content, avail[chosen_drive] = 1; okcnt++; tst->ss->free_super(tst); - /* If there are any other drives of the same vintage, * add them in as well. We can't lose and we might gain */ - for (i = 0; i < content->array.raid_disks && i < bestcnt ; i++) { + for (i = 0; + i < content->array.raid_disks * 2 && i < bestcnt ; + i += 2) { int j = best[i]; if (j >= 0 && !devices[j].uptodate && devices[j].i.recovery_start == MaxSector && - devices[j].i.events == current_events) { + devices[j].i.events == current_events && + ((!devices[j].i.reshape_active && + !content->reshape_active) || + (devices[j].i.reshape_active == + content->reshape_active && + devices[j].i.reshape_progress == + content->reshape_progress))) { chosen_drive = j; goto add_another; } @@ -879,17 +956,32 @@ static int start_array(int mdfd, unsigned int okcnt, unsigned int sparecnt, unsigned int rebuilding_cnt, + unsigned int journalcnt, struct context *c, int clean, char *avail, - int start_partial_ok + int start_partial_ok, + int err_ok, + int was_forced ) { int rv; int i; unsigned int req_cnt; + if (content->journal_device_required && (content->journal_clean == 0)) { + if (!c->force) { + pr_err("Not safe to assemble with missing or stale journal device, consider --force.\n"); + return 1; + } + pr_err("Journal is missing or stale, starting array read only.\n"); + c->readonly = 1; + } + + if (content->consistency_policy == CONSISTENCY_POLICY_PPL) + clean = 1; + rv = set_array_info(mdfd, st, content); - if (rv) { + if (rv && !err_ok) { pr_err("failed to set array info for %s: %s\n", mddev, strerror(errno)); return 1; @@ -916,7 +1008,7 @@ static int start_array(int mdfd, } /* First, add the raid disks, but add the chosen one last */ - for (i=0; i<= bestcnt; i++) { + for (i = 0; i <= bestcnt; i++) { int j; if (i < bestcnt) { j = best[i]; @@ -926,8 +1018,9 @@ static int start_array(int mdfd, j = chosen_drive; if (j >= 0 && !devices[j].included) { - int dfd = dev_open(devices[j].devname, - O_RDWR|O_EXCL); + int dfd; + + dfd = dev_open(devices[j].devname, O_RDWR|O_EXCL); if (dfd >= 0) { remove_partitions(dfd); close(dfd); @@ -935,46 +1028,78 @@ static int start_array(int mdfd, rv = add_disk(mdfd, st, content, &devices[j].i); if (rv) { - pr_err("failed to add " - "%s to %s: %s\n", - devices[j].devname, - mddev, + pr_err("failed to add %s to %s: %s\n", + devices[j].devname, mddev, strerror(errno)); - if (i < content->array.raid_disks * 2 - || i == bestcnt) + if (errno == EINVAL && content->array.level == 0 && + content->array.layout != 0) { + cont_err("Possibly your kernel doesn't support RAID0 layouts.\n"); + cont_err("Please upgrade.\n"); + } + if (i < content->array.raid_disks * 2 || + i == bestcnt) okcnt--; else sparecnt--; - } else if (c->verbose > 0) + } else if (c->verbose > 0) { pr_err("added %s to %s as %d%s%s\n", devices[j].devname, mddev, devices[j].i.disk.raid_disk, devices[j].uptodate?"": " (possibly out of date)", - (devices[j].i.disk.state & (1<= 0) { if (c->verbose > 0) pr_err("%s is already in %s as %d\n", devices[j].devname, mddev, devices[j].i.disk.raid_disk); - } else if (c->verbose > 0 && i < content->array.raid_disks*2 - && (i&1) == 0) + } else if (c->verbose > 0 && + i < content->array.raid_disks * 2 && (i & 1) == 0) pr_err("no uptodate device for slot %d of %s\n", - i, mddev); + i/2, mddev); } if (content->array.level == LEVEL_CONTAINER) { if (c->verbose >= 0) { - pr_err("Container %s has been " - "assembled with %d drive%s", - mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s"); + pr_err("Container %s has been assembled with %d drive%s", + mddev, okcnt + sparecnt + journalcnt, + okcnt + sparecnt + journalcnt == 1 ? "" : "s"); if (okcnt < (unsigned)content->array.raid_disks) - fprintf(stderr, " (out of %d)", + fprintf(stderr, " (out of %d)\n", content->array.raid_disks); - fprintf(stderr, "\n"); + else { + fprintf(stderr, "\n"); + sysfs_rules_apply(mddev, content); + } + } + + if (st->ss->validate_container) { + struct mdinfo *devices_list; + struct mdinfo *info_devices; + unsigned int count; + + devices_list = NULL; + info_devices = xmalloc(sizeof(struct mdinfo) * + (okcnt + sparecnt)); + for (count = 0; count < okcnt + sparecnt; count++) { + info_devices[count] = devices[count].i; + info_devices[count].next = devices_list; + devices_list = &info_devices[count]; + } + if (st->ss->validate_container(devices_list)) + pr_err("Mismatch detected!\n"); + free(info_devices); } + st->ss->free_super(st); sysfs_uevent(content, "change"); + if (err_ok && okcnt < (unsigned)content->array.raid_disks) + /* Was partial, is still partial, so signal an error + * to ensure we don't retry */ + return 1; return 0; } @@ -985,44 +1110,61 @@ static int start_array(int mdfd, if (c->runstop == 1 || (c->runstop <= 0 && - ( enough(content->array.level, content->array.raid_disks, - content->array.layout, clean, avail) && - (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok) - ))) { + (enough(content->array.level, content->array.raid_disks, + content->array.layout, clean, avail) && + (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)))) { /* This array is good-to-go. * If a reshape is in progress then we might need to * continue monitoring it. In that case we start * it read-only and let the grow code make it writable. */ int rv; -#ifndef MDASSEMBLE + if (content->reshape_active && !(content->reshape_active & RESHAPE_NO_BACKUP) && content->delta_disks <= 0) { + if (!c->backup_file) { + pr_err("%s: Need a backup file to complete reshape of this array.\n", + mddev); + pr_err("Please provided one with \"--backup-file=...\"\n"); + if (c->update && + strcmp(c->update, "revert-reshape") == 0) + pr_err("(Don't specify --update=revert-reshape again, that part succeeded.)\n"); + return 1; + } rv = sysfs_set_str(content, NULL, "array_state", "readonly"); if (rv == 0) rv = Grow_continue(mdfd, st, content, - c->backup_file, + c->backup_file, 0, c->freeze_reshape); } else if (c->readonly && - sysfs_attribute_available( - content, NULL, "array_state")) { + sysfs_attribute_available(content, NULL, + "array_state")) { rv = sysfs_set_str(content, NULL, "array_state", "readonly"); } else -#endif rv = ioctl(mdfd, RUN_ARRAY, NULL); + reopen_mddev(mdfd); /* drop O_EXCL */ if (rv == 0) { + sysfs_rules_apply(mddev, content); if (c->verbose >= 0) { pr_err("%s has been started with %d drive%s", mddev, okcnt, okcnt==1?"":"s"); if (okcnt < (unsigned)content->array.raid_disks) - fprintf(stderr, " (out of %d)", content->array.raid_disks); + fprintf(stderr, " (out of %d)", + content->array.raid_disks); if (rebuilding_cnt) - fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); + fprintf(stderr, "%s %d rebuilding", + sparecnt?",":" and", + rebuilding_cnt); if (sparecnt) - fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + fprintf(stderr, " and %d spare%s", + sparecnt, + sparecnt == 1 ? "" : "s"); + if (content->journal_clean) + fprintf(stderr, " and %d journal", + journalcnt); fprintf(stderr, ".\n"); } if (content->reshape_active && @@ -1031,12 +1173,19 @@ static int start_array(int mdfd, /* might need to increase the size * of the stripe cache - default is 256 */ - if (256 < 4 * (content->array.chunk_size/4096)) { - struct mdinfo *sra = sysfs_read(mdfd, 0, 0); + int chunk_size = content->array.chunk_size; + + if (content->reshape_active && + content->new_chunk > chunk_size) + chunk_size = content->new_chunk; + if (256 < 4 * ((chunk_size+4065)/4096)) { + struct mdinfo *sra; + + sra = sysfs_read(mdfd, NULL, 0); if (sra) sysfs_set_num(sra, NULL, "stripe_cache_size", - (4 * content->array.chunk_size / 4096) + 1); + (4 * chunk_size / 4096) + 1); sysfs_free(sra); } } @@ -1062,52 +1211,63 @@ static int start_array(int mdfd, } } } + if (content->array.level == 6 && + okcnt + 1 == (unsigned)content->array.raid_disks && + was_forced) { + struct mdinfo *sra; + + sra = sysfs_read(mdfd, NULL, 0); + if (sra) + sysfs_set_str(sra, NULL, + "sync_action", "repair"); + sysfs_free(sra); + } return 0; } - pr_err("failed to RUN_ARRAY %s: %s\n", - mddev, strerror(errno)); + pr_err("failed to RUN_ARRAY %s: %s\n", mddev, strerror(errno)); + if (errno == 524 /* ENOTSUP */ && + content->array.level == 0 && content->array.layout == 0) + cont_err("Please use --update=layout-original or --update=layout-alternate\n"); if (!enough(content->array.level, content->array.raid_disks, content->array.layout, 1, avail)) - pr_err("Not enough devices to " - "start the array.\n"); + pr_err("Not enough devices to start the array.\n"); else if (!enough(content->array.level, content->array.raid_disks, - content->array.layout, clean, - avail)) - pr_err("Not enough devices to " - "start the array while not clean " - "- consider --force.\n"); + content->array.layout, clean, avail)) + pr_err("Not enough devices to start the array while not clean - consider --force.\n"); return 1; } if (c->runstop == -1) { pr_err("%s assembled from %d drive%s", - mddev, okcnt, okcnt==1?"":"s"); + mddev, okcnt, okcnt == 1 ? "" : "s"); if (okcnt != (unsigned)content->array.raid_disks) - fprintf(stderr, " (out of %d)", content->array.raid_disks); + fprintf(stderr, " (out of %d)", + content->array.raid_disks); fprintf(stderr, ", but not started.\n"); return 2; } if (c->verbose >= -1) { - pr_err("%s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s"); + pr_err("%s assembled from %d drive%s", + mddev, okcnt, okcnt == 1 ? "" : "s"); if (rebuilding_cnt) - fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); + fprintf(stderr, "%s %d rebuilding", + sparecnt ? "," : " and", rebuilding_cnt); if (sparecnt) - fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + fprintf(stderr, " and %d spare%s", sparecnt, + sparecnt == 1 ? "" : "s"); if (!enough(content->array.level, content->array.raid_disks, content->array.layout, 1, avail)) fprintf(stderr, " - not enough to start the array.\n"); else if (!enough(content->array.level, content->array.raid_disks, - content->array.layout, clean, - avail)) - fprintf(stderr, " - not enough to start the " - "array while not clean - consider " - "--force.\n"); + content->array.layout, clean, avail)) + fprintf(stderr, " - not enough to start the array while not clean - consider --force.\n"); else { if (req_cnt == (unsigned)content->array.raid_disks) - fprintf(stderr, " - need all %d to start it", req_cnt); + fprintf(stderr, " - need all %d to start it", + req_cnt); else fprintf(stderr, " - need %d to start", req_cnt); fprintf(stderr, " (use --run to insist).\n"); @@ -1175,19 +1335,21 @@ int Assemble(struct supertype *st, char *mddev, * START_ARRAY * */ - int rv; - int mdfd; + int rv = -1; + int mdfd = -1; int clean; int auto_assem = (mddev == NULL && !ident->uuid_set && - ident->super_minor == UnSet && ident->name[0] == 0 - && (ident->container == NULL || ident->member == NULL)); - struct devs *devices; + ident->super_minor == UnSet && ident->name[0] == 0 && + (ident->container == NULL || ident->member == NULL)); + struct devs *devices = NULL; char *devmap; int *best = NULL; /* indexed by raid_disk */ int bestcnt = 0; int devcnt; - unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt; + unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt, journalcnt; + int journal_clean = 0; int i; + int was_forced = 0; int most_recent = 0; int chosen_drive; int change = 0; @@ -1241,13 +1403,13 @@ try_again: mddev ? mddev : "further assembly"); content = &info; - if (st) + if (st && c->force) st->ignore_hw_compat = 1; num_devs = select_devices(devlist, ident, &st, &content, c, inargv, auto_assem); if (num_devs < 0) return 1; - + if (!st || !st->sb || !content) return 2; @@ -1265,16 +1427,19 @@ try_again: */ if (map_lock(&map)) pr_err("failed to get exclusive lock on mapfile - continue anyway...\n"); - mp = map_by_uuid(&map, content->uuid); + if (c->update && strcmp(c->update,"uuid") == 0) + mp = NULL; + else + mp = map_by_uuid(&map, content->uuid); if (mp) { struct mdinfo *dv; /* array already exists. */ - pre_exist = sysfs_read(-1, mp->devnum, GET_LEVEL|GET_DEVS); + pre_exist = sysfs_read(-1, mp->devnm, GET_LEVEL|GET_DEVS); if (pre_exist->array.level != UnSet) { pr_err("Found some drive for an array that is already active: %s\n", mp->path); pr_err("giving up.\n"); - return 1; + goto out; } for (dv = pre_exist->devs; dv; dv = dv->next) { /* We want to add this device to our list, @@ -1302,7 +1467,7 @@ try_again: strcmp(mddev, chosen_name) != 0) pr_err("Merging with already-assembled %s\n", chosen_name); - mdfd = open_dev_excl(mp->devnum); + mdfd = open_dev_excl(mp->devnm); } else { int trustworthy = FOREIGN; name = content->name; @@ -1341,25 +1506,17 @@ try_again: name = strchr(name, ':')+1; mdfd = create_mddev(mddev, name, ident->autof, trustworthy, - chosen_name); + chosen_name, 0); } if (mdfd < 0) { st->ss->free_super(st); if (auto_assem) goto try_again; - return 1; + goto out; } mddev = chosen_name; - if (get_linux_version() < 2004000 || - md_get_version(mdfd) < 9000) { - pr_err("Assemble requires Linux 2.4 or later, and\n" - " md driver version 0.90.0 or later.\n" - " Upgrade your kernel or try --build\n"); - close(mdfd); - return 1; - } if (pre_exist == NULL) { - if (mddev_busy(fd2devnum(mdfd))) { + if (mddev_busy(fd2devnm(mdfd))) { pr_err("%s already active, cannot restart it!\n", mddev); for (tmpdev = devlist ; @@ -1374,40 +1531,44 @@ try_again: st->ss->free_super(st); if (auto_assem) goto try_again; - return 1; + goto out; } /* just incase it was started but has no content */ ioctl(mdfd, STOP_ARRAY, NULL); } -#ifndef MDASSEMBLE if (content != &info) { /* This is a member of a container. Try starting the array. */ int err; err = assemble_container_content(st, mdfd, content, c, - chosen_name); + chosen_name, NULL); close(mdfd); return err; } -#endif + /* Ok, no bad inconsistancy, we can try updating etc */ devices = xcalloc(num_devs, sizeof(*devices)); devmap = xcalloc(num_devs, content->array.raid_disks); - devcnt = load_devices(devices, devmap, ident, st, devlist, + devcnt = load_devices(devices, devmap, ident, &st, devlist, c, content, mdfd, mddev, &most_recent, &bestcnt, &best, inargv); - if (devcnt < 0) - return 1; + if (devcnt < 0) { + mdfd = -3; + /* + * devices is already freed in load_devices, so set devices + * to NULL to avoid double free devices. + */ + devices = NULL; + goto out; + } if (devcnt == 0) { pr_err("no devices found for %s\n", mddev); if (st) st->ss->free_super(st); - close(mdfd); - free(devices); free(devmap); - return 1; + goto out; } if (c->update && strcmp(c->update, "byteorder")==0) @@ -1423,6 +1584,7 @@ try_again: okcnt = 0; replcnt = 0; sparecnt=0; + journalcnt=0; rebuilding_cnt=0; for (i=0; i< bestcnt; i++) { int j = best[i]; @@ -1433,8 +1595,13 @@ try_again: /* note: we ignore error flags in multipath arrays * as they don't make sense */ - if (content->array.level != LEVEL_MULTIPATH) - if (!(devices[j].i.disk.state & (1<array.level != LEVEL_MULTIPATH) { + if (devices[j].i.disk.state & (1<journal_device_required) + journalcnt++; + else /* unexpected journal, mark as faulty */ + devices[j].i.disk.state |= (1<force && content->array.raid_disks > 0 && devices[most_recent].i.disk.raid_disk >= 0 && devmap[j * content->array.raid_disks + devices[most_recent].i.disk.raid_disk] == 0) { @@ -1455,15 +1623,22 @@ try_again: best[i] = -1; continue; } + /* Require event counter to be same as, or just less than, + * most recent. If it is bigger, it must be a stray spare and + * should be ignored. + */ if (devices[j].i.events+event_margin >= - devices[most_recent].i.events) { + devices[most_recent].i.events && + devices[j].i.events <= + devices[most_recent].i.events + ) { devices[j].uptodate = 1; + if (devices[j].i.disk.state & (1<array.raid_disks * 2) { if (devices[j].i.recovery_start == MaxSector || (content->reshape_active && - ((i >= content->array.raid_disks - content->delta_disks) || - (i >= content->array.raid_disks - content->delta_disks - 1 - && content->array.level == 4)))) { + i >= content->array.raid_disks - content->delta_disks)) { if (!avail[i/2]) { okcnt++; avail[i/2]=1; @@ -1471,15 +1646,18 @@ try_again: replcnt++; } else rebuilding_cnt++; - } else + } else if (devices[j].i.disk.raid_disk != MD_DISK_ROLE_JOURNAL) sparecnt++; } } free(devmap); - if (c->force) - okcnt += force_array(content, devices, best, bestcnt, - avail, most_recent, st, c); - + if (c->force) { + int force_ok = force_array(content, devices, best, bestcnt, + avail, most_recent, st, c); + okcnt += force_ok; + if (force_ok) + was_forced = 1; + } /* Now we want to look at the superblock which the kernel will base things on * and compare the devices that we think are working with the devices that the * superblock thinks are working. @@ -1488,7 +1666,7 @@ try_again: */ chosen_drive = -1; st->ss->free_super(st); - for (i=0; chosen_drive < 0 && iss->load_super(st,fd, NULL)) { close(fd); pr_err("RAID superblock has disappeared from %s\n", devices[j].devname); - close(mdfd); - free(devices); - return 1; + goto out; } close(fd); } if (st->sb == NULL) { pr_err("No suitable drives found for %s\n", mddev); - close(mdfd); - free(devices); - return 1; + goto out; } st->ss->getinfo_super(st, content, NULL); -#ifndef MDASSEMBLE - sysfs_init(content, mdfd, 0); -#endif + if (sysfs_init(content, mdfd, NULL)) { + pr_err("Unable to initialize sysfs\n"); + goto out; + } + + /* after reload context, store journal_clean in context */ + content->journal_clean = journal_clean; for (i=0; i= content->array.raid_disks * 2) + if (j < 0) + continue; + if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL) + desired_state = (1<= content->array.raid_disks * 2) desired_state = 0; else if (i & 1) desired_state = (1<ss->update_super(st, content, "force-array", devices[chosen_drive].devname, c->verbose, 0, NULL); + was_forced = 1; clean = 1; } @@ -1586,17 +1768,13 @@ try_again: if (fd < 0) { pr_err("Could not open %s for write - cannot Assemble array.\n", devices[chosen_drive].devname); - close(mdfd); - free(devices); - return 1; + goto out; } if (st->ss->store_super(st, fd)) { close(fd); pr_err("Could not re-write superblock on %s\n", devices[chosen_drive].devname); - close(mdfd); - free(devices); - return 1; + goto out; } if (c->verbose >= 0) pr_err("Marking array %s as 'clean'\n", @@ -1608,15 +1786,16 @@ try_again: * that was moved aside due to the reshape overwriting live data * The code of doing this lives in Grow.c */ -#ifndef MDASSEMBLE if (content->reshape_active && !(content->reshape_active & RESHAPE_NO_BACKUP)) { int err = 0; int *fdlist = xmalloc(sizeof(int)* bestcnt); if (c->verbose > 0) - pr_err(":%s has an active reshape - checking " - "if critical section needs to be restored\n", + pr_err("%s has an active reshape - checking if critical section needs to be restored\n", chosen_name); + if (!c->backup_file) + c->backup_file = locate_backup(content->sys_name); + enable_fds(bestcnt/2); for (i = 0; i < bestcnt/2; i++) { int j = best[i*2]; if (j >= 0) { @@ -1640,8 +1819,7 @@ try_again: c->backup_file, c->verbose > 0); if (err && c->invalid_backup) { if (c->verbose > 0) - pr_err("continuing" - " without restoring backup\n"); + pr_err("continuing without restoring backup\n"); err = 0; } } @@ -1654,29 +1832,35 @@ try_again: pr_err("Failed to restore critical section for reshape, sorry.\n"); if (c->backup_file == NULL) cont_err("Possibly you needed to specify the --backup-file\n"); - close(mdfd); - free(devices); - return err; + goto out; } } -#endif /* Almost ready to actually *do* something */ /* First, fill in the map, so that udev can find our name * as soon as we become active. */ - map_update(&map, fd2devnum(mdfd), content->text_version, + if (c->update && strcmp(c->update, "metadata")==0) { + content->array.major_version = 1; + content->array.minor_version = 0; + strcpy(content->text_version, "1.0"); + } + + map_update(&map, fd2devnm(mdfd), content->text_version, content->uuid, chosen_name); rv = start_array(mdfd, mddev, content, st, ident, best, bestcnt, chosen_drive, devices, okcnt, sparecnt, - rebuilding_cnt, + rebuilding_cnt, journalcnt, c, - clean, avail, start_partial_ok); + clean, avail, start_partial_ok, + pre_exist != NULL, + was_forced); if (rv == 1 && !pre_exist) ioctl(mdfd, STOP_ARRAY, NULL); free(devices); +out: map_unlock(&map); if (rv == 0) { wait_for(chosen_name, mdfd); @@ -1707,37 +1891,43 @@ try_again: usecs <<= 1; } } - } else + } else if (mdfd >= 0) close(mdfd); /* '2' means 'OK, but not started yet' */ + if (rv == -1) { + free(devices); + return 1; + } return rv == 2 ? 0 : rv; } -#ifndef MDASSEMBLE int assemble_container_content(struct supertype *st, int mdfd, struct mdinfo *content, struct context *c, - char *chosen_name) + char *chosen_name, int *result) { - struct mdinfo *dev, *sra; + struct mdinfo *dev, *sra, *dev2; int working = 0, preexist = 0; int expansion = 0; - struct map_ent *map = NULL; int old_raid_disks; int start_reshape; + char *avail; + int err; - sysfs_init(content, mdfd, 0); + if (sysfs_init(content, mdfd, NULL)) { + pr_err("Unable to initialize sysfs\n"); + return 1; + } - sra = sysfs_read(mdfd, 0, GET_VERSION); + sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS); if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) { if (content->array.major_version == -1 && content->array.minor_version == -2 && c->readonly && content->text_version[0] == '/') content->text_version[0] = '-'; - if (sysfs_set_array(content, md_get_version(mdfd)) != 0) { - if (sra) - sysfs_free(sra); + if (sysfs_set_array(content, 9003) != 0) { + sysfs_free(sra); return 1; } } @@ -1755,10 +1945,27 @@ int assemble_container_content(struct supertype *st, int mdfd, if (st->ss->external && content->recovery_blocked && start_reshape) block_subarray(content); - if (sra) - sysfs_free(sra); + for (dev2 = sra->devs; dev2; dev2 = dev2->next) { + for (dev = content->devs; dev; dev = dev->next) + if (dev2->disk.major == dev->disk.major && + dev2->disk.minor == dev->disk.minor) + break; + if (dev) + continue; + /* Don't want this one any more */ + if (sysfs_set_str(sra, dev2, "slot", "none") < 0 && + errno == EBUSY) { + pr_err("Cannot remove old device %s: not updating %s\n", dev2->sys_name, sra->sys_name); + sysfs_free(sra); + return 1; + } + sysfs_set_str(sra, dev2, "state", "remove"); + } old_raid_disks = content->array.raid_disks - content->delta_disks; - for (dev = content->devs; dev; dev = dev->next) + avail = xcalloc(content->array.raid_disks, 1); + for (dev = content->devs; dev; dev = dev->next) { + if (dev->disk.raid_disk >= 0) + avail[dev->disk.raid_disk] = 1; if (sysfs_add_disk(content, dev, 1) == 0) { if (dev->disk.raid_disk >= old_raid_disks && content->reshape_active) @@ -1767,101 +1974,189 @@ int assemble_container_content(struct supertype *st, int mdfd, working++; } else if (errno == EEXIST) preexist++; - if (working + expansion == 0) + } + sysfs_free(sra); + if (working + expansion == 0 && c->runstop <= 0) { + free(avail); return 1;/* Nothing new, don't try to start */ - - map_update(&map, fd2devnum(mdfd), - content->text_version, + } + map_update(NULL, fd2devnm(mdfd), content->text_version, content->uuid, chosen_name); - if (c->runstop > 0 || - (working + preexist + expansion) >= - content->array.working_disks) { - int err; + if (content->consistency_policy == CONSISTENCY_POLICY_PPL && + st->ss->validate_ppl) { + content->array.state |= 1; + err = 0; - if (start_reshape) { - int spare = content->array.raid_disks + expansion; - if (restore_backup(st, content, - working, - spare, c->backup_file, c->verbose) == 1) - return 1; + for (dev = content->devs; dev; dev = dev->next) { + int dfd; + char *devpath; + int ret; - err = sysfs_set_str(content, NULL, - "array_state", "readonly"); - if (err) - return 1; + ret = st->ss->validate_ppl(st, content, dev); + if (ret == 0) + continue; - if (st->ss->external) { - if (!mdmon_running(st->container_dev)) - start_mdmon(st->container_dev); - ping_monitor_by_id(st->container_dev); - if (mdmon_running(st->container_dev) && - st->update_tail == NULL) - st->update_tail = &st->updates; + if (ret < 0) { + err = 1; + break; } - err = Grow_continue(mdfd, st, content, c->backup_file, - c->freeze_reshape); - } else switch(content->array.level) { - case LEVEL_LINEAR: - case LEVEL_MULTIPATH: - case 0: - err = sysfs_set_str(content, NULL, "array_state", - c->readonly ? "readonly" : "active"); + if (!c->force) { + pr_err("%s contains invalid PPL - consider --force or --update-subarray with --update=no-ppl\n", + chosen_name); + content->array.state &= ~1; + avail[dev->disk.raid_disk] = 0; break; - default: - err = sysfs_set_str(content, NULL, "array_state", - "readonly"); - /* start mdmon if needed. */ - if (!err) { - if (!mdmon_running(st->container_dev)) - start_mdmon(st->container_dev); - ping_monitor_by_id(st->container_dev); - } + } + + /* have --force - overwrite the invalid ppl */ + devpath = map_dev(dev->disk.major, dev->disk.minor, 0); + dfd = dev_open(devpath, O_RDWR); + if (dfd < 0) { + pr_err("Failed to open %s\n", devpath); + err = 1; break; } - if (!err) - sysfs_set_safemode(content, content->safe_mode_delay); - /* Block subarray here if it is not reshaped now - * It has be blocked a little later to allow mdmon to switch in - * in to R/W state - */ - if (st->ss->external && content->recovery_blocked && - !start_reshape) - block_subarray(content); + err = st->ss->write_init_ppl(st, content, dfd); + close(dfd); - if (c->verbose >= 0) { if (err) - pr_err("array %s now has %d device%s", - chosen_name, working + preexist, - working + preexist == 1 ? "":"s"); - else - pr_err("Started %s with %d device%s", - chosen_name, working + preexist, - working + preexist == 1 ? "":"s"); + break; + } + + if (err) { + free(avail); + return err; + } + } + + if (enough(content->array.level, content->array.raid_disks, + content->array.layout, content->array.state & 1, avail) == 0) { + if (c->export && result) + *result |= INCR_NO; + else if (c->verbose >= 0) { + pr_err("%s assembled with %d device%s", + chosen_name, preexist + working, + preexist + working == 1 ? "":"s"); if (preexist) fprintf(stderr, " (%d new)", working); - if (expansion) - fprintf(stderr, " ( + %d for expansion)", - expansion); - fprintf(stderr, "\n"); + fprintf(stderr, " but not started\n"); } - if (!err) - wait_for(chosen_name, mdfd); - return err; - /* FIXME should have an O_EXCL and wait for read-auto */ - } else { - if (c->verbose >= 0) { + free(avail); + return 1; + } + free(avail); + + if (c->runstop <= 0 && + (working + preexist + expansion) < + content->array.working_disks) { + if (c->export && result) + *result |= INCR_UNSAFE; + else if (c->verbose >= 0) { pr_err("%s assembled with %d device%s", chosen_name, preexist + working, preexist + working == 1 ? "":"s"); if (preexist) fprintf(stderr, " (%d new)", working); - fprintf(stderr, " but not started\n"); + fprintf(stderr, " but not safe to start\n"); } return 1; } -} -#endif + + if (start_reshape) { + int spare = content->array.raid_disks + expansion; + if (restore_backup(st, content, + working, + spare, &c->backup_file, c->verbose) == 1) + return 1; + + if (content->reshape_progress == 0) { + /* If reshape progress is 0 - we are assembling the + * array that was stopped, before reshape has started. + * Array needs to be started as active, Grow_continue() + * will start the reshape. + */ + sysfs_set_num(content, NULL, "reshape_position", + MaxSector); + err = sysfs_set_str(content, NULL, + "array_state", "active"); + sysfs_set_num(content, NULL, "reshape_position", 0); + } else { + err = sysfs_set_str(content, NULL, + "array_state", "readonly"); + } + + if (err) + return 1; + + if (st->ss->external) { + if (!mdmon_running(st->container_devnm)) + start_mdmon(st->container_devnm); + ping_monitor(st->container_devnm); + if (mdmon_running(st->container_devnm) && + st->update_tail == NULL) + st->update_tail = &st->updates; + } + + err = Grow_continue(mdfd, st, content, c->backup_file, + 0, c->freeze_reshape); + } else switch(content->array.level) { + case LEVEL_LINEAR: + case LEVEL_MULTIPATH: + case 0: + err = sysfs_set_str(content, NULL, "array_state", + c->readonly ? "readonly" : "active"); + break; + default: + err = sysfs_set_str(content, NULL, "array_state", + "readonly"); + /* start mdmon if needed. */ + if (!err) { + if (!mdmon_running(st->container_devnm)) + start_mdmon(st->container_devnm); + ping_monitor(st->container_devnm); + } + break; + } + if (!err) + sysfs_set_safemode(content, content->safe_mode_delay); + + /* Block subarray here if it is not reshaped now + * It has be blocked a little later to allow mdmon to switch in + * in to R/W state + */ + if (st->ss->external && content->recovery_blocked && + !start_reshape) + block_subarray(content); + + if (c->export && result) { + if (err) + *result |= INCR_NO; + else + *result |= INCR_YES; + } else if (c->verbose >= 0) { + if (err) + pr_err("array %s now has %d device%s", + chosen_name, working + preexist, + working + preexist == 1 ? "":"s"); + else { + sysfs_rules_apply(chosen_name, content); + pr_err("Started %s with %d device%s", + chosen_name, working + preexist, + working + preexist == 1 ? "":"s"); + } + if (preexist) + fprintf(stderr, " (%d new)", working); + if (expansion) + fprintf(stderr, " ( + %d for expansion)", + expansion); + fprintf(stderr, "\n"); + } + if (!err) + wait_for(chosen_name, mdfd); + return err; + /* FIXME should have an O_EXCL and wait for read-auto */ +}