X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Assemble.c;h=9f75c68c9d49b9b2a9a7daf8201e1a6de4a71657;hb=757e55435997e355ee9b03e5d913b5496a3c39a8;hp=05ace561fb507bb6a557b3d15e6d35f18517400b;hpb=56bbc588f7f0f3bdd3ec23f02109b427c1d3b8f1;p=thirdparty%2Fmdadm.git diff --git a/Assemble.c b/Assemble.c index 05ace561..9f75c68c 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2013 Neil Brown + * Copyright (C) 2001-2016 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -25,21 +25,27 @@ #include "mdadm.h" #include -static int name_matches(char *found, char *required, char *homehost) +static int name_matches(char *found, char *required, char *homehost, int require_homehost) { /* See if the name found matches the required name, possibly * prefixed with 'homehost' */ - char fnd[33]; + char *sep; + unsigned int l; - strncpy(fnd, found, 32); - fnd[32] = 0; if (strcmp(found, required)==0) return 1; - if (homehost) { - int l = strlen(homehost); - if (l < 32 && fnd[l] == ':' && - strcmp(fnd+l+1, required)==0) + sep = strchr(found, ':'); + if (!sep) + return 0; + l = sep - found; + if (strncmp(found, "any:", 4) == 0 || + (homehost && strcmp(homehost, "any") == 0) || + !require_homehost || + (homehost && strlen(homehost) == l && + strncmp(found, homehost, l) == 0)) { + /* matching homehost */ + if (strcmp(sep+1, required) == 0) return 1; } return 0; @@ -73,7 +79,7 @@ static int is_member_busy(char *metadata_version) static int ident_matches(struct mddev_ident *ident, struct mdinfo *content, struct supertype *tst, - char *homehost, + char *homehost, int require_homehost, char *update, char *devname) { @@ -85,7 +91,7 @@ static int ident_matches(struct mddev_ident *ident, return 0; } if (ident->name[0] && (!update || strcmp(update, "name")!= 0) && - name_matches(content->name, ident->name, homehost)==0) { + name_matches(content->name, ident->name, homehost, require_homehost)==0) { if (devname) pr_err("%s has wrong name.\n", devname); return 0; @@ -143,6 +149,7 @@ static int select_devices(struct mddev_dev *devlist, struct mdinfo *content = NULL; int report_mismatch = ((inargv && c->verbose >= 0) || c->verbose > 0); struct domainlist *domains = NULL; + dev_t rdev; tmpdev = devlist; num_devs = 0; while (tmpdev) { @@ -163,7 +170,6 @@ static int select_devices(struct mddev_dev *devlist, tmpdev = tmpdev ? tmpdev->next : NULL) { char *devname = tmpdev->devname; int dfd; - struct stat stb; struct supertype *tst; struct dev_policy *pol = NULL; int found_container = 0; @@ -198,14 +204,7 @@ static int select_devices(struct mddev_dev *devlist, pr_err("cannot open device %s: %s\n", devname, strerror(errno)); tmpdev->used = 2; - } else if (fstat(dfd, &stb)< 0) { - /* Impossible! */ - pr_err("fstat failed for %s: %s\n", - devname, strerror(errno)); - tmpdev->used = 2; - } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { - pr_err("%s is not a block device.\n", - devname); + } else if (!fstat_is_blkdev(dfd, devname, &rdev)) { tmpdev->used = 2; } else if (must_be_container(dfd)) { if (st) { @@ -216,25 +215,23 @@ static int select_devices(struct mddev_dev *devlist, pr_err("%s is a container, but we are looking for components\n", devname); tmpdev->used = 2; -#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) } if (!tst && (tst = super_by_fd(dfd, NULL)) == NULL) { if (report_mismatch) pr_err("not a recognisable container: %s\n", devname); tmpdev->used = 2; -#endif - } else if (!tst->ss->load_container - || tst->ss->load_container(tst, dfd, NULL)) { + } else if (!tst->ss->load_container || + tst->ss->load_container(tst, dfd, NULL)) { if (report_mismatch) pr_err("no correct container type: %s\n", devname); tmpdev->used = 2; } else if (auto_assem && - !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)), + !conf_test_metadata(tst->ss->name, + (pol = devid_policy(rdev)), tst->ss->match_home(tst, c->homehost) == 1)) { if (report_mismatch) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, tst->ss->name); tmpdev->used = 2; } else @@ -245,7 +242,9 @@ static int select_devices(struct mddev_dev *devlist, pr_err("no recogniseable superblock on %s\n", devname); tmpdev->used = 2; - } else if (tst->ss->load_super(tst,dfd, NULL)) { + } else if ((tst->ignore_hw_compat = 0), + tst->ss->load_super(tst, dfd, + report_mismatch ? devname : NULL)) { if (report_mismatch) pr_err("no RAID superblock on %s\n", devname); @@ -256,11 +255,11 @@ static int select_devices(struct mddev_dev *devlist, tst->ss->name, devname); tmpdev->used = 2; } else if (auto_assem && st == NULL && - !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)), + !conf_test_metadata(tst->ss->name, + (pol = devid_policy(rdev)), tst->ss->match_home(tst, c->homehost) == 1)) { if (report_mismatch) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, tst->ss->name); tmpdev->used = 2; } @@ -282,6 +281,8 @@ static int select_devices(struct mddev_dev *devlist, st->ss->free_super(st); dev_policy_free(pol); domain_free(domains); + if (tst) + tst->ss->free_super(tst); return -1; } @@ -327,7 +328,8 @@ static int select_devices(struct mddev_dev *devlist, content = content->next) { if (!ident_matches(ident, content, tst, - c->homehost, c->update, + c->homehost, c->require_homehost, + c->update, report_mismatch ? devname : NULL)) /* message already printed */; else if (is_member_busy(content->text_version)) { @@ -350,8 +352,7 @@ static int select_devices(struct mddev_dev *devlist, st = tst; tst = NULL; if (!auto_assem && inargv && tmpdev->next != NULL) { - pr_err("%s is a container, but is not " - "only device given: confused and aborting\n", + pr_err("%s is a container, but is not only device given: confused and aborting\n", devname); st->ss->free_super(st); dev_policy_free(pol); @@ -366,36 +367,43 @@ static int select_devices(struct mddev_dev *devlist, tmpdev = NULL; goto loop; } else { - int rv = 0; - struct mddev_ident *match; - content = *contentp; tst->ss->getinfo_super(tst, content, NULL); if (!ident_matches(ident, content, tst, - c->homehost, c->update, + c->homehost, c->require_homehost, + c->update, report_mismatch ? devname : NULL)) goto loop; - match = conf_match(tst, content, devname, - report_mismatch ? c->verbose : -1, - &rv); - if (!match && rv == 2) - goto loop; - if (match && match->devname && - strcasecmp(match->devname, "") == 0) { - if (report_mismatch) - pr_err("%s is a member of an explicitly ignored array\n", - devname); - goto loop; - } - if (match && !ident_matches(match, content, tst, - c->homehost, c->update, - report_mismatch ? devname : NULL)) - /* Array exists in mdadm.conf but some - * details don't match, so reject it + if (auto_assem) { + /* Never auto-assemble things that conflict + * with mdadm.conf in some way */ - goto loop; + struct mddev_ident *match; + int rv = 0; + + match = conf_match(tst, content, devname, + report_mismatch ? c->verbose : -1, + &rv); + if (!match && rv == 2) + goto loop; + if (match && match->devname && + strcasecmp(match->devname, "") == 0) { + if (report_mismatch) + pr_err("%s is a member of an explicitly ignored array\n", + devname); + goto loop; + } + if (match && !ident_matches(match, content, tst, + c->homehost, c->require_homehost, + c->update, + report_mismatch ? devname : NULL)) + /* Array exists in mdadm.conf but some + * details don't match, so reject it + */ + goto loop; + } /* should be safe to try an exclusive open now, we * have rejected anything that some other mdadm might @@ -473,7 +481,7 @@ static int select_devices(struct mddev_dev *devlist, /* Collect domain information from members only */ if (tmpdev && tmpdev->used == 1) { if (!pol) - pol = devid_policy(stb.st_rdev); + pol = devid_policy(rdev); domain_merge(&domains, pol, tst?tst->ss->name:NULL); } dev_policy_free(pol); @@ -506,15 +514,12 @@ static int select_devices(struct mddev_dev *devlist, /* Now reject spares that don't match domains of identified members */ for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { - struct stat stb; if (tmpdev->used != 3) continue; - if (stat(tmpdev->devname, &stb)< 0) { - pr_err("fstat failed for %s: %s\n", - tmpdev->devname, strerror(errno)); + if (!stat_is_blkdev(tmpdev->devname, &rdev)) { tmpdev->used = 2; } else { - struct dev_policy *pol = devid_policy(stb.st_rdev); + struct dev_policy *pol = devid_policy(rdev); int dt = domain_test(domains, pol, NULL); if (inargv && dt != 0) /* take this spare as domains match @@ -561,9 +566,7 @@ static int load_devices(struct devs *devices, char *devmap, struct mddev_dev *tmpdev; int devcnt = 0; int nextspare = 0; -#ifndef MDASSEMBLE int bitmap_done = 0; -#endif int most_recent = -1; int bestcnt = 0; int *best = *bestp; @@ -574,30 +577,30 @@ static int load_devices(struct devs *devices, char *devmap, struct stat stb; struct supertype *tst; int i; + int dfd; + int disk_state; if (tmpdev->used != 1) continue; /* looks like a good enough match to update the super block if needed */ -#ifndef MDASSEMBLE if (c->update) { - int dfd; /* prepare useful information in info structures */ struct stat stb2; int err; fstat(mdfd, &stb2); - if (strcmp(c->update, "uuid")==0 && - !ident->uuid_set) { - int rfd; - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, ident->uuid, 16) != 16) { - *(__u32*)(ident->uuid) = random(); - *(__u32*)(ident->uuid+1) = random(); - *(__u32*)(ident->uuid+2) = random(); - *(__u32*)(ident->uuid+3) = random(); - } - if (rfd >= 0) close(rfd); + if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set) + random_uuid((__u8 *)ident->uuid); + + if (strcmp(c->update, "ppl") == 0 && + ident->bitmap_fd >= 0) { + pr_err("PPL is not compatible with bitmap\n"); + close(mdfd); + free(devices); + free(devmap); + return -1; } + dfd = dev_open(devname, tmpdev->disposition == 'I' ? O_RDWR : (O_RDWR|O_EXCL)); @@ -624,6 +627,19 @@ static int load_devices(struct devs *devices, char *devmap, if (strcmp(c->update, "byteorder") == 0) err = 0; + else if (strcmp(c->update, "home-cluster") == 0) { + tst->cluster_name = c->homecluster; + err = tst->ss->write_bitmap(tst, dfd, NameUpdate); + } else if (strcmp(c->update, "nodes") == 0) { + tst->nodes = c->nodes; + err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate); + } else if (strcmp(c->update, "revert-reshape") == 0 && + c->invalid_backup) + err = tst->ss->update_super(tst, content, + "revert-reshape-nobackup", + devname, c->verbose, + ident->uuid_set, + c->homehost); else err = tst->ss->update_super(tst, content, c->update, devname, c->verbose, @@ -631,8 +647,7 @@ static int load_devices(struct devs *devices, char *devmap, c->homehost); if (err < 0) { if (err == -1) - pr_err("--update=%s not understood" - " for %s metadata\n", + pr_err("--update=%s not understood for %s metadata\n", c->update, tst->ss->name); tst->ss->free_super(tst); free(tst); @@ -651,7 +666,6 @@ static int load_devices(struct devs *devices, char *devmap, if (tst->ss->store_super(tst, dfd)) pr_err("Could not re-write superblock on %s.\n", devname); - close(dfd); if (strcmp(c->update, "uuid")==0 && ident->bitmap_fd >= 0 && !bitmap_done) { @@ -662,12 +676,10 @@ static int load_devices(struct devs *devices, char *devmap, else bitmap_done = 1; } - } else -#endif - { - int dfd = dev_open(devname, - tmpdev->disposition == 'I' - ? O_RDWR : (O_RDWR|O_EXCL)); + } else { + dfd = dev_open(devname, + tmpdev->disposition == 'I' + ? O_RDWR : (O_RDWR|O_EXCL)); tst = dup_super(st); if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) { @@ -684,10 +696,10 @@ static int load_devices(struct devs *devices, char *devmap, return -1; } tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks); - close(dfd); } - stat(devname, &stb); + fstat(dfd, &stb); + close(dfd); if (c->verbose > 0) pr_err("%s is identified as a member of %s, slot %d%s.\n", @@ -700,7 +712,9 @@ static int load_devices(struct devs *devices, char *devmap, devices[devcnt].i.disk.major = major(stb.st_rdev); devices[devcnt].i.disk.minor = minor(stb.st_rdev); - if (devices[devcnt].i.disk.state == 6) { + disk_state = devices[devcnt].i.disk.state & ~((1< devices[most_recent].i.events) { @@ -718,7 +732,7 @@ static int load_devices(struct devs *devices, char *devmap, i = devcnt; else i = devices[devcnt].i.disk.raid_disk; - if (i+1 == 0) { + if (i+1 == 0 || i == MD_DISK_ROLE_JOURNAL) { if (nextspare < content->array.raid_disks*2) nextspare = content->array.raid_disks*2; i = nextspare++; @@ -747,22 +761,19 @@ static int load_devices(struct devs *devices, char *devmap, bestcnt = newbestcnt; } if (best[i] >=0 && - devices[best[i]].i.events - == devices[devcnt].i.events - && (devices[best[i]].i.disk.minor - != devices[devcnt].i.disk.minor) - && st->ss == &super0 - && content->array.level != LEVEL_MULTIPATH) { + devices[best[i]].i.events == + devices[devcnt].i.events && + (devices[best[i]].i.disk.minor != + devices[devcnt].i.disk.minor) && + st->ss == &super0 && + content->array.level != LEVEL_MULTIPATH) { /* two different devices with identical superblock. * Could be a mis-detection caused by overlapping * partitions. fail-safe. */ - pr_err("WARNING %s and %s appear" - " to have very similar superblocks.\n" - " If they are really different, " - "please --zero the superblock on one\n" - " If they are the same or overlap," - " please remove one from %s.\n", + pr_err("WARNING %s and %s appear to have very similar superblocks.\n" + " If they are really different, please --zero the superblock on one\n" + " If they are the same or overlap, please remove one from %s.\n", devices[best[i]].devname, devname, inargv ? "the list" : "the\n DEVICE list in mdadm.conf" @@ -773,10 +784,11 @@ static int load_devices(struct devs *devices, char *devmap, *stp = st; return -1; } - if (best[i] == -1 - || (devices[best[i]].i.events - < devices[devcnt].i.events)) + if (best[i] == -1 || (devices[best[i]].i.events + < devices[devcnt].i.events)) best[i] = devcnt; + else if (st->ss == &super_imsm) + best[i+1] = devcnt; } devcnt++; } @@ -798,14 +810,11 @@ static int force_array(struct mdinfo *content, int okcnt = 0; while (!enough(content->array.level, content->array.raid_disks, content->array.layout, 1, - avail) - || + avail) || (content->reshape_active && content->delta_disks > 0 && !enough(content->array.level, (content->array.raid_disks - content->delta_disks), - content->new_layout, 1, - avail) - )) { + content->new_layout, 1, avail))) { /* Choose the newest best drive which is * not up-to-date, update the superblock * and add it. @@ -820,12 +829,49 @@ static int force_array(struct mdinfo *content, i < content->array.raid_disks * 2 && i < bestcnt; i += 2) { int j = best[i]; - if (j>=0 && - !devices[j].uptodate && - devices[j].i.recovery_start == MaxSector && - (chosen_drive < 0 || + if (j < 0) + continue; + if (devices[j].uptodate) + continue; + if (devices[j].i.recovery_start != MaxSector) { + int delta; + if (!devices[j].i.reshape_active || + devices[j].i.delta_disks <= 0) + continue; + /* When increasing number of devices, an + * added device also appears to be + * recovering. It is safe to include it + * as long as it won't be a source of + * data. + * For now, just allow for last data + * devices in RAID4 or last devices in RAID4/5/6. + */ + delta = devices[j].i.delta_disks; + if (devices[j].i.array.level >= 4 && + devices[j].i.array.level <= 6 && + i/2 >= content->array.raid_disks - delta) + /* OK */; + else if (devices[j].i.array.level == 4 && + i/2 >= content->array.raid_disks - delta - 1) + /* OK */; + else + continue; + } else if (devices[j].i.reshape_active != + content->reshape_active || + (devices[j].i.reshape_active && + devices[j].i.reshape_progress != + content->reshape_progress)) + /* Here, it may be a source of data. If two + * devices claim different progresses, it + * means that reshape boundaries differ for + * their own devices. Kernel will only treat + * the first one as reshape progress and + * go on. It may cause disaster, so avoid it. + */ + continue; + if (chosen_drive < 0 || devices[j].i.events - > devices[chosen_drive].i.events)) + > devices[chosen_drive].i.events) chosen_drive = j; } if (chosen_drive < 0) @@ -874,7 +920,6 @@ static int force_array(struct mdinfo *content, avail[chosen_drive] = 1; okcnt++; tst->ss->free_super(tst); - /* If there are any other drives of the same vintage, * add them in as well. We can't lose and we might gain */ @@ -885,7 +930,13 @@ static int force_array(struct mdinfo *content, if (j >= 0 && !devices[j].uptodate && devices[j].i.recovery_start == MaxSector && - devices[j].i.events == current_events) { + devices[j].i.events == current_events && + ((!devices[j].i.reshape_active && + !content->reshape_active) || + (devices[j].i.reshape_active == + content->reshape_active && + devices[j].i.reshape_progress == + content->reshape_progress))) { chosen_drive = j; goto add_another; } @@ -905,6 +956,7 @@ static int start_array(int mdfd, unsigned int okcnt, unsigned int sparecnt, unsigned int rebuilding_cnt, + unsigned int journalcnt, struct context *c, int clean, char *avail, int start_partial_ok, @@ -916,6 +968,18 @@ static int start_array(int mdfd, int i; unsigned int req_cnt; + if (content->journal_device_required && (content->journal_clean == 0)) { + if (!c->force) { + pr_err("Not safe to assemble with missing or stale journal device, consider --force.\n"); + return 1; + } + pr_err("Journal is missing or stale, starting array read only.\n"); + c->readonly = 1; + } + + if (content->consistency_policy == CONSISTENCY_POLICY_PPL) + clean = 1; + rv = set_array_info(mdfd, st, content); if (rv && !err_ok) { pr_err("failed to set array info for %s: %s\n", @@ -944,7 +1008,7 @@ static int start_array(int mdfd, } /* First, add the raid disks, but add the chosen one last */ - for (i=0; i<= bestcnt; i++) { + for (i = 0; i <= bestcnt; i++) { int j; if (i < bestcnt) { j = best[i]; @@ -954,8 +1018,9 @@ static int start_array(int mdfd, j = chosen_drive; if (j >= 0 && !devices[j].included) { - int dfd = dev_open(devices[j].devname, - O_RDWR|O_EXCL); + int dfd; + + dfd = dev_open(devices[j].devname, O_RDWR|O_EXCL); if (dfd >= 0) { remove_partitions(dfd); close(dfd); @@ -963,44 +1028,64 @@ static int start_array(int mdfd, rv = add_disk(mdfd, st, content, &devices[j].i); if (rv) { - pr_err("failed to add " - "%s to %s: %s\n", - devices[j].devname, - mddev, + pr_err("failed to add %s to %s: %s\n", + devices[j].devname, mddev, strerror(errno)); - if (i < content->array.raid_disks * 2 - || i == bestcnt) + if (i < content->array.raid_disks * 2 || + i == bestcnt) okcnt--; else sparecnt--; - } else if (c->verbose > 0) + } else if (c->verbose > 0) { pr_err("added %s to %s as %d%s%s\n", devices[j].devname, mddev, devices[j].i.disk.raid_disk, devices[j].uptodate?"": " (possibly out of date)", - (devices[j].i.disk.state & (1<= 0) { if (c->verbose > 0) pr_err("%s is already in %s as %d\n", devices[j].devname, mddev, devices[j].i.disk.raid_disk); - } else if (c->verbose > 0 && i < content->array.raid_disks*2 - && (i&1) == 0) + } else if (c->verbose > 0 && + i < content->array.raid_disks * 2 && (i & 1) == 0) pr_err("no uptodate device for slot %d of %s\n", - i, mddev); + i/2, mddev); } if (content->array.level == LEVEL_CONTAINER) { if (c->verbose >= 0) { - pr_err("Container %s has been " - "assembled with %d drive%s", - mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s"); + pr_err("Container %s has been assembled with %d drive%s", + mddev, okcnt + sparecnt + journalcnt, + okcnt + sparecnt + journalcnt == 1 ? "" : "s"); if (okcnt < (unsigned)content->array.raid_disks) fprintf(stderr, " (out of %d)", content->array.raid_disks); fprintf(stderr, "\n"); } + + if (st->ss->validate_container) { + struct mdinfo *devices_list; + struct mdinfo *info_devices; + unsigned int count; + + devices_list = NULL; + info_devices = xmalloc(sizeof(struct mdinfo) * + (okcnt + sparecnt)); + for (count = 0; count < okcnt + sparecnt; count++) { + info_devices[count] = devices[count].i; + info_devices[count].next = devices_list; + devices_list = &info_devices[count]; + } + if (st->ss->validate_container(devices_list)) + pr_err("Mismatch detected!\n"); + free(info_devices); + } + st->ss->free_super(st); sysfs_uevent(content, "change"); if (err_ok && okcnt < (unsigned)content->array.raid_disks) @@ -1017,17 +1102,16 @@ static int start_array(int mdfd, if (c->runstop == 1 || (c->runstop <= 0 && - ( enough(content->array.level, content->array.raid_disks, - content->array.layout, clean, avail) && - (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok) - ))) { + (enough(content->array.level, content->array.raid_disks, + content->array.layout, clean, avail) && + (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)))) { /* This array is good-to-go. * If a reshape is in progress then we might need to * continue monitoring it. In that case we start * it read-only and let the grow code make it writable. */ int rv; -#ifndef MDASSEMBLE + if (content->reshape_active && !(content->reshape_active & RESHAPE_NO_BACKUP) && content->delta_disks <= 0) { @@ -1044,15 +1128,14 @@ static int start_array(int mdfd, "array_state", "readonly"); if (rv == 0) rv = Grow_continue(mdfd, st, content, - c->backup_file, + c->backup_file, 0, c->freeze_reshape); } else if (c->readonly && - sysfs_attribute_available( - content, NULL, "array_state")) { + sysfs_attribute_available(content, NULL, + "array_state")) { rv = sysfs_set_str(content, NULL, "array_state", "readonly"); } else -#endif rv = ioctl(mdfd, RUN_ARRAY, NULL); reopen_mddev(mdfd); /* drop O_EXCL */ if (rv == 0) { @@ -1060,11 +1143,19 @@ static int start_array(int mdfd, pr_err("%s has been started with %d drive%s", mddev, okcnt, okcnt==1?"":"s"); if (okcnt < (unsigned)content->array.raid_disks) - fprintf(stderr, " (out of %d)", content->array.raid_disks); + fprintf(stderr, " (out of %d)", + content->array.raid_disks); if (rebuilding_cnt) - fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); + fprintf(stderr, "%s %d rebuilding", + sparecnt?",":" and", + rebuilding_cnt); if (sparecnt) - fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + fprintf(stderr, " and %d spare%s", + sparecnt, + sparecnt == 1 ? "" : "s"); + if (content->journal_clean) + fprintf(stderr, " and %d journal", + journalcnt); fprintf(stderr, ".\n"); } if (content->reshape_active && @@ -1073,12 +1164,19 @@ static int start_array(int mdfd, /* might need to increase the size * of the stripe cache - default is 256 */ - if (256 < 4 * (content->array.chunk_size/4096)) { - struct mdinfo *sra = sysfs_read(mdfd, NULL, 0); + int chunk_size = content->array.chunk_size; + + if (content->reshape_active && + content->new_chunk > chunk_size) + chunk_size = content->new_chunk; + if (256 < 4 * ((chunk_size+4065)/4096)) { + struct mdinfo *sra; + + sra = sysfs_read(mdfd, NULL, 0); if (sra) sysfs_set_num(sra, NULL, "stripe_cache_size", - (4 * content->array.chunk_size / 4096) + 1); + (4 * chunk_size / 4096) + 1); sysfs_free(sra); } } @@ -1107,7 +1205,9 @@ static int start_array(int mdfd, if (content->array.level == 6 && okcnt + 1 == (unsigned)content->array.raid_disks && was_forced) { - struct mdinfo *sra = sysfs_read(mdfd, NULL, 0); + struct mdinfo *sra; + + sra = sysfs_read(mdfd, NULL, 0); if (sra) sysfs_set_str(sra, NULL, "sync_action", "repair"); @@ -1115,50 +1215,47 @@ static int start_array(int mdfd, } return 0; } - pr_err("failed to RUN_ARRAY %s: %s\n", - mddev, strerror(errno)); + pr_err("failed to RUN_ARRAY %s: %s\n", mddev, strerror(errno)); if (!enough(content->array.level, content->array.raid_disks, content->array.layout, 1, avail)) - pr_err("Not enough devices to " - "start the array.\n"); + pr_err("Not enough devices to start the array.\n"); else if (!enough(content->array.level, content->array.raid_disks, - content->array.layout, clean, - avail)) - pr_err("Not enough devices to " - "start the array while not clean " - "- consider --force.\n"); + content->array.layout, clean, avail)) + pr_err("Not enough devices to start the array while not clean - consider --force.\n"); return 1; } if (c->runstop == -1) { pr_err("%s assembled from %d drive%s", - mddev, okcnt, okcnt==1?"":"s"); + mddev, okcnt, okcnt == 1 ? "" : "s"); if (okcnt != (unsigned)content->array.raid_disks) - fprintf(stderr, " (out of %d)", content->array.raid_disks); + fprintf(stderr, " (out of %d)", + content->array.raid_disks); fprintf(stderr, ", but not started.\n"); return 2; } if (c->verbose >= -1) { - pr_err("%s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s"); + pr_err("%s assembled from %d drive%s", + mddev, okcnt, okcnt == 1 ? "" : "s"); if (rebuilding_cnt) - fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); + fprintf(stderr, "%s %d rebuilding", + sparecnt ? "," : " and", rebuilding_cnt); if (sparecnt) - fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + fprintf(stderr, " and %d spare%s", sparecnt, + sparecnt == 1 ? "" : "s"); if (!enough(content->array.level, content->array.raid_disks, content->array.layout, 1, avail)) fprintf(stderr, " - not enough to start the array.\n"); else if (!enough(content->array.level, content->array.raid_disks, - content->array.layout, clean, - avail)) - fprintf(stderr, " - not enough to start the " - "array while not clean - consider " - "--force.\n"); + content->array.layout, clean, avail)) + fprintf(stderr, " - not enough to start the array while not clean - consider --force.\n"); else { if (req_cnt == (unsigned)content->array.raid_disks) - fprintf(stderr, " - need all %d to start it", req_cnt); + fprintf(stderr, " - need all %d to start it", + req_cnt); else fprintf(stderr, " - need %d to start", req_cnt); fprintf(stderr, " (use --run to insist).\n"); @@ -1226,18 +1323,19 @@ int Assemble(struct supertype *st, char *mddev, * START_ARRAY * */ - int rv; - int mdfd; + int rv = -1; + int mdfd = -1; int clean; int auto_assem = (mddev == NULL && !ident->uuid_set && - ident->super_minor == UnSet && ident->name[0] == 0 - && (ident->container == NULL || ident->member == NULL)); - struct devs *devices; + ident->super_minor == UnSet && ident->name[0] == 0 && + (ident->container == NULL || ident->member == NULL)); + struct devs *devices = NULL; char *devmap; int *best = NULL; /* indexed by raid_disk */ int bestcnt = 0; int devcnt; - unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt; + unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt, journalcnt; + int journal_clean = 0; int i; int was_forced = 0; int most_recent = 0; @@ -1293,7 +1391,7 @@ try_again: mddev ? mddev : "further assembly"); content = &info; - if (st) + if (st && c->force) st->ignore_hw_compat = 1; num_devs = select_devices(devlist, ident, &st, &content, c, inargv, auto_assem); @@ -1317,7 +1415,10 @@ try_again: */ if (map_lock(&map)) pr_err("failed to get exclusive lock on mapfile - continue anyway...\n"); - mp = map_by_uuid(&map, content->uuid); + if (c->update && strcmp(c->update,"uuid") == 0) + mp = NULL; + else + mp = map_by_uuid(&map, content->uuid); if (mp) { struct mdinfo *dv; /* array already exists. */ @@ -1326,7 +1427,7 @@ try_again: pr_err("Found some drive for an array that is already active: %s\n", mp->path); pr_err("giving up.\n"); - return 1; + goto out; } for (dv = pre_exist->devs; dv; dv = dv->next) { /* We want to add this device to our list, @@ -1393,23 +1494,15 @@ try_again: name = strchr(name, ':')+1; mdfd = create_mddev(mddev, name, ident->autof, trustworthy, - chosen_name); + chosen_name, 0); } if (mdfd < 0) { st->ss->free_super(st); if (auto_assem) goto try_again; - return 1; + goto out; } mddev = chosen_name; - if (get_linux_version() < 2004000 || - md_get_version(mdfd) < 9000) { - pr_err("Assemble requires Linux 2.4 or later, and\n" - " md driver version 0.90.0 or later.\n" - " Upgrade your kernel or try --build\n"); - close(mdfd); - return 1; - } if (pre_exist == NULL) { if (mddev_busy(fd2devnm(mdfd))) { pr_err("%s already active, cannot restart it!\n", @@ -1426,13 +1519,12 @@ try_again: st->ss->free_super(st); if (auto_assem) goto try_again; - return 1; + goto out; } /* just incase it was started but has no content */ ioctl(mdfd, STOP_ARRAY, NULL); } -#ifndef MDASSEMBLE if (content != &info) { /* This is a member of a container. Try starting the array. */ int err; @@ -1441,25 +1533,30 @@ try_again: close(mdfd); return err; } -#endif + /* Ok, no bad inconsistancy, we can try updating etc */ devices = xcalloc(num_devs, sizeof(*devices)); devmap = xcalloc(num_devs, content->array.raid_disks); devcnt = load_devices(devices, devmap, ident, &st, devlist, c, content, mdfd, mddev, &most_recent, &bestcnt, &best, inargv); - if (devcnt < 0) - return 1; + if (devcnt < 0) { + mdfd = -3; + /* + * devices is already freed in load_devices, so set devices + * to NULL to avoid double free devices. + */ + devices = NULL; + goto out; + } if (devcnt == 0) { pr_err("no devices found for %s\n", mddev); if (st) st->ss->free_super(st); - close(mdfd); - free(devices); free(devmap); - return 1; + goto out; } if (c->update && strcmp(c->update, "byteorder")==0) @@ -1475,6 +1572,7 @@ try_again: okcnt = 0; replcnt = 0; sparecnt=0; + journalcnt=0; rebuilding_cnt=0; for (i=0; i< bestcnt; i++) { int j = best[i]; @@ -1485,8 +1583,13 @@ try_again: /* note: we ignore error flags in multipath arrays * as they don't make sense */ - if (content->array.level != LEVEL_MULTIPATH) - if (!(devices[j].i.disk.state & (1<array.level != LEVEL_MULTIPATH) { + if (devices[j].i.disk.state & (1<journal_device_required) + journalcnt++; + else /* unexpected journal, mark as faulty */ + devices[j].i.disk.state |= (1<array.raid_disks * 2) { if (devices[j].i.recovery_start == MaxSector || (content->reshape_active && - ((i >= content->array.raid_disks - content->delta_disks) || - (i >= content->array.raid_disks - content->delta_disks - 1 - && content->array.level == 4)))) { + i >= content->array.raid_disks - content->delta_disks)) { if (!avail[i/2]) { okcnt++; avail[i/2]=1; @@ -1530,7 +1634,7 @@ try_again: replcnt++; } else rebuilding_cnt++; - } else + } else if (devices[j].i.disk.raid_disk != MD_DISK_ROLE_JOURNAL) sparecnt++; } } @@ -1566,43 +1670,46 @@ try_again: : (O_RDONLY|O_EXCL)))< 0) { pr_err("Cannot open %s: %s\n", devices[j].devname, strerror(errno)); - close(mdfd); - free(devices); - return 1; + goto out; } if (st->ss->load_super(st,fd, NULL)) { close(fd); pr_err("RAID superblock has disappeared from %s\n", devices[j].devname); - close(mdfd); - free(devices); - return 1; + goto out; } close(fd); } if (st->sb == NULL) { pr_err("No suitable drives found for %s\n", mddev); - close(mdfd); - free(devices); - return 1; + goto out; } st->ss->getinfo_super(st, content, NULL); -#ifndef MDASSEMBLE - sysfs_init(content, mdfd, NULL); -#endif + if (sysfs_init(content, mdfd, NULL)) { + pr_err("Unable to initialize sysfs\n"); + goto out; + } + + /* after reload context, store journal_clean in context */ + content->journal_clean = journal_clean; for (i=0; i= content->array.raid_disks * 2) + if (j < 0) + continue; + if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL) + desired_state = (1<= content->array.raid_disks * 2) desired_state = 0; else if (i & 1) desired_state = (1<ss->store_super(st, fd)) { close(fd); pr_err("Could not re-write superblock on %s\n", devices[chosen_drive].devname); - close(mdfd); - free(devices); - return 1; + goto out; } if (c->verbose >= 0) pr_err("Marking array %s as 'clean'\n", @@ -1671,15 +1774,15 @@ try_again: * that was moved aside due to the reshape overwriting live data * The code of doing this lives in Grow.c */ -#ifndef MDASSEMBLE if (content->reshape_active && !(content->reshape_active & RESHAPE_NO_BACKUP)) { int err = 0; int *fdlist = xmalloc(sizeof(int)* bestcnt); if (c->verbose > 0) - pr_err(":%s has an active reshape - checking " - "if critical section needs to be restored\n", + pr_err("%s has an active reshape - checking if critical section needs to be restored\n", chosen_name); + if (!c->backup_file) + c->backup_file = locate_backup(content->sys_name); enable_fds(bestcnt/2); for (i = 0; i < bestcnt/2; i++) { int j = best[i*2]; @@ -1704,8 +1807,7 @@ try_again: c->backup_file, c->verbose > 0); if (err && c->invalid_backup) { if (c->verbose > 0) - pr_err("continuing" - " without restoring backup\n"); + pr_err("continuing without restoring backup\n"); err = 0; } } @@ -1718,12 +1820,9 @@ try_again: pr_err("Failed to restore critical section for reshape, sorry.\n"); if (c->backup_file == NULL) cont_err("Possibly you needed to specify the --backup-file\n"); - close(mdfd); - free(devices); - return err; + goto out; } } -#endif /* Almost ready to actually *do* something */ /* First, fill in the map, so that udev can find our name @@ -1741,7 +1840,7 @@ try_again: rv = start_array(mdfd, mddev, content, st, ident, best, bestcnt, chosen_drive, devices, okcnt, sparecnt, - rebuilding_cnt, + rebuilding_cnt, journalcnt, c, clean, avail, start_partial_ok, pre_exist != NULL, @@ -1749,6 +1848,7 @@ try_again: if (rv == 1 && !pre_exist) ioctl(mdfd, STOP_ARRAY, NULL); free(devices); +out: map_unlock(&map); if (rv == 0) { wait_for(chosen_name, mdfd); @@ -1779,39 +1879,43 @@ try_again: usecs <<= 1; } } - } else + } else if (mdfd >= 0) close(mdfd); /* '2' means 'OK, but not started yet' */ + if (rv == -1) { + free(devices); + return 1; + } return rv == 2 ? 0 : rv; } -#ifndef MDASSEMBLE int assemble_container_content(struct supertype *st, int mdfd, struct mdinfo *content, struct context *c, char *chosen_name, int *result) { - struct mdinfo *dev, *sra; + struct mdinfo *dev, *sra, *dev2; int working = 0, preexist = 0; int expansion = 0; - struct map_ent *map = NULL; int old_raid_disks; int start_reshape; - char *avail = NULL; + char *avail; int err; - sysfs_init(content, mdfd, NULL); + if (sysfs_init(content, mdfd, NULL)) { + pr_err("Unable to initialize sysfs\n"); + return 1; + } - sra = sysfs_read(mdfd, NULL, GET_VERSION); + sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS); if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) { if (content->array.major_version == -1 && content->array.minor_version == -2 && c->readonly && content->text_version[0] == '/') content->text_version[0] = '-'; - if (sysfs_set_array(content, md_get_version(mdfd)) != 0) { - if (sra) - sysfs_free(sra); + if (sysfs_set_array(content, 9003) != 0) { + sysfs_free(sra); return 1; } } @@ -1829,8 +1933,22 @@ int assemble_container_content(struct supertype *st, int mdfd, if (st->ss->external && content->recovery_blocked && start_reshape) block_subarray(content); - if (sra) - sysfs_free(sra); + for (dev2 = sra->devs; dev2; dev2 = dev2->next) { + for (dev = content->devs; dev; dev = dev->next) + if (dev2->disk.major == dev->disk.major && + dev2->disk.minor == dev->disk.minor) + break; + if (dev) + continue; + /* Don't want this one any more */ + if (sysfs_set_str(sra, dev2, "slot", "none") < 0 && + errno == EBUSY) { + pr_err("Cannot remove old device %s: not updating %s\n", dev2->sys_name, sra->sys_name); + sysfs_free(sra); + return 1; + } + sysfs_set_str(sra, dev2, "state", "remove"); + } old_raid_disks = content->array.raid_disks - content->delta_disks; avail = xcalloc(content->array.raid_disks, 1); for (dev = content->devs; dev; dev = dev->next) { @@ -1845,14 +1963,62 @@ int assemble_container_content(struct supertype *st, int mdfd, } else if (errno == EEXIST) preexist++; } + sysfs_free(sra); if (working + expansion == 0 && c->runstop <= 0) { free(avail); return 1;/* Nothing new, don't try to start */ } - map_update(&map, fd2devnm(mdfd), - content->text_version, + map_update(NULL, fd2devnm(mdfd), content->text_version, content->uuid, chosen_name); + if (content->consistency_policy == CONSISTENCY_POLICY_PPL && + st->ss->validate_ppl) { + content->array.state |= 1; + err = 0; + + for (dev = content->devs; dev; dev = dev->next) { + int dfd; + char *devpath; + int ret; + + ret = st->ss->validate_ppl(st, content, dev); + if (ret == 0) + continue; + + if (ret < 0) { + err = 1; + break; + } + + if (!c->force) { + pr_err("%s contains invalid PPL - consider --force or --update-subarray with --update=no-ppl\n", + chosen_name); + content->array.state &= ~1; + avail[dev->disk.raid_disk] = 0; + break; + } + + /* have --force - overwrite the invalid ppl */ + devpath = map_dev(dev->disk.major, dev->disk.minor, 0); + dfd = dev_open(devpath, O_RDWR); + if (dfd < 0) { + pr_err("Failed to open %s\n", devpath); + err = 1; + break; + } + + err = st->ss->write_init_ppl(st, content, dfd); + close(dfd); + + if (err) + break; + } + + if (err) { + free(avail); + return err; + } + } if (enough(content->array.level, content->array.raid_disks, content->array.layout, content->array.state & 1, avail) == 0) { @@ -1892,7 +2058,7 @@ int assemble_container_content(struct supertype *st, int mdfd, int spare = content->array.raid_disks + expansion; if (restore_backup(st, content, working, - spare, c->backup_file, c->verbose) == 1) + spare, &c->backup_file, c->verbose) == 1) return 1; err = sysfs_set_str(content, NULL, @@ -1910,7 +2076,7 @@ int assemble_container_content(struct supertype *st, int mdfd, } err = Grow_continue(mdfd, st, content, c->backup_file, - c->freeze_reshape); + 0, c->freeze_reshape); } else switch(content->array.level) { case LEVEL_LINEAR: case LEVEL_MULTIPATH: @@ -1966,4 +2132,3 @@ int assemble_container_content(struct supertype *st, int mdfd, return err; /* FIXME should have an O_EXCL and wait for read-auto */ } -#endif