X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Assemble.c;h=6a6a56bfb8b9311197a96c0f7e5c98ba3bb7301e;hb=d97572f5a59ca1ddde9971a79d47c9ea4db5891b;hp=dab4e6b825de0215d93cbea942d4e58cfff3fb59;hpb=8cde842b189368d7c27923497e5d6be9b35b241b;p=thirdparty%2Fmdadm.git diff --git a/Assemble.c b/Assemble.c index dab4e6b8..6a6a56bf 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2012 Neil Brown + * Copyright (C) 2001-2016 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -25,21 +25,27 @@ #include "mdadm.h" #include -static int name_matches(char *found, char *required, char *homehost) +static int name_matches(char *found, char *required, char *homehost, int require_homehost) { /* See if the name found matches the required name, possibly * prefixed with 'homehost' */ - char fnd[33]; + char *sep; + unsigned int l; - strncpy(fnd, found, 32); - fnd[32] = 0; if (strcmp(found, required)==0) return 1; - if (homehost) { - int l = strlen(homehost); - if (l < 32 && fnd[l] == ':' && - strcmp(fnd+l+1, required)==0) + sep = strchr(found, ':'); + if (!sep) + return 0; + l = sep - found; + if (strncmp(found, "any:", 4) == 0 || + (homehost && strcmp(homehost, "any") == 0) || + !require_homehost || + (homehost && strlen(homehost) == l && + strncmp(found, homehost, l) == 0)) { + /* matching homehost */ + if (strcmp(sep+1, required) == 0) return 1; } return 0; @@ -48,7 +54,7 @@ static int name_matches(char *found, char *required, char *homehost) static int is_member_busy(char *metadata_version) { /* check if the given member array is active */ - struct mdstat_ent *mdstat = mdstat_read(1, 0); + struct mdstat_ent *mdstat = mdstat_read(0, 0); struct mdstat_ent *ent; int busy = 0; @@ -73,7 +79,7 @@ static int is_member_busy(char *metadata_version) static int ident_matches(struct mddev_ident *ident, struct mdinfo *content, struct supertype *tst, - char *homehost, + char *homehost, int require_homehost, char *update, char *devname) { @@ -85,7 +91,7 @@ static int ident_matches(struct mddev_ident *ident, return 0; } if (ident->name[0] && (!update || strcmp(update, "name")!= 0) && - name_matches(content->name, ident->name, homehost)==0) { + name_matches(content->name, ident->name, homehost, require_homehost)==0) { if (devname) pr_err("%s has wrong name.\n", devname); return 0; @@ -105,6 +111,7 @@ static int ident_matches(struct mddev_ident *ident, return 0; } if (ident->raid_disks != UnSet && + content->array.raid_disks != 0 && /* metadata doesn't know how many to expect */ ident->raid_disks!= content->array.raid_disks) { if (devname) pr_err("%s requires wrong number of drives.\n", @@ -170,8 +177,20 @@ static int select_devices(struct mddev_dev *devlist, if (tmpdev->used > 1) continue; - if (ident->devices && - !match_oneof(ident->devices, devname)) { + if (ident->container) { + if (ident->container[0] == '/' && + !same_dev(ident->container, devname)) { + if (report_mismatch) + pr_err("%s is not the container required (%s)\n", + devname, ident->container); + continue; + } + } else if (ident->devices && + !match_oneof(ident->devices, devname)) { + /* Note that we ignore the "device=" identifier if a + * "container=" is given. Checking both is unnecessarily + * complicated. + */ if (report_mismatch) pr_err("%s is not one of %s\n", devname, ident->devices); continue; @@ -220,8 +239,7 @@ static int select_devices(struct mddev_dev *devlist, !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)), tst->ss->match_home(tst, c->homehost) == 1)) { if (report_mismatch) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, tst->ss->name); tmpdev->used = 2; } else @@ -232,7 +250,9 @@ static int select_devices(struct mddev_dev *devlist, pr_err("no recogniseable superblock on %s\n", devname); tmpdev->used = 2; - } else if (tst->ss->load_super(tst,dfd, NULL)) { + } else if ((tst->ignore_hw_compat = 0), + tst->ss->load_super(tst, dfd, + report_mismatch ? devname : NULL)) { if (report_mismatch) pr_err("no RAID superblock on %s\n", devname); @@ -246,8 +266,7 @@ static int select_devices(struct mddev_dev *devlist, !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)), tst->ss->match_home(tst, c->homehost) == 1)) { if (report_mismatch) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, tst->ss->name); tmpdev->used = 2; } @@ -288,29 +307,20 @@ static int select_devices(struct mddev_dev *devlist, } close(dfd); - if (ident->container) { - if (ident->container[0] == '/' && - !same_dev(ident->container, devname)) { + if (ident->container && ident->container[0] != '/') { + /* we have a uuid */ + int uuid[4]; + + content = *contentp; + tst->ss->getinfo_super(tst, content, NULL); + + if (!parse_uuid(ident->container, uuid) || + !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) { if (report_mismatch) - pr_err("%s is not the container required (%s)\n", - devname, ident->container); + pr_err("%s has wrong UUID to be required container\n", + devname); goto loop; } - if (ident->container[0] != '/') { - /* we have a uuid */ - int uuid[4]; - - content = *contentp; - tst->ss->getinfo_super(tst, content, NULL); - - if (!parse_uuid(ident->container, uuid) || - !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) { - if (report_mismatch) - pr_err("%s has wrong UUID to be required container\n", - devname); - goto loop; - } - } } /* It is worth looking inside this container. */ @@ -323,7 +333,8 @@ static int select_devices(struct mddev_dev *devlist, content = content->next) { if (!ident_matches(ident, content, tst, - c->homehost, c->update, + c->homehost, c->require_homehost, + c->update, report_mismatch ? devname : NULL)) /* message already printed */; else if (is_member_busy(content->text_version)) { @@ -346,8 +357,7 @@ static int select_devices(struct mddev_dev *devlist, st = tst; tst = NULL; if (!auto_assem && inargv && tmpdev->next != NULL) { - pr_err("%s is a container, but is not " - "only device given: confused and aborting\n", + pr_err("%s is a container, but is not only device given: confused and aborting\n", devname); st->ss->free_super(st); dev_policy_free(pol); @@ -362,36 +372,43 @@ static int select_devices(struct mddev_dev *devlist, tmpdev = NULL; goto loop; } else { - int rv = 0; - struct mddev_ident *match; - content = *contentp; tst->ss->getinfo_super(tst, content, NULL); if (!ident_matches(ident, content, tst, - c->homehost, c->update, + c->homehost, c->require_homehost, + c->update, report_mismatch ? devname : NULL)) goto loop; - match = conf_match(tst, content, devname, - report_mismatch ? c->verbose : -1, - &rv); - if (!match && rv == 2) - goto loop; - if (match && match->devname && - strcasecmp(match->devname, "") == 0) { - if (report_mismatch) - pr_err("%s is a member of an explicitly ignored array\n", - devname); - goto loop; - } - if (match && !ident_matches(match, content, tst, - c->homehost, c->update, - report_mismatch ? devname : NULL)) - /* Array exists in mdadm.conf but some - * details don't match, so reject it + if (auto_assem) { + /* Never auto-assemble things that conflict + * with mdadm.conf in some way */ - goto loop; + struct mddev_ident *match; + int rv = 0; + + match = conf_match(tst, content, devname, + report_mismatch ? c->verbose : -1, + &rv); + if (!match && rv == 2) + goto loop; + if (match && match->devname && + strcasecmp(match->devname, "") == 0) { + if (report_mismatch) + pr_err("%s is a member of an explicitly ignored array\n", + devname); + goto loop; + } + if (match && !ident_matches(match, content, tst, + c->homehost, c->require_homehost, + c->update, + report_mismatch ? devname : NULL)) + /* Array exists in mdadm.conf but some + * details don't match, so reject it + */ + goto loop; + } /* should be safe to try an exclusive open now, we * have rejected anything that some other mdadm might @@ -547,7 +564,7 @@ struct devs { }; static int load_devices(struct devs *devices, char *devmap, - struct mddev_ident *ident, struct supertype *st, + struct mddev_ident *ident, struct supertype **stp, struct mddev_dev *devlist, struct context *c, struct mdinfo *content, int mdfd, char *mddev, @@ -560,39 +577,37 @@ static int load_devices(struct devs *devices, char *devmap, #ifndef MDASSEMBLE int bitmap_done = 0; #endif - int most_recent = 0; + int most_recent = -1; int bestcnt = 0; int *best = *bestp; + struct supertype *st = *stp; for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) { char *devname = tmpdev->devname; struct stat stb; + struct supertype *tst; int i; + int dfd; if (tmpdev->used != 1) continue; /* looks like a good enough match to update the super block if needed */ #ifndef MDASSEMBLE if (c->update) { - int dfd; /* prepare useful information in info structures */ struct stat stb2; - struct supertype *tst; int err; fstat(mdfd, &stb2); - if (strcmp(c->update, "uuid")==0 && - !ident->uuid_set) { - int rfd; - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, ident->uuid, 16) != 16) { - *(__u32*)(ident->uuid) = random(); - *(__u32*)(ident->uuid+1) = random(); - *(__u32*)(ident->uuid+2) = random(); - *(__u32*)(ident->uuid+3) = random(); - } - if (rfd >= 0) close(rfd); + if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set) + random_uuid((__u8 *)ident->uuid); + + if (strcmp(c->update, "ppl") == 0 && + ident->bitmap_fd >= 0) { + pr_err("PPL is not compatible with bitmap\n"); + return -1; } + dfd = dev_open(devname, tmpdev->disposition == 'I' ? O_RDWR : (O_RDWR|O_EXCL)); @@ -606,6 +621,9 @@ static int load_devices(struct devs *devices, char *devmap, close(mdfd); free(devices); free(devmap); + tst->ss->free_super(tst); + free(tst); + *stp = st; return -1; } tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks); @@ -616,6 +634,19 @@ static int load_devices(struct devs *devices, char *devmap, if (strcmp(c->update, "byteorder") == 0) err = 0; + else if (strcmp(c->update, "home-cluster") == 0) { + tst->cluster_name = c->homecluster; + err = tst->ss->write_bitmap(tst, dfd, NameUpdate); + } else if (strcmp(c->update, "nodes") == 0) { + tst->nodes = c->nodes; + err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate); + } else if (strcmp(c->update, "revert-reshape") == 0 && + c->invalid_backup) + err = tst->ss->update_super(tst, content, + "revert-reshape-nobackup", + devname, c->verbose, + ident->uuid_set, + c->homehost); else err = tst->ss->update_super(tst, content, c->update, devname, c->verbose, @@ -623,8 +654,7 @@ static int load_devices(struct devs *devices, char *devmap, c->homehost); if (err < 0) { if (err == -1) - pr_err("--update=%s not understood" - " for %s metadata\n", + pr_err("--update=%s not understood for %s metadata\n", c->update, tst->ss->name); tst->ss->free_super(tst); free(tst); @@ -632,6 +662,7 @@ static int load_devices(struct devs *devices, char *devmap, close(dfd); free(devices); free(devmap); + *stp = st; return -1; } if (strcmp(c->update, "uuid")==0 && @@ -642,7 +673,6 @@ static int load_devices(struct devs *devices, char *devmap, if (tst->ss->store_super(tst, dfd)) pr_err("Could not re-write superblock on %s.\n", devname); - close(dfd); if (strcmp(c->update, "uuid")==0 && ident->bitmap_fd >= 0 && !bitmap_done) { @@ -653,15 +683,13 @@ static int load_devices(struct devs *devices, char *devmap, else bitmap_done = 1; } - tst->ss->free_super(tst); } else #endif { - struct supertype *tst = dup_super(st); - int dfd; dfd = dev_open(devname, tmpdev->disposition == 'I' ? O_RDWR : (O_RDWR|O_EXCL)); + tst = dup_super(st); if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) { pr_err("cannot re-read metadata from %s - aborting\n", @@ -671,14 +699,16 @@ static int load_devices(struct devs *devices, char *devmap, close(mdfd); free(devices); free(devmap); + tst->ss->free_super(tst); + free(tst); + *stp = st; return -1; } tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks); - tst->ss->free_super(tst); - close(dfd); } - stat(devname, &stb); + fstat(dfd, &stb); + close(dfd); if (c->verbose > 0) pr_err("%s is identified as a member of %s, slot %d%s.\n", @@ -691,17 +721,25 @@ static int load_devices(struct devs *devices, char *devmap, devices[devcnt].i.disk.major = major(stb.st_rdev); devices[devcnt].i.disk.minor = minor(stb.st_rdev); - if (devices[devcnt].i.events - > devices[most_recent].i.events && - devices[devcnt].i.disk.state == 6) + if (devices[devcnt].i.disk.state == 6) { + if (most_recent < 0 || + devices[devcnt].i.events + > devices[most_recent].i.events) { + struct supertype *tmp = tst; + tst = st; + st = tmp; most_recent = devcnt; + } + } + tst->ss->free_super(tst); + free(tst); if (content->array.level == LEVEL_MULTIPATH) /* with multipath, the raid_disk from the superblock is meaningless */ i = devcnt; else i = devices[devcnt].i.disk.raid_disk; - if (i+1 == 0) { + if (i+1 == 0 || i == MD_DISK_ROLE_JOURNAL) { if (nextspare < content->array.raid_disks*2) nextspare = content->array.raid_disks*2; i = nextspare++; @@ -740,12 +778,9 @@ static int load_devices(struct devs *devices, char *devmap, * Could be a mis-detection caused by overlapping * partitions. fail-safe. */ - pr_err("WARNING %s and %s appear" - " to have very similar superblocks.\n" - " If they are really different, " - "please --zero the superblock on one\n" - " If they are the same or overlap," - " please remove one from %s.\n", + pr_err("WARNING %s and %s appear to have very similar superblocks.\n" + " If they are really different, please --zero the superblock on one\n" + " If they are the same or overlap, please remove one from %s.\n", devices[best[i]].devname, devname, inargv ? "the list" : "the\n DEVICE list in mdadm.conf" @@ -753,6 +788,7 @@ static int load_devices(struct devs *devices, char *devmap, close(mdfd); free(devices); free(devmap); + *stp = st; return -1; } if (best[i] == -1 @@ -762,9 +798,11 @@ static int load_devices(struct devs *devices, char *devmap, } devcnt++; } - *most_recentp = most_recent; + if (most_recent >= 0) + *most_recentp = most_recent; *bestcntp = bestcnt; *bestp = best; + *stp = st; return devcnt; } @@ -796,14 +834,41 @@ static int force_array(struct mdinfo *content, int chosen_drive = -1; int i; - for (i = 0; i < content->array.raid_disks && i < bestcnt; i++) { + for (i = 0; + i < content->array.raid_disks * 2 && i < bestcnt; + i += 2) { int j = best[i]; - if (j>=0 && - !devices[j].uptodate && - devices[j].i.recovery_start == MaxSector && - (chosen_drive < 0 || + if (j < 0) + continue; + if (devices[j].uptodate) + continue; + if (devices[j].i.recovery_start != MaxSector) { + int delta; + if (!devices[j].i.reshape_active || + devices[j].i.delta_disks <= 0) + continue; + /* When increasing number of devices, an + * added device also appears to be + * recovering. It is safe to include it + * as long as it won't be a source of + * data. + * For now, just allow for last data + * devices in RAID4 or last devices in RAID4/5/6. + */ + delta = devices[j].i.delta_disks; + if (devices[j].i.array.level >= 4 && + devices[j].i.array.level <= 6 && + i/2 >= content->array.raid_disks - delta) + /* OK */; + else if (devices[j].i.array.level == 4 && + i/2 >= content->array.raid_disks - delta - 1) + /* OK */; + else + continue; + } + if (chosen_drive < 0 || devices[j].i.events - > devices[chosen_drive].i.events)) + > devices[chosen_drive].i.events) chosen_drive = j; } if (chosen_drive < 0) @@ -852,11 +917,12 @@ static int force_array(struct mdinfo *content, avail[chosen_drive] = 1; okcnt++; tst->ss->free_super(tst); - /* If there are any other drives of the same vintage, * add them in as well. We can't lose and we might gain */ - for (i = 0; i < content->array.raid_disks && i < bestcnt ; i++) { + for (i = 0; + i < content->array.raid_disks * 2 && i < bestcnt ; + i += 2) { int j = best[i]; if (j >= 0 && !devices[j].uptodate && @@ -881,9 +947,11 @@ static int start_array(int mdfd, unsigned int okcnt, unsigned int sparecnt, unsigned int rebuilding_cnt, + unsigned int journalcnt, struct context *c, int clean, char *avail, int start_partial_ok, + int err_ok, int was_forced ) { @@ -891,8 +959,20 @@ static int start_array(int mdfd, int i; unsigned int req_cnt; + if (content->journal_device_required && (content->journal_clean == 0)) { + if (!c->force) { + pr_err("Not safe to assemble with missing or stale journal device, consider --force.\n"); + return 1; + } + pr_err("Journal is missing or stale, starting array read only.\n"); + c->readonly = 1; + } + + if (content->consistency_policy == CONSISTENCY_POLICY_PPL) + clean = 1; + rv = set_array_info(mdfd, st, content); - if (rv) { + if (rv && !err_ok) { pr_err("failed to set array info for %s: %s\n", mddev, strerror(errno)); return 1; @@ -938,8 +1018,7 @@ static int start_array(int mdfd, rv = add_disk(mdfd, st, content, &devices[j].i); if (rv) { - pr_err("failed to add " - "%s to %s: %s\n", + pr_err("failed to add %s to %s: %s\n", devices[j].devname, mddev, strerror(errno)); @@ -963,21 +1042,41 @@ static int start_array(int mdfd, } else if (c->verbose > 0 && i < content->array.raid_disks*2 && (i&1) == 0) pr_err("no uptodate device for slot %d of %s\n", - i, mddev); + i/2, mddev); } if (content->array.level == LEVEL_CONTAINER) { if (c->verbose >= 0) { - pr_err("Container %s has been " - "assembled with %d drive%s", - mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s"); + pr_err("Container %s has been assembled with %d drive%s", + mddev, okcnt+sparecnt+journalcnt, + okcnt+sparecnt+journalcnt==1?"":"s"); if (okcnt < (unsigned)content->array.raid_disks) fprintf(stderr, " (out of %d)", content->array.raid_disks); fprintf(stderr, "\n"); } + + if (st->ss->validate_container) { + struct mdinfo *devices_list; + struct mdinfo *info_devices = xmalloc(sizeof(struct mdinfo)*(okcnt+sparecnt)); + unsigned int count; + devices_list = NULL; + for (count = 0; count < okcnt+sparecnt; count++) { + info_devices[count] = devices[count].i; + info_devices[count].next = devices_list; + devices_list = &info_devices[count]; + } + if (st->ss->validate_container(devices_list)) + pr_err("Mismatch detected!\n"); + free(info_devices); + } + st->ss->free_super(st); sysfs_uevent(content, "change"); + if (err_ok && okcnt < (unsigned)content->array.raid_disks) + /* Was partial, is still partial, so signal an error + * to ensure we don't retry */ + return 1; return 0; } @@ -1002,11 +1101,20 @@ static int start_array(int mdfd, if (content->reshape_active && !(content->reshape_active & RESHAPE_NO_BACKUP) && content->delta_disks <= 0) { + if (!c->backup_file) { + pr_err("%s: Need a backup file to complete reshape of this array.\n", + mddev); + pr_err("Please provided one with \"--backup-file=...\"\n"); + if (c->update && + strcmp(c->update, "revert-reshape") == 0) + pr_err("(Don't specify --update=revert-reshape again, that part succeeded.)\n"); + return 1; + } rv = sysfs_set_str(content, NULL, "array_state", "readonly"); if (rv == 0) rv = Grow_continue(mdfd, st, content, - c->backup_file, + c->backup_file, 0, c->freeze_reshape); } else if (c->readonly && sysfs_attribute_available( @@ -1016,6 +1124,7 @@ static int start_array(int mdfd, } else #endif rv = ioctl(mdfd, RUN_ARRAY, NULL); + reopen_mddev(mdfd); /* drop O_EXCL */ if (rv == 0) { if (c->verbose >= 0) { pr_err("%s has been started with %d drive%s", @@ -1026,6 +1135,8 @@ static int start_array(int mdfd, fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); if (sparecnt) fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + if (content->journal_clean) + fprintf(stderr, " and %d journal", journalcnt); fprintf(stderr, ".\n"); } if (content->reshape_active && @@ -1034,12 +1145,16 @@ static int start_array(int mdfd, /* might need to increase the size * of the stripe cache - default is 256 */ - if (256 < 4 * (content->array.chunk_size/4096)) { + int chunk_size = content->array.chunk_size; + if (content->reshape_active && + content->new_chunk > chunk_size) + chunk_size = content->new_chunk; + if (256 < 4 * ((chunk_size+4065)/4096)) { struct mdinfo *sra = sysfs_read(mdfd, NULL, 0); if (sra) sysfs_set_num(sra, NULL, "stripe_cache_size", - (4 * content->array.chunk_size / 4096) + 1); + (4 * chunk_size / 4096) + 1); sysfs_free(sra); } } @@ -1065,8 +1180,6 @@ static int start_array(int mdfd, } } } - printf("l=%d o=%d r=%d w=%d\n",content->array.level, - okcnt, content->array.raid_disks, was_forced); if (content->array.level == 6 && okcnt + 1 == (unsigned)content->array.raid_disks && was_forced) { @@ -1083,15 +1196,12 @@ static int start_array(int mdfd, if (!enough(content->array.level, content->array.raid_disks, content->array.layout, 1, avail)) - pr_err("Not enough devices to " - "start the array.\n"); + pr_err("Not enough devices to start the array.\n"); else if (!enough(content->array.level, content->array.raid_disks, content->array.layout, clean, avail)) - pr_err("Not enough devices to " - "start the array while not clean " - "- consider --force.\n"); + pr_err("Not enough devices to start the array while not clean - consider --force.\n"); return 1; } @@ -1116,9 +1226,7 @@ static int start_array(int mdfd, content->array.raid_disks, content->array.layout, clean, avail)) - fprintf(stderr, " - not enough to start the " - "array while not clean - consider " - "--force.\n"); + fprintf(stderr, " - not enough to start the array while not clean - consider --force.\n"); else { if (req_cnt == (unsigned)content->array.raid_disks) fprintf(stderr, " - need all %d to start it", req_cnt); @@ -1200,7 +1308,8 @@ int Assemble(struct supertype *st, char *mddev, int *best = NULL; /* indexed by raid_disk */ int bestcnt = 0; int devcnt; - unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt; + unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt, journalcnt; + int journal_clean = 0; int i; int was_forced = 0; int most_recent = 0; @@ -1256,13 +1365,13 @@ try_again: mddev ? mddev : "further assembly"); content = &info; - if (st) + if (st && c->force) st->ignore_hw_compat = 1; num_devs = select_devices(devlist, ident, &st, &content, c, inargv, auto_assem); if (num_devs < 0) return 1; - + if (!st || !st->sb || !content) return 2; @@ -1280,7 +1389,10 @@ try_again: */ if (map_lock(&map)) pr_err("failed to get exclusive lock on mapfile - continue anyway...\n"); - mp = map_by_uuid(&map, content->uuid); + if (c->update && strcmp(c->update,"uuid") == 0) + mp = NULL; + else + mp = map_by_uuid(&map, content->uuid); if (mp) { struct mdinfo *dv; /* array already exists. */ @@ -1400,7 +1512,7 @@ try_again: /* This is a member of a container. Try starting the array. */ int err; err = assemble_container_content(st, mdfd, content, c, - chosen_name); + chosen_name, NULL); close(mdfd); return err; } @@ -1408,7 +1520,7 @@ try_again: /* Ok, no bad inconsistancy, we can try updating etc */ devices = xcalloc(num_devs, sizeof(*devices)); devmap = xcalloc(num_devs, content->array.raid_disks); - devcnt = load_devices(devices, devmap, ident, st, devlist, + devcnt = load_devices(devices, devmap, ident, &st, devlist, c, content, mdfd, mddev, &most_recent, &bestcnt, &best, inargv); if (devcnt < 0) @@ -1438,6 +1550,7 @@ try_again: okcnt = 0; replcnt = 0; sparecnt=0; + journalcnt=0; rebuilding_cnt=0; for (i=0; i< bestcnt; i++) { int j = best[i]; @@ -1448,8 +1561,13 @@ try_again: /* note: we ignore error flags in multipath arrays * as they don't make sense */ - if (content->array.level != LEVEL_MULTIPATH) - if (!(devices[j].i.disk.state & (1<array.level != LEVEL_MULTIPATH) { + if (devices[j].i.disk.state & (1<journal_device_required) + journalcnt++; + else /* unexpected journal, mark as faulty */ + devices[j].i.disk.state |= (1<force && content->array.raid_disks > 0 && devices[most_recent].i.disk.raid_disk >= 0 && devmap[j * content->array.raid_disks + devices[most_recent].i.disk.raid_disk] == 0) { @@ -1480,12 +1599,12 @@ try_again: devices[most_recent].i.events ) { devices[j].uptodate = 1; + if (devices[j].i.disk.state & (1<array.raid_disks * 2) { if (devices[j].i.recovery_start == MaxSector || (content->reshape_active && - ((i >= content->array.raid_disks - content->delta_disks) || - (i >= content->array.raid_disks - content->delta_disks - 1 - && content->array.level == 4)))) { + i >= content->array.raid_disks - content->delta_disks)) { if (!avail[i/2]) { okcnt++; avail[i/2]=1; @@ -1493,7 +1612,7 @@ try_again: replcnt++; } else rebuilding_cnt++; - } else + } else if (devices[j].i.disk.raid_disk != MD_DISK_ROLE_JOURNAL) sparecnt++; } } @@ -1513,7 +1632,7 @@ try_again: */ chosen_drive = -1; st->ss->free_super(st); - for (i=0; chosen_drive < 0 && ijournal_clean = journal_clean; for (i=0; i= content->array.raid_disks * 2) + if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL) + desired_state = (1<= content->array.raid_disks * 2) desired_state = 0; else if (i & 1) desired_state = (1<verbose > 0) - pr_err(":%s has an active reshape - checking " - "if critical section needs to be restored\n", + pr_err("%s has an active reshape - checking if critical section needs to be restored\n", chosen_name); + if (!c->backup_file) + c->backup_file = locate_backup(content->sys_name); enable_fds(bestcnt/2); for (i = 0; i < bestcnt/2; i++) { int j = best[i*2]; @@ -1667,8 +1791,7 @@ try_again: c->backup_file, c->verbose > 0); if (err && c->invalid_backup) { if (c->verbose > 0) - pr_err("continuing" - " without restoring backup\n"); + pr_err("continuing without restoring backup\n"); err = 0; } } @@ -1704,9 +1827,11 @@ try_again: rv = start_array(mdfd, mddev, content, st, ident, best, bestcnt, chosen_drive, devices, okcnt, sparecnt, - rebuilding_cnt, + rebuilding_cnt, journalcnt, c, - clean, avail, start_partial_ok, was_forced); + clean, avail, start_partial_ok, + pre_exist != NULL, + was_forced); if (rv == 1 && !pre_exist) ioctl(mdfd, STOP_ARRAY, NULL); free(devices); @@ -1750,18 +1875,19 @@ try_again: #ifndef MDASSEMBLE int assemble_container_content(struct supertype *st, int mdfd, struct mdinfo *content, struct context *c, - char *chosen_name) + char *chosen_name, int *result) { - struct mdinfo *dev, *sra; + struct mdinfo *dev, *sra, *dev2; int working = 0, preexist = 0; int expansion = 0; - struct map_ent *map = NULL; int old_raid_disks; int start_reshape; + char *avail; + int err; sysfs_init(content, mdfd, NULL); - sra = sysfs_read(mdfd, NULL, GET_VERSION); + sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS); if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) { if (content->array.major_version == -1 && content->array.minor_version == -2 && @@ -1769,8 +1895,7 @@ int assemble_container_content(struct supertype *st, int mdfd, content->text_version[0] == '/') content->text_version[0] = '-'; if (sysfs_set_array(content, md_get_version(mdfd)) != 0) { - if (sra) - sysfs_free(sra); + sysfs_free(sra); return 1; } } @@ -1788,10 +1913,27 @@ int assemble_container_content(struct supertype *st, int mdfd, if (st->ss->external && content->recovery_blocked && start_reshape) block_subarray(content); - if (sra) - sysfs_free(sra); + for (dev2 = sra->devs; dev2; dev2 = dev2->next) { + for (dev = content->devs; dev; dev = dev->next) + if (dev2->disk.major == dev->disk.major && + dev2->disk.minor == dev->disk.minor) + break; + if (dev) + continue; + /* Don't want this one any more */ + if (sysfs_set_str(sra, dev2, "slot", "none") < 0 && + errno == EBUSY) { + pr_err("Cannot remove old device %s: not updating %s\n", dev2->sys_name, sra->sys_name); + sysfs_free(sra); + return 1; + } + sysfs_set_str(sra, dev2, "state", "remove"); + } old_raid_disks = content->array.raid_disks - content->delta_disks; - for (dev = content->devs; dev; dev = dev->next) + avail = xcalloc(content->array.raid_disks, 1); + for (dev = content->devs; dev; dev = dev->next) { + if (dev->disk.raid_disk >= 0) + avail[dev->disk.raid_disk] = 1; if (sysfs_add_disk(content, dev, 1) == 0) { if (dev->disk.raid_disk >= old_raid_disks && content->reshape_active) @@ -1800,101 +1942,174 @@ int assemble_container_content(struct supertype *st, int mdfd, working++; } else if (errno == EEXIST) preexist++; - if (working + expansion == 0) + } + sysfs_free(sra); + if (working + expansion == 0 && c->runstop <= 0) { + free(avail); return 1;/* Nothing new, don't try to start */ - - map_update(&map, fd2devnm(mdfd), - content->text_version, + } + map_update(NULL, fd2devnm(mdfd), content->text_version, content->uuid, chosen_name); - if (c->runstop > 0 || - (working + preexist + expansion) >= - content->array.working_disks) { - int err; + if (content->consistency_policy == CONSISTENCY_POLICY_PPL && + st->ss->validate_ppl) { + content->array.state |= 1; + err = 0; - if (start_reshape) { - int spare = content->array.raid_disks + expansion; - if (restore_backup(st, content, - working, - spare, c->backup_file, c->verbose) == 1) - return 1; + for (dev = content->devs; dev; dev = dev->next) { + int dfd; + char *devpath; + int ret; - err = sysfs_set_str(content, NULL, - "array_state", "readonly"); - if (err) - return 1; + ret = st->ss->validate_ppl(st, content, dev); + if (ret == 0) + continue; - if (st->ss->external) { - if (!mdmon_running(st->container_devnm)) - start_mdmon(st->container_devnm); - ping_monitor(st->container_devnm); - if (mdmon_running(st->container_devnm) && - st->update_tail == NULL) - st->update_tail = &st->updates; + if (ret < 0) { + err = 1; + break; } - err = Grow_continue(mdfd, st, content, c->backup_file, - c->freeze_reshape); - } else switch(content->array.level) { - case LEVEL_LINEAR: - case LEVEL_MULTIPATH: - case 0: - err = sysfs_set_str(content, NULL, "array_state", - c->readonly ? "readonly" : "active"); + if (!c->force) { + pr_err("%s contains invalid PPL - consider --force or --update-subarray with --update=no-ppl\n", + chosen_name); + content->array.state &= ~1; + avail[dev->disk.raid_disk] = 0; break; - default: - err = sysfs_set_str(content, NULL, "array_state", - "readonly"); - /* start mdmon if needed. */ - if (!err) { - if (!mdmon_running(st->container_devnm)) - start_mdmon(st->container_devnm); - ping_monitor(st->container_devnm); - } + } + + /* have --force - overwrite the invalid ppl */ + devpath = map_dev(dev->disk.major, dev->disk.minor, 0); + dfd = dev_open(devpath, O_RDWR); + if (dfd < 0) { + pr_err("Failed to open %s\n", devpath); + err = 1; break; } - if (!err) - sysfs_set_safemode(content, content->safe_mode_delay); - /* Block subarray here if it is not reshaped now - * It has be blocked a little later to allow mdmon to switch in - * in to R/W state - */ - if (st->ss->external && content->recovery_blocked && - !start_reshape) - block_subarray(content); + err = st->ss->write_init_ppl(st, content, dfd); + close(dfd); - if (c->verbose >= 0) { if (err) - pr_err("array %s now has %d device%s", - chosen_name, working + preexist, - working + preexist == 1 ? "":"s"); - else - pr_err("Started %s with %d device%s", - chosen_name, working + preexist, - working + preexist == 1 ? "":"s"); + break; + } + + if (err) { + free(avail); + return err; + } + } + + if (enough(content->array.level, content->array.raid_disks, + content->array.layout, content->array.state & 1, avail) == 0) { + if (c->export && result) + *result |= INCR_NO; + else if (c->verbose >= 0) { + pr_err("%s assembled with %d device%s", + chosen_name, preexist + working, + preexist + working == 1 ? "":"s"); if (preexist) fprintf(stderr, " (%d new)", working); - if (expansion) - fprintf(stderr, " ( + %d for expansion)", - expansion); - fprintf(stderr, "\n"); + fprintf(stderr, " but not started\n"); } - if (!err) - wait_for(chosen_name, mdfd); - return err; - /* FIXME should have an O_EXCL and wait for read-auto */ - } else { - if (c->verbose >= 0) { + free(avail); + return 1; + } + free(avail); + + if (c->runstop <= 0 && + (working + preexist + expansion) < + content->array.working_disks) { + if (c->export && result) + *result |= INCR_UNSAFE; + else if (c->verbose >= 0) { pr_err("%s assembled with %d device%s", chosen_name, preexist + working, preexist + working == 1 ? "":"s"); if (preexist) fprintf(stderr, " (%d new)", working); - fprintf(stderr, " but not started\n"); + fprintf(stderr, " but not safe to start\n"); } return 1; } + + + if (start_reshape) { + int spare = content->array.raid_disks + expansion; + if (restore_backup(st, content, + working, + spare, &c->backup_file, c->verbose) == 1) + return 1; + + err = sysfs_set_str(content, NULL, + "array_state", "readonly"); + if (err) + return 1; + + if (st->ss->external) { + if (!mdmon_running(st->container_devnm)) + start_mdmon(st->container_devnm); + ping_monitor(st->container_devnm); + if (mdmon_running(st->container_devnm) && + st->update_tail == NULL) + st->update_tail = &st->updates; + } + + err = Grow_continue(mdfd, st, content, c->backup_file, + 0, c->freeze_reshape); + } else switch(content->array.level) { + case LEVEL_LINEAR: + case LEVEL_MULTIPATH: + case 0: + err = sysfs_set_str(content, NULL, "array_state", + c->readonly ? "readonly" : "active"); + break; + default: + err = sysfs_set_str(content, NULL, "array_state", + "readonly"); + /* start mdmon if needed. */ + if (!err) { + if (!mdmon_running(st->container_devnm)) + start_mdmon(st->container_devnm); + ping_monitor(st->container_devnm); + } + break; + } + if (!err) + sysfs_set_safemode(content, content->safe_mode_delay); + + /* Block subarray here if it is not reshaped now + * It has be blocked a little later to allow mdmon to switch in + * in to R/W state + */ + if (st->ss->external && content->recovery_blocked && + !start_reshape) + block_subarray(content); + + if (c->export && result) { + if (err) + *result |= INCR_NO; + else + *result |= INCR_YES; + } else if (c->verbose >= 0) { + if (err) + pr_err("array %s now has %d device%s", + chosen_name, working + preexist, + working + preexist == 1 ? "":"s"); + else + pr_err("Started %s with %d device%s", + chosen_name, working + preexist, + working + preexist == 1 ? "":"s"); + if (preexist) + fprintf(stderr, " (%d new)", working); + if (expansion) + fprintf(stderr, " ( + %d for expansion)", + expansion); + fprintf(stderr, "\n"); + } + if (!err) + wait_for(chosen_name, mdfd); + return err; + /* FIXME should have an O_EXCL and wait for read-auto */ } #endif -