X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=Assemble.c;h=c09842016c0afcbca48c2262da5f09b2f347e80c;hb=e97a7cd011345e5dead736de51b33968da49d876;hp=a57d384daeff06b77d75580f9ad86723233c26f4;hpb=06e293d0970e36b1ed049b9d3ccb21a870e9d2eb;p=thirdparty%2Fmdadm.git diff --git a/Assemble.c b/Assemble.c index a57d384d..c0984201 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2013 Neil Brown + * Copyright (C) 2001-2016 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -25,21 +25,27 @@ #include "mdadm.h" #include -static int name_matches(char *found, char *required, char *homehost) +static int name_matches(char *found, char *required, char *homehost, int require_homehost) { /* See if the name found matches the required name, possibly * prefixed with 'homehost' */ - char fnd[33]; + char *sep; + unsigned int l; - strncpy(fnd, found, 32); - fnd[32] = 0; if (strcmp(found, required)==0) return 1; - if (homehost) { - int l = strlen(homehost); - if (l < 32 && fnd[l] == ':' && - strcmp(fnd+l+1, required)==0) + sep = strchr(found, ':'); + if (!sep) + return 0; + l = sep - found; + if (strncmp(found, "any:", 4) == 0 || + (homehost && strcmp(homehost, "any") == 0) || + !require_homehost || + (homehost && strlen(homehost) == l && + strncmp(found, homehost, l) == 0)) { + /* matching homehost */ + if (strcmp(sep+1, required) == 0) return 1; } return 0; @@ -73,7 +79,7 @@ static int is_member_busy(char *metadata_version) static int ident_matches(struct mddev_ident *ident, struct mdinfo *content, struct supertype *tst, - char *homehost, + char *homehost, int require_homehost, char *update, char *devname) { @@ -85,7 +91,7 @@ static int ident_matches(struct mddev_ident *ident, return 0; } if (ident->name[0] && (!update || strcmp(update, "name")!= 0) && - name_matches(content->name, ident->name, homehost)==0) { + name_matches(content->name, ident->name, homehost, require_homehost)==0) { if (devname) pr_err("%s has wrong name.\n", devname); return 0; @@ -233,8 +239,7 @@ static int select_devices(struct mddev_dev *devlist, !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)), tst->ss->match_home(tst, c->homehost) == 1)) { if (report_mismatch) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, tst->ss->name); tmpdev->used = 2; } else @@ -245,7 +250,9 @@ static int select_devices(struct mddev_dev *devlist, pr_err("no recogniseable superblock on %s\n", devname); tmpdev->used = 2; - } else if (tst->ss->load_super(tst,dfd, NULL)) { + } else if ((tst->ignore_hw_compat = 0), + tst->ss->load_super(tst, dfd, + report_mismatch ? devname : NULL)) { if (report_mismatch) pr_err("no RAID superblock on %s\n", devname); @@ -259,8 +266,7 @@ static int select_devices(struct mddev_dev *devlist, !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)), tst->ss->match_home(tst, c->homehost) == 1)) { if (report_mismatch) - pr_err("%s has metadata type %s for which " - "auto-assembly is disabled\n", + pr_err("%s has metadata type %s for which auto-assembly is disabled\n", devname, tst->ss->name); tmpdev->used = 2; } @@ -327,7 +333,8 @@ static int select_devices(struct mddev_dev *devlist, content = content->next) { if (!ident_matches(ident, content, tst, - c->homehost, c->update, + c->homehost, c->require_homehost, + c->update, report_mismatch ? devname : NULL)) /* message already printed */; else if (is_member_busy(content->text_version)) { @@ -350,8 +357,7 @@ static int select_devices(struct mddev_dev *devlist, st = tst; tst = NULL; if (!auto_assem && inargv && tmpdev->next != NULL) { - pr_err("%s is a container, but is not " - "only device given: confused and aborting\n", + pr_err("%s is a container, but is not only device given: confused and aborting\n", devname); st->ss->free_super(st); dev_policy_free(pol); @@ -366,36 +372,43 @@ static int select_devices(struct mddev_dev *devlist, tmpdev = NULL; goto loop; } else { - int rv = 0; - struct mddev_ident *match; - content = *contentp; tst->ss->getinfo_super(tst, content, NULL); if (!ident_matches(ident, content, tst, - c->homehost, c->update, + c->homehost, c->require_homehost, + c->update, report_mismatch ? devname : NULL)) goto loop; - match = conf_match(tst, content, devname, - report_mismatch ? c->verbose : -1, - &rv); - if (!match && rv == 2) - goto loop; - if (match && match->devname && - strcasecmp(match->devname, "") == 0) { - if (report_mismatch) - pr_err("%s is a member of an explicitly ignored array\n", - devname); - goto loop; - } - if (match && !ident_matches(match, content, tst, - c->homehost, c->update, - report_mismatch ? devname : NULL)) - /* Array exists in mdadm.conf but some - * details don't match, so reject it + if (auto_assem) { + /* Never auto-assemble things that conflict + * with mdadm.conf in some way */ - goto loop; + struct mddev_ident *match; + int rv = 0; + + match = conf_match(tst, content, devname, + report_mismatch ? c->verbose : -1, + &rv); + if (!match && rv == 2) + goto loop; + if (match && match->devname && + strcasecmp(match->devname, "") == 0) { + if (report_mismatch) + pr_err("%s is a member of an explicitly ignored array\n", + devname); + goto loop; + } + if (match && !ident_matches(match, content, tst, + c->homehost, c->require_homehost, + c->update, + report_mismatch ? devname : NULL)) + /* Array exists in mdadm.conf but some + * details don't match, so reject it + */ + goto loop; + } /* should be safe to try an exclusive open now, we * have rejected anything that some other mdadm might @@ -574,30 +587,21 @@ static int load_devices(struct devs *devices, char *devmap, struct stat stb; struct supertype *tst; int i; + int dfd; if (tmpdev->used != 1) continue; /* looks like a good enough match to update the super block if needed */ #ifndef MDASSEMBLE if (c->update) { - int dfd; /* prepare useful information in info structures */ struct stat stb2; int err; fstat(mdfd, &stb2); - if (strcmp(c->update, "uuid")==0 && - !ident->uuid_set) { - int rfd; - if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 || - read(rfd, ident->uuid, 16) != 16) { - *(__u32*)(ident->uuid) = random(); - *(__u32*)(ident->uuid+1) = random(); - *(__u32*)(ident->uuid+2) = random(); - *(__u32*)(ident->uuid+3) = random(); - } - if (rfd >= 0) close(rfd); - } + if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set) + random_uuid((__u8 *)ident->uuid); + dfd = dev_open(devname, tmpdev->disposition == 'I' ? O_RDWR : (O_RDWR|O_EXCL)); @@ -624,6 +628,19 @@ static int load_devices(struct devs *devices, char *devmap, if (strcmp(c->update, "byteorder") == 0) err = 0; + else if (strcmp(c->update, "home-cluster") == 0) { + tst->cluster_name = c->homecluster; + err = tst->ss->write_bitmap(tst, dfd, NameUpdate); + } else if (strcmp(c->update, "nodes") == 0) { + tst->nodes = c->nodes; + err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate); + } else if (strcmp(c->update, "revert-reshape") == 0 && + c->invalid_backup) + err = tst->ss->update_super(tst, content, + "revert-reshape-nobackup", + devname, c->verbose, + ident->uuid_set, + c->homehost); else err = tst->ss->update_super(tst, content, c->update, devname, c->verbose, @@ -631,8 +648,7 @@ static int load_devices(struct devs *devices, char *devmap, c->homehost); if (err < 0) { if (err == -1) - pr_err("--update=%s not understood" - " for %s metadata\n", + pr_err("--update=%s not understood for %s metadata\n", c->update, tst->ss->name); tst->ss->free_super(tst); free(tst); @@ -651,7 +667,6 @@ static int load_devices(struct devs *devices, char *devmap, if (tst->ss->store_super(tst, dfd)) pr_err("Could not re-write superblock on %s.\n", devname); - close(dfd); if (strcmp(c->update, "uuid")==0 && ident->bitmap_fd >= 0 && !bitmap_done) { @@ -665,9 +680,9 @@ static int load_devices(struct devs *devices, char *devmap, } else #endif { - int dfd = dev_open(devname, - tmpdev->disposition == 'I' - ? O_RDWR : (O_RDWR|O_EXCL)); + dfd = dev_open(devname, + tmpdev->disposition == 'I' + ? O_RDWR : (O_RDWR|O_EXCL)); tst = dup_super(st); if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) { @@ -684,10 +699,10 @@ static int load_devices(struct devs *devices, char *devmap, return -1; } tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks); - close(dfd); } - stat(devname, &stb); + fstat(dfd, &stb); + close(dfd); if (c->verbose > 0) pr_err("%s is identified as a member of %s, slot %d%s.\n", @@ -718,7 +733,7 @@ static int load_devices(struct devs *devices, char *devmap, i = devcnt; else i = devices[devcnt].i.disk.raid_disk; - if (i+1 == 0) { + if (i+1 == 0 || i == MD_DISK_ROLE_JOURNAL) { if (nextspare < content->array.raid_disks*2) nextspare = content->array.raid_disks*2; i = nextspare++; @@ -757,12 +772,9 @@ static int load_devices(struct devs *devices, char *devmap, * Could be a mis-detection caused by overlapping * partitions. fail-safe. */ - pr_err("WARNING %s and %s appear" - " to have very similar superblocks.\n" - " If they are really different, " - "please --zero the superblock on one\n" - " If they are the same or overlap," - " please remove one from %s.\n", + pr_err("WARNING %s and %s appear to have very similar superblocks.\n" + " If they are really different, please --zero the superblock on one\n" + " If they are the same or overlap, please remove one from %s.\n", devices[best[i]].devname, devname, inargv ? "the list" : "the\n DEVICE list in mdadm.conf" @@ -820,12 +832,37 @@ static int force_array(struct mdinfo *content, i < content->array.raid_disks * 2 && i < bestcnt; i += 2) { int j = best[i]; - if (j>=0 && - !devices[j].uptodate && - devices[j].i.recovery_start == MaxSector && - (chosen_drive < 0 || + if (j < 0) + continue; + if (devices[j].uptodate) + continue; + if (devices[j].i.recovery_start != MaxSector) { + int delta; + if (!devices[j].i.reshape_active || + devices[j].i.delta_disks <= 0) + continue; + /* When increasing number of devices, an + * added device also appears to be + * recovering. It is safe to include it + * as long as it won't be a source of + * data. + * For now, just allow for last data + * devices in RAID4 or last devices in RAID4/5/6. + */ + delta = devices[j].i.delta_disks; + if (devices[j].i.array.level >= 4 && + devices[j].i.array.level <= 6 && + i/2 >= content->array.raid_disks - delta) + /* OK */; + else if (devices[j].i.array.level == 4 && + i/2 >= content->array.raid_disks - delta - 1) + /* OK */; + else + continue; + } + if (chosen_drive < 0 || devices[j].i.events - > devices[chosen_drive].i.events)) + > devices[chosen_drive].i.events) chosen_drive = j; } if (chosen_drive < 0) @@ -874,7 +911,6 @@ static int force_array(struct mdinfo *content, avail[chosen_drive] = 1; okcnt++; tst->ss->free_super(tst); - /* If there are any other drives of the same vintage, * add them in as well. We can't lose and we might gain */ @@ -905,6 +941,7 @@ static int start_array(int mdfd, unsigned int okcnt, unsigned int sparecnt, unsigned int rebuilding_cnt, + unsigned int journalcnt, struct context *c, int clean, char *avail, int start_partial_ok, @@ -916,6 +953,18 @@ static int start_array(int mdfd, int i; unsigned int req_cnt; + if (content->journal_device_required && (content->journal_clean == 0)) { + if (!c->force) { + pr_err("Not safe to assemble with missing or stale journal device, consider --force.\n"); + return 1; + } + pr_err("Journal is missing or stale, starting array read only.\n"); + c->readonly = 1; + } + + if (content->consistency_policy == CONSISTENCY_POLICY_PPL) + clean = 1; + rv = set_array_info(mdfd, st, content); if (rv && !err_ok) { pr_err("failed to set array info for %s: %s\n", @@ -963,8 +1012,7 @@ static int start_array(int mdfd, rv = add_disk(mdfd, st, content, &devices[j].i); if (rv) { - pr_err("failed to add " - "%s to %s: %s\n", + pr_err("failed to add %s to %s: %s\n", devices[j].devname, mddev, strerror(errno)); @@ -988,19 +1036,35 @@ static int start_array(int mdfd, } else if (c->verbose > 0 && i < content->array.raid_disks*2 && (i&1) == 0) pr_err("no uptodate device for slot %d of %s\n", - i, mddev); + i/2, mddev); } if (content->array.level == LEVEL_CONTAINER) { if (c->verbose >= 0) { - pr_err("Container %s has been " - "assembled with %d drive%s", - mddev, okcnt+sparecnt, okcnt+sparecnt==1?"":"s"); + pr_err("Container %s has been assembled with %d drive%s", + mddev, okcnt+sparecnt+journalcnt, + okcnt+sparecnt+journalcnt==1?"":"s"); if (okcnt < (unsigned)content->array.raid_disks) fprintf(stderr, " (out of %d)", content->array.raid_disks); fprintf(stderr, "\n"); } + + if (st->ss->validate_container) { + struct mdinfo *devices_list; + struct mdinfo *info_devices = xmalloc(sizeof(struct mdinfo)*(okcnt+sparecnt)); + unsigned int count; + devices_list = NULL; + for (count = 0; count < okcnt+sparecnt; count++) { + info_devices[count] = devices[count].i; + info_devices[count].next = devices_list; + devices_list = &info_devices[count]; + } + if (st->ss->validate_container(devices_list)) + pr_err("Mismatch detected!\n"); + free(info_devices); + } + st->ss->free_super(st); sysfs_uevent(content, "change"); if (err_ok && okcnt < (unsigned)content->array.raid_disks) @@ -1065,6 +1129,8 @@ static int start_array(int mdfd, fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt); if (sparecnt) fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s"); + if (content->journal_clean) + fprintf(stderr, " and %d journal", journalcnt); fprintf(stderr, ".\n"); } if (content->reshape_active && @@ -1073,12 +1139,16 @@ static int start_array(int mdfd, /* might need to increase the size * of the stripe cache - default is 256 */ - if (256 < 4 * (content->array.chunk_size/4096)) { + int chunk_size = content->array.chunk_size; + if (content->reshape_active && + content->new_chunk > chunk_size) + chunk_size = content->new_chunk; + if (256 < 4 * ((chunk_size+4065)/4096)) { struct mdinfo *sra = sysfs_read(mdfd, NULL, 0); if (sra) sysfs_set_num(sra, NULL, "stripe_cache_size", - (4 * content->array.chunk_size / 4096) + 1); + (4 * chunk_size / 4096) + 1); sysfs_free(sra); } } @@ -1120,15 +1190,12 @@ static int start_array(int mdfd, if (!enough(content->array.level, content->array.raid_disks, content->array.layout, 1, avail)) - pr_err("Not enough devices to " - "start the array.\n"); + pr_err("Not enough devices to start the array.\n"); else if (!enough(content->array.level, content->array.raid_disks, content->array.layout, clean, avail)) - pr_err("Not enough devices to " - "start the array while not clean " - "- consider --force.\n"); + pr_err("Not enough devices to start the array while not clean - consider --force.\n"); return 1; } @@ -1153,9 +1220,7 @@ static int start_array(int mdfd, content->array.raid_disks, content->array.layout, clean, avail)) - fprintf(stderr, " - not enough to start the " - "array while not clean - consider " - "--force.\n"); + fprintf(stderr, " - not enough to start the array while not clean - consider --force.\n"); else { if (req_cnt == (unsigned)content->array.raid_disks) fprintf(stderr, " - need all %d to start it", req_cnt); @@ -1237,7 +1302,8 @@ int Assemble(struct supertype *st, char *mddev, int *best = NULL; /* indexed by raid_disk */ int bestcnt = 0; int devcnt; - unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt; + unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt, journalcnt; + int journal_clean = 0; int i; int was_forced = 0; int most_recent = 0; @@ -1293,7 +1359,7 @@ try_again: mddev ? mddev : "further assembly"); content = &info; - if (st) + if (st && c->force) st->ignore_hw_compat = 1; num_devs = select_devices(devlist, ident, &st, &content, c, inargv, auto_assem); @@ -1317,7 +1383,10 @@ try_again: */ if (map_lock(&map)) pr_err("failed to get exclusive lock on mapfile - continue anyway...\n"); - mp = map_by_uuid(&map, content->uuid); + if (c->update && strcmp(c->update,"uuid") == 0) + mp = NULL; + else + mp = map_by_uuid(&map, content->uuid); if (mp) { struct mdinfo *dv; /* array already exists. */ @@ -1475,6 +1544,7 @@ try_again: okcnt = 0; replcnt = 0; sparecnt=0; + journalcnt=0; rebuilding_cnt=0; for (i=0; i< bestcnt; i++) { int j = best[i]; @@ -1485,8 +1555,13 @@ try_again: /* note: we ignore error flags in multipath arrays * as they don't make sense */ - if (content->array.level != LEVEL_MULTIPATH) - if (!(devices[j].i.disk.state & (1<array.level != LEVEL_MULTIPATH) { + if (devices[j].i.disk.state & (1<journal_device_required) + journalcnt++; + else /* unexpected journal, mark as faulty */ + devices[j].i.disk.state |= (1<array.raid_disks * 2) { if (devices[j].i.recovery_start == MaxSector || (content->reshape_active && - ((i >= content->array.raid_disks - content->delta_disks) || - (i >= content->array.raid_disks - content->delta_disks - 1 - && content->array.level == 4)))) { + i >= content->array.raid_disks - content->delta_disks)) { if (!avail[i/2]) { okcnt++; avail[i/2]=1; @@ -1530,7 +1606,7 @@ try_again: replcnt++; } else rebuilding_cnt++; - } else + } else if (devices[j].i.disk.raid_disk != MD_DISK_ROLE_JOURNAL) sparecnt++; } } @@ -1590,11 +1666,15 @@ try_again: #ifndef MDASSEMBLE sysfs_init(content, mdfd, NULL); #endif + /* after reload context, store journal_clean in context */ + content->journal_clean = journal_clean; for (i=0; i= content->array.raid_disks * 2) + if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL) + desired_state = (1<= content->array.raid_disks * 2) desired_state = 0; else if (i & 1) desired_state = (1<verbose > 0) - pr_err(":%s has an active reshape - checking " - "if critical section needs to be restored\n", + pr_err("%s has an active reshape - checking if critical section needs to be restored\n", chosen_name); if (!c->backup_file) c->backup_file = locate_backup(content->sys_name); @@ -1706,8 +1785,7 @@ try_again: c->backup_file, c->verbose > 0); if (err && c->invalid_backup) { if (c->verbose > 0) - pr_err("continuing" - " without restoring backup\n"); + pr_err("continuing without restoring backup\n"); err = 0; } } @@ -1743,7 +1821,7 @@ try_again: rv = start_array(mdfd, mddev, content, st, ident, best, bestcnt, chosen_drive, devices, okcnt, sparecnt, - rebuilding_cnt, + rebuilding_cnt, journalcnt, c, clean, avail, start_partial_ok, pre_exist != NULL, @@ -1793,18 +1871,17 @@ int assemble_container_content(struct supertype *st, int mdfd, struct mdinfo *content, struct context *c, char *chosen_name, int *result) { - struct mdinfo *dev, *sra; + struct mdinfo *dev, *sra, *dev2; int working = 0, preexist = 0; int expansion = 0; - struct map_ent *map = NULL; int old_raid_disks; int start_reshape; - char *avail = NULL; + char *avail; int err; sysfs_init(content, mdfd, NULL); - sra = sysfs_read(mdfd, NULL, GET_VERSION); + sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS); if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) { if (content->array.major_version == -1 && content->array.minor_version == -2 && @@ -1812,8 +1889,7 @@ int assemble_container_content(struct supertype *st, int mdfd, content->text_version[0] == '/') content->text_version[0] = '-'; if (sysfs_set_array(content, md_get_version(mdfd)) != 0) { - if (sra) - sysfs_free(sra); + sysfs_free(sra); return 1; } } @@ -1831,8 +1907,22 @@ int assemble_container_content(struct supertype *st, int mdfd, if (st->ss->external && content->recovery_blocked && start_reshape) block_subarray(content); - if (sra) - sysfs_free(sra); + for (dev2 = sra->devs; dev2; dev2 = dev2->next) { + for (dev = content->devs; dev; dev = dev->next) + if (dev2->disk.major == dev->disk.major && + dev2->disk.minor == dev->disk.minor) + break; + if (dev) + continue; + /* Don't want this one any more */ + if (sysfs_set_str(sra, dev2, "slot", "none") < 0 && + errno == EBUSY) { + pr_err("Cannot remove old device %s: not updating %s\n", dev2->sys_name, sra->sys_name); + sysfs_free(sra); + return 1; + } + sysfs_set_str(sra, dev2, "state", "remove"); + } old_raid_disks = content->array.raid_disks - content->delta_disks; avail = xcalloc(content->array.raid_disks, 1); for (dev = content->devs; dev; dev = dev->next) { @@ -1847,14 +1937,62 @@ int assemble_container_content(struct supertype *st, int mdfd, } else if (errno == EEXIST) preexist++; } + sysfs_free(sra); if (working + expansion == 0 && c->runstop <= 0) { free(avail); return 1;/* Nothing new, don't try to start */ } - map_update(&map, fd2devnm(mdfd), - content->text_version, + map_update(NULL, fd2devnm(mdfd), content->text_version, content->uuid, chosen_name); + if (content->consistency_policy == CONSISTENCY_POLICY_PPL && + st->ss->validate_ppl) { + content->array.state |= 1; + err = 0; + + for (dev = content->devs; dev; dev = dev->next) { + int dfd; + char *devpath; + int ret; + + ret = st->ss->validate_ppl(st, content, dev); + if (ret == 0) + continue; + + if (ret < 0) { + err = 1; + break; + } + + if (!c->force) { + pr_err("%s contains invalid PPL - consider --force or --update-subarray with --update=no-ppl\n", + chosen_name); + content->array.state &= ~1; + avail[dev->disk.raid_disk] = 0; + break; + } + + /* have --force - overwrite the invalid ppl */ + devpath = map_dev(dev->disk.major, dev->disk.minor, 0); + dfd = dev_open(devpath, O_RDWR); + if (dfd < 0) { + pr_err("Failed to open %s\n", devpath); + err = 1; + break; + } + + err = st->ss->write_init_ppl(st, content, dfd); + close(dfd); + + if (err) + break; + } + + if (err) { + free(avail); + return err; + } + } if (enough(content->array.level, content->array.raid_disks, content->array.layout, content->array.state & 1, avail) == 0) {