X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=e53afbbc9877a3ad1670f9e78bd2e781c627698b;hb=a2b9798159755b6f5e867fae0dd3e25af59fc85e;hp=3abaca5adecc6249b791a7e297a7da468837c83d;hpb=fa09d4961e5c72da3c7f78d53a7d64f5196110a3;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index 3abaca5a..e53afbbc 100644 --- a/super-intel.c +++ b/super-intel.c @@ -64,7 +64,6 @@ struct imsm_disk { #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */ #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */ #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */ -#define USABLE_DISK __cpu_to_le32(0x08) /* Fully usable unless FAILED_DISK is set */ __u32 status; /* 0xF0 - 0xF3 */ __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */ #define IMSM_DISK_FILLERS 4 @@ -247,6 +246,7 @@ struct intel_super { int creating_imsm; /* flag to indicate container creation */ int current_vol; /* index of raid device undergoing creation */ __u32 create_offset; /* common start for 'current_vol' */ + __u32 random; /* random data for seeding new family numbers */ struct intel_dev *devlist; struct dl { struct dl *next; @@ -265,6 +265,14 @@ struct intel_super { struct bbm_log *bbm_log; const char *hba; /* device path of the raid controller for this metadata */ const struct imsm_orom *orom; /* platform firmware support */ + struct intel_super *next; /* (temp) list for disambiguating family_num */ +}; + +struct intel_disk { + struct imsm_disk disk; + #define IMSM_UNKNOWN_OWNER (-1) + int owner; + struct intel_disk *next; }; struct extent { @@ -612,6 +620,21 @@ static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl) } #ifndef MDASSEMBLE +static int is_spare(struct imsm_disk *disk) +{ + return (disk->status & SPARE_DISK) == SPARE_DISK; +} + +static int is_configured(struct imsm_disk *disk) +{ + return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK; +} + +static int is_failed(struct imsm_disk *disk) +{ + return (disk->status & FAILED_DISK) == FAILED_DISK; +} + static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) { __u64 sz; @@ -676,7 +699,6 @@ static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved) { struct imsm_disk *disk = __get_imsm_disk(mpb, index); char str[MAX_RAID_SERIAL_LEN + 1]; - __u32 s; __u64 sz; if (index < 0) @@ -685,11 +707,9 @@ static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved) printf("\n"); snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); printf(" Disk%02d Serial : %s\n", index, str); - s = disk->status; - printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "", - s&CONFIGURED_DISK ? " active" : "", - s&FAILED_DISK ? " failed" : "", - s&USABLE_DISK ? " usable" : ""); + printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", + is_configured(disk) ? " active" : "", + is_failed(disk) ? " failed" : ""); printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); sz = __le32_to_cpu(disk->total_blocks) - reserved; printf(" Usable Size : %llu%s\n", (unsigned long long)sz, @@ -714,6 +734,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Magic : %s\n", str); snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb)); printf(" Version : %s\n", get_imsm_version(mpb)); + printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num)); printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num)); printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num)); getinfo_super_imsm(st, &info); @@ -755,6 +776,23 @@ static void examine_super_imsm(struct supertype *st, char *homehost) } static void brief_examine_super_imsm(struct supertype *st, int verbose) +{ + /* We just write a generic IMSM ARRAY entry */ + struct mdinfo info; + char nbuf[64]; + struct intel_super *super = st->sb; + + if (!super->anchor->num_raid_devs) { + printf("ARRAY metadata=imsm\n"); + return; + } + + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf, ':'); + printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5); +} + +static void brief_examine_subarrays_imsm(struct supertype *st, int verbose) { /* We just write a generic IMSM ARRAY entry */ struct mdinfo info; @@ -777,7 +815,6 @@ static void brief_examine_super_imsm(struct supertype *st, int verbose) printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n", dev->volume, nbuf + 5, i, nbuf1 + 5); } - printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5); } static void export_examine_super_imsm(struct supertype *st) @@ -1089,7 +1126,7 @@ static int match_home_imsm(struct supertype *st, char *homehost) /* the imsm metadata format does not specify any host * identification information. We return -1 since we can never * confirm nor deny whether a given array is "meant" for this - * host. We rely on compare_super and the 'family_num' field to + * host. We rely on compare_super and the 'family_num' fields to * exclude member disks that do not belong, and we rely on * mdadm.conf to specify the arrays that should be assembled. * Auto-assembly may still pick up "foreign" arrays. @@ -1117,7 +1154,7 @@ static void uuid_from_super_imsm(struct supertype *st, int uuid[4]) */ /* imsm does not track uuid's so we synthesis one using sha1 on * - The signature (Which is constant for all imsm array, but no matter) - * - the family_num of the container + * - the orig_family_num of the container * - the index number of the volume * - the 'serial' number of the volume. * Hopefully these are all constant. @@ -1127,10 +1164,18 @@ static void uuid_from_super_imsm(struct supertype *st, int uuid[4]) char buf[20]; struct sha1_ctx ctx; struct imsm_dev *dev = NULL; + __u32 family_num; + /* some mdadm versions failed to set ->orig_family_num, in which + * case fall back to ->family_num. orig_family_num will be + * fixed up with the first metadata update. + */ + family_num = super->anchor->orig_family_num; + if (family_num == 0) + family_num = super->anchor->family_num; sha1_init_ctx(&ctx); sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx); - sha1_process_bytes(&super->anchor->family_num, sizeof(__u32), &ctx); + sha1_process_bytes(&family_num, sizeof(__u32), &ctx); if (super->current_vol >= 0) dev = get_imsm_dev(super, super->current_vol); if (dev) { @@ -1256,7 +1301,11 @@ static void fixup_container_spare_uuid(struct mdinfo *inf) struct supertype *_cst; /* container supertype */ _cst = array_list->st; - _sst = _cst->ss->match_metadata_desc(inf->text_version); + if (_cst) + _sst = _cst->ss->match_metadata_desc(inf->text_version); + else + _sst = NULL; + if (_sst) { memcpy(inf->uuid, array_list->uuid, sizeof(int[4])); free(_sst); @@ -1270,7 +1319,6 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) { struct intel_super *super = st->sb; struct imsm_disk *disk; - __u32 s; if (super->current_vol >= 0) { getinfo_super_imsm_volume(st, info); @@ -1306,14 +1354,13 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) disk = &super->disks->disk; info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved; info->component_size = reserved; - s = disk->status; - info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0; + info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0; /* we don't change info->disk.raid_disk here because * this state will be finalized in mdmon after we have * found the 'most fresh' version of the metadata */ - info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0; - info->disk.state |= s & SPARE_DISK ? 0 : (1 << MD_DISK_SYNC); + info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0; + info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC); } /* only call uuid_from_super_imsm when this disk is part of a populated container, @@ -1438,7 +1485,19 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst) */ if (first->anchor->num_raid_devs > 0 && sec->anchor->num_raid_devs > 0) { - if (first->anchor->family_num != sec->anchor->family_num) + /* Determine if these disks might ever have been + * related. Further disambiguation can only take place + * in load_super_imsm_all + */ + __u32 first_family = first->anchor->orig_family_num; + __u32 sec_family = sec->anchor->orig_family_num; + + if (first_family == 0) + first_family = first->anchor->family_num; + if (sec_family == 0) + sec_family = sec->anchor->family_num; + + if (first_family != sec_family) return 3; } @@ -1468,17 +1527,17 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst) dv->next = first->devlist; first->devlist = dv; } - if (i <= sec->anchor->num_raid_devs) { + if (i < sec->anchor->num_raid_devs) { /* allocation failure */ free_devlist(first); fprintf(stderr, "imsm: failed to associate spare\n"); return 3; } - for (i = 0; i < sec->anchor->num_raid_devs; i++) - imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i)); - first->anchor->num_raid_devs = sec->anchor->num_raid_devs; + first->anchor->orig_family_num = sec->anchor->orig_family_num; first->anchor->family_num = sec->anchor->family_num; + for (i = 0; i < sec->anchor->num_raid_devs; i++) + imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i)); } return 0; @@ -1508,7 +1567,6 @@ static void fd2devname(int fd, char *name) snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm); } - extern int scsi_get_serial(int fd, void *buf, size_t buf_len); static int imsm_read_serial(int fd, char *devname, @@ -1602,14 +1660,32 @@ static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super) return dl; } +static struct imsm_disk * +__serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx) +{ + int i; + + for (i = 0; i < mpb->num_disks; i++) { + struct imsm_disk *disk = __get_imsm_disk(mpb, i); + + if (serialcmp(disk->serial, serial) == 0) { + if (idx) + *idx = i; + return disk; + } + } + + return NULL; +} + static int load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) { + struct imsm_disk *disk; struct dl *dl; struct stat stb; int rv; - int i; - int alloc = 1; + char name[40]; __u8 serial[MAX_RAID_SERIAL_LEN]; rv = imsm_read_serial(fd, devname, serial); @@ -1617,16 +1693,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) if (rv != 0) return 2; - /* check if this is a disk we have seen before. it may be a spare in - * super->disks while the current anchor believes it is a raid member, - * check if we need to update dl->index - */ - dl = serial_to_dl(serial, super); - if (!dl) - dl = malloc(sizeof(*dl)); - else - alloc = 0; - + dl = calloc(1, sizeof(*dl)); if (!dl) { if (devname) fprintf(stderr, @@ -1635,53 +1702,35 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) return 2; } - if (alloc) { - fstat(fd, &stb); - dl->major = major(stb.st_rdev); - dl->minor = minor(stb.st_rdev); - dl->next = super->disks; - dl->fd = keep_fd ? fd : -1; - dl->devname = devname ? strdup(devname) : NULL; - serialcpy(dl->serial, serial); - dl->index = -2; - dl->e = NULL; - } else if (keep_fd) { - close(dl->fd); - dl->fd = fd; - } + fstat(fd, &stb); + dl->major = major(stb.st_rdev); + dl->minor = minor(stb.st_rdev); + dl->next = super->disks; + dl->fd = keep_fd ? fd : -1; + assert(super->disks == NULL); + super->disks = dl; + serialcpy(dl->serial, serial); + dl->index = -2; + dl->e = NULL; + fd2devname(fd, name); + if (devname) + dl->devname = strdup(devname); + else + dl->devname = strdup(name); /* look up this disk's index in the current anchor */ - for (i = 0; i < super->anchor->num_disks; i++) { - struct imsm_disk *disk_iter; - - disk_iter = __get_imsm_disk(super->anchor, i); - - if (serialcmp(disk_iter->serial, dl->serial) == 0) { - dl->disk = *disk_iter; - /* only set index on disks that are a member of a - * populated contianer, i.e. one with raid_devs - */ - if (dl->disk.status & FAILED_DISK) - dl->index = -2; - else if (dl->disk.status & SPARE_DISK) - dl->index = -1; - else - dl->index = i; - - break; - } - } - - /* no match, maybe a stale failed drive */ - if (i == super->anchor->num_disks && dl->index >= 0) { - dl->disk = *__get_imsm_disk(super->anchor, dl->index); - if (dl->disk.status & FAILED_DISK) + disk = __serial_to_disk(dl->serial, super->anchor, &dl->index); + if (disk) { + dl->disk = *disk; + /* only set index on disks that are a member of a + * populated contianer, i.e. one with raid_devs + */ + if (is_failed(&dl->disk)) dl->index = -2; + else if (is_spare(&dl->disk)) + dl->index = -1; } - if (alloc) - super->disks = dl; - return 0; } @@ -1823,7 +1872,6 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) struct stat; struct imsm_super *anchor; __u32 check_sum; - int rc; get_dev_size(fd, NULL, &dsize); @@ -1885,10 +1933,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 2; } - rc = load_imsm_disk(fd, super, devname, 0); - if (rc == 0) - rc = parse_raid_devices(super); - return rc; + return 0; } /* read the extended mpb */ @@ -1924,11 +1969,23 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) */ super->bbm_log = __get_imsm_bbm_log(super->anchor); - rc = load_imsm_disk(fd, super, devname, 0); - if (rc == 0) - rc = parse_raid_devices(super); + return 0; +} + +static int +load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd) +{ + int err; + + err = load_imsm_mpb(fd, super, devname); + if (err) + return err; + err = load_imsm_disk(fd, super, devname, keep_fd); + if (err) + return err; + err = parse_raid_devices(super); - return rc; + return err; } static void __free_imsm_disk(struct dl *d) @@ -2058,19 +2115,333 @@ static int find_missing(struct intel_super *super) return 0; } +static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list) +{ + struct intel_disk *idisk = disk_list; + + while (idisk) { + if (serialcmp(idisk->disk.serial, serial) == 0) + break; + idisk = idisk->next; + } + + return idisk; +} + +static int __prep_thunderdome(struct intel_super **table, int tbl_size, + struct intel_super *super, + struct intel_disk **disk_list) +{ + struct imsm_disk *d = &super->disks->disk; + struct imsm_super *mpb = super->anchor; + int i, j; + + for (i = 0; i < tbl_size; i++) { + struct imsm_super *tbl_mpb = table[i]->anchor; + struct imsm_disk *tbl_d = &table[i]->disks->disk; + + if (tbl_mpb->family_num == mpb->family_num) { + if (tbl_mpb->check_sum == mpb->check_sum) { + dprintf("%s: mpb from %d:%d matches %d:%d\n", + __func__, super->disks->major, + super->disks->minor, + table[i]->disks->major, + table[i]->disks->minor); + break; + } + + if (((is_configured(d) && !is_configured(tbl_d)) || + is_configured(d) == is_configured(tbl_d)) && + tbl_mpb->generation_num < mpb->generation_num) { + /* current version of the mpb is a + * better candidate than the one in + * super_table, but copy over "cross + * generational" status + */ + struct intel_disk *idisk; + + dprintf("%s: mpb from %d:%d replaces %d:%d\n", + __func__, super->disks->major, + super->disks->minor, + table[i]->disks->major, + table[i]->disks->minor); + + idisk = disk_list_get(tbl_d->serial, *disk_list); + if (idisk && is_failed(&idisk->disk)) + tbl_d->status |= FAILED_DISK; + break; + } else { + struct intel_disk *idisk; + struct imsm_disk *disk; + + /* tbl_mpb is more up to date, but copy + * over cross generational status before + * returning + */ + disk = __serial_to_disk(d->serial, mpb, NULL); + if (disk && is_failed(disk)) + d->status |= FAILED_DISK; + + idisk = disk_list_get(d->serial, *disk_list); + if (idisk) { + idisk->owner = i; + if (disk && is_configured(disk)) + idisk->disk.status |= CONFIGURED_DISK; + } + + dprintf("%s: mpb from %d:%d prefer %d:%d\n", + __func__, super->disks->major, + super->disks->minor, + table[i]->disks->major, + table[i]->disks->minor); + + return tbl_size; + } + } + } + + if (i >= tbl_size) + table[tbl_size++] = super; + else + table[i] = super; + + /* update/extend the merged list of imsm_disk records */ + for (j = 0; j < mpb->num_disks; j++) { + struct imsm_disk *disk = __get_imsm_disk(mpb, j); + struct intel_disk *idisk; + + idisk = disk_list_get(disk->serial, *disk_list); + if (idisk) { + idisk->disk.status |= disk->status; + if (is_configured(&idisk->disk) || + is_failed(&idisk->disk)) + idisk->disk.status &= ~(SPARE_DISK); + } else { + idisk = calloc(1, sizeof(*idisk)); + if (!idisk) + return -1; + idisk->owner = IMSM_UNKNOWN_OWNER; + idisk->disk = *disk; + idisk->next = *disk_list; + *disk_list = idisk; + } + + if (serialcmp(idisk->disk.serial, d->serial) == 0) + idisk->owner = i; + } + + return tbl_size; +} + +static struct intel_super * +validate_members(struct intel_super *super, struct intel_disk *disk_list, + const int owner) +{ + struct imsm_super *mpb = super->anchor; + int ok_count = 0; + int i; + + for (i = 0; i < mpb->num_disks; i++) { + struct imsm_disk *disk = __get_imsm_disk(mpb, i); + struct intel_disk *idisk; + + idisk = disk_list_get(disk->serial, disk_list); + if (idisk) { + if (idisk->owner == owner || + idisk->owner == IMSM_UNKNOWN_OWNER) + ok_count++; + else + dprintf("%s: '%.16s' owner %d != %d\n", + __func__, disk->serial, idisk->owner, + owner); + } else { + dprintf("%s: unknown disk %x [%d]: %.16s\n", + __func__, __le32_to_cpu(mpb->family_num), i, + disk->serial); + break; + } + } + + if (ok_count == mpb->num_disks) + return super; + return NULL; +} + +static void show_conflicts(__u32 family_num, struct intel_super *super_list) +{ + struct intel_super *s; + + for (s = super_list; s; s = s->next) { + if (family_num != s->anchor->family_num) + continue; + fprintf(stderr, "Conflict, offlining family %#x on '%s'\n", + __le32_to_cpu(family_num), s->disks->devname); + } +} + +static struct intel_super * +imsm_thunderdome(struct intel_super **super_list, int len) +{ + struct intel_super *super_table[len]; + struct intel_disk *disk_list = NULL; + struct intel_super *champion, *spare; + struct intel_super *s, **del; + int tbl_size = 0; + int conflict; + int i; + + memset(super_table, 0, sizeof(super_table)); + for (s = *super_list; s; s = s->next) + tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list); + + for (i = 0; i < tbl_size; i++) { + struct imsm_disk *d; + struct intel_disk *idisk; + struct imsm_super *mpb = super_table[i]->anchor; + + s = super_table[i]; + d = &s->disks->disk; + + /* 'd' must appear in merged disk list for its + * configuration to be valid + */ + idisk = disk_list_get(d->serial, disk_list); + if (idisk && idisk->owner == i) + s = validate_members(s, disk_list, i); + else + s = NULL; + + if (!s) + dprintf("%s: marking family: %#x from %d:%d offline\n", + __func__, mpb->family_num, + super_table[i]->disks->major, + super_table[i]->disks->minor); + super_table[i] = s; + } + + /* This is where the mdadm implementation differs from the Windows + * driver which has no strict concept of a container. We can only + * assemble one family from a container, so when returning a prodigal + * array member to this system the code will not be able to disambiguate + * the container contents that should be assembled ("foreign" versus + * "local"). It requires user intervention to set the orig_family_num + * to a new value to establish a new container. The Windows driver in + * this situation fixes up the volume name in place and manages the + * foreign array as an independent entity. + */ + s = NULL; + spare = NULL; + conflict = 0; + for (i = 0; i < tbl_size; i++) { + struct intel_super *tbl_ent = super_table[i]; + int is_spare = 0; + + if (!tbl_ent) + continue; + + if (tbl_ent->anchor->num_raid_devs == 0) { + spare = tbl_ent; + is_spare = 1; + } + + if (s && !is_spare) { + show_conflicts(tbl_ent->anchor->family_num, *super_list); + conflict++; + } else if (!s && !is_spare) + s = tbl_ent; + } + + if (!s) + s = spare; + if (!s) { + champion = NULL; + goto out; + } + champion = s; + + if (conflict) + fprintf(stderr, "Chose family %#x on '%s', " + "assemble conflicts to new container with '--update=uuid'\n", + __le32_to_cpu(s->anchor->family_num), s->disks->devname); + + /* collect all dl's onto 'champion', and update them to + * champion's version of the status + */ + for (s = *super_list; s; s = s->next) { + struct imsm_super *mpb = champion->anchor; + struct dl *dl = s->disks; + + if (s == champion) + continue; + + for (i = 0; i < mpb->num_disks; i++) { + struct imsm_disk *disk; + + disk = __serial_to_disk(dl->serial, mpb, &dl->index); + if (disk) { + dl->disk = *disk; + /* only set index on disks that are a member of + * a populated contianer, i.e. one with + * raid_devs + */ + if (is_failed(&dl->disk)) + dl->index = -2; + else if (is_spare(&dl->disk)) + dl->index = -1; + break; + } + } + + if (i >= mpb->num_disks) { + struct intel_disk *idisk; + + idisk = disk_list_get(dl->serial, disk_list); + if (is_spare(&idisk->disk) && + !is_failed(&idisk->disk) && !is_configured(&idisk->disk)) + dl->index = -1; + else { + dl->index = -2; + continue; + } + } + + dl->next = champion->disks; + champion->disks = dl; + s->disks = NULL; + } + + /* delete 'champion' from super_list */ + for (del = super_list; *del; ) { + if (*del == champion) { + *del = (*del)->next; + break; + } else + del = &(*del)->next; + } + champion->next = NULL; + + out: + while (disk_list) { + struct intel_disk *idisk = disk_list; + + disk_list = disk_list->next; + free(idisk); + } + + return champion; +} + static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, char *devname, int keep_fd) { struct mdinfo *sra; - struct intel_super *super; - struct mdinfo *sd, *best = NULL; - __u32 bestgen = 0; - __u32 gen; - char nm[20]; - int dfd; - int rv; + struct intel_super *super_list = NULL; + struct intel_super *super = NULL; int devnum = fd2devnum(fd); + struct mdinfo *sd; int retry; + int err = 0; + int i; enum sysfs_read_flags flags; flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; @@ -2087,93 +2458,78 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, strcmp(sra->text_version, "imsm") != 0) return 1; - super = alloc_super(0); - if (!super) - return 1; + /* load all mpbs */ + for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) { + struct intel_super *s = alloc_super(0); + char nm[20]; + int dfd; - /* find the most up to date disk in this array, skipping spares */ - for (sd = sra->devs; sd; sd = sd->next) { + err = 1; + if (!s) + goto error; + s->next = super_list; + super_list = s; + + err = 2; sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY); - if (dfd < 0) { - free_imsm(super); - return 2; - } - rv = load_imsm_mpb(dfd, super, NULL); + if (dfd < 0) + goto error; + + err = load_and_parse_mpb(dfd, s, NULL, keep_fd); /* retry the load if we might have raced against mdmon */ - if (rv == 3 && mdmon_running(devnum)) + if (err == 3 && mdmon_running(devnum)) for (retry = 0; retry < 3; retry++) { usleep(3000); - rv = load_imsm_mpb(dfd, super, NULL); - if (rv != 3) + err = load_and_parse_mpb(dfd, s, NULL, keep_fd); + if (err != 3) break; } if (!keep_fd) close(dfd); - if (rv == 0) { - if (super->anchor->num_raid_devs == 0) - gen = 0; - else - gen = __le32_to_cpu(super->anchor->generation_num); - if (!best || gen > bestgen) { - bestgen = gen; - best = sd; - } - } else { - free_imsm(super); - return rv; - } + if (err) + goto error; } - if (!best) { - free_imsm(super); - return 1; + /* all mpbs enter, maybe one leaves */ + super = imsm_thunderdome(&super_list, i); + if (!super) { + err = 1; + goto error; } - /* load the most up to date anchor */ - sprintf(nm, "%d:%d", best->disk.major, best->disk.minor); - dfd = dev_open(nm, O_RDONLY); - if (dfd < 0) { - free_imsm(super); - return 1; - } - rv = load_imsm_mpb(dfd, super, NULL); - close(dfd); - if (rv != 0) { + if (find_missing(super) != 0) { free_imsm(super); - return 2; + err = 2; + goto error; } - /* re-parse the disk list with the current anchor */ - for (sd = sra->devs ; sd ; sd = sd->next) { - sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); - dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY); - if (dfd < 0) { + if (st->subarray[0]) { + if (atoi(st->subarray) <= super->anchor->num_raid_devs) + super->current_vol = atoi(st->subarray); + else { free_imsm(super); - return 2; + err = 1; + goto error; } - load_imsm_disk(dfd, super, NULL, keep_fd); - if (!keep_fd) - close(dfd); } + err = 0; + error: + while (super_list) { + struct intel_super *s = super_list; - if (find_missing(super) != 0) { - free_imsm(super); - return 2; + super_list = super_list->next; + free_imsm(s); } - if (st->subarray[0]) { - if (atoi(st->subarray) <= super->anchor->num_raid_devs) - super->current_vol = atoi(st->subarray); - else - return 1; - } + if (err) + return err; *sbp = super; st->container_dev = devnum; - if (st->ss == NULL) { + if (err == 0 && st->ss == NULL) { st->ss = &super_imsm; st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; @@ -2193,8 +2549,8 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0) return 0; #endif - if (st->subarray[0]) - return 1; /* FIXME */ + + free_super_imsm(st); super = alloc_super(0); if (!super) { @@ -2204,7 +2560,7 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) return 1; } - rv = load_imsm_mpb(fd, super, devname); + rv = load_and_parse_mpb(fd, super, devname, 0); if (rv) { if (devname) @@ -2215,6 +2571,15 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) return rv; } + if (st->subarray[0]) { + if (atoi(st->subarray) <= super->anchor->num_raid_devs) + super->current_vol = atoi(st->subarray); + else { + free_imsm(super); + return 1; + } + } + st->sb = super; if (st->ss == NULL) { st->ss = &super_imsm; @@ -2526,7 +2891,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, super->anchor->num_disks++; } set_imsm_ord_tbl_ent(map, dk->number, dl->index); - dl->disk.status = CONFIGURED_DISK | USABLE_DISK; + dl->disk.status = CONFIGURED_DISK; /* if we are creating the first raid device update the family number */ if (super->current_vol == 0) { @@ -2536,8 +2901,10 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, *_dev = *dev; *_disk = dl->disk; - sum = __gen_imsm_checksum(mpb); + sum = random32(); + sum += __gen_imsm_checksum(mpb); mpb->family_num = __cpu_to_le32(sum); + mpb->orig_family_num = mpb->family_num; } return 0; @@ -2592,7 +2959,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, size /= 512; serialcpy(dd->disk.serial, dd->serial); dd->disk.total_blocks = __cpu_to_le32(size); - dd->disk.status = USABLE_DISK | SPARE_DISK; + dd->disk.status = SPARE_DISK; if (sysfs_disk_to_scsi_id(fd, &id) == 0) dd->disk.scsi_id = __cpu_to_le32(id); else @@ -2634,6 +3001,7 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) mpb->disk[0] = d->disk; sum = __gen_imsm_checksum(mpb); mpb->family_num = __cpu_to_le32(sum); + mpb->orig_family_num = 0; sum = __gen_imsm_checksum(mpb); mpb->check_sum = __cpu_to_le32(sum); @@ -2668,6 +3036,12 @@ static int write_super_imsm(struct intel_super *super, int doclose) generation++; mpb->generation_num = __cpu_to_le32(generation); + /* fix up cases where previous mdadm releases failed to set + * orig_family_num + */ + if (mpb->orig_family_num == 0) + mpb->orig_family_num = mpb->family_num; + mpb_size += sizeof(struct imsm_disk) * mpb->num_disks; for (d = super->disks; d; d = d->next) { if (d->index == -1) @@ -2711,17 +3085,16 @@ static int write_super_imsm(struct intel_super *super, int doclose) } -static int create_array(struct supertype *st) +static int create_array(struct supertype *st, int dev_idx) { size_t len; struct imsm_update_create_array *u; struct intel_super *super = st->sb; - struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); + struct imsm_dev *dev = get_imsm_dev(super, dev_idx); struct imsm_map *map = get_imsm_map(dev, 0); struct disk_info *inf; struct imsm_disk *disk; int i; - int idx; len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) + sizeof(*inf) * map->num_members; @@ -2733,11 +3106,12 @@ static int create_array(struct supertype *st) } u->type = update_create_array; - u->dev_idx = super->current_vol; + u->dev_idx = dev_idx; imsm_copy_dev(&u->dev, dev); inf = get_disk_info(u); for (i = 0; i < map->num_members; i++) { - idx = get_imsm_disk_idx(dev, i); + int idx = get_imsm_disk_idx(dev, i); + disk = get_imsm_disk(super, idx); serialcpy(inf[i].serial, disk->serial); } @@ -2771,21 +3145,26 @@ static int _add_disk(struct supertype *st) static int write_init_super_imsm(struct supertype *st) { + struct intel_super *super = st->sb; + int current_vol = super->current_vol; + + /* we are done with current_vol reset it to point st at the container */ + super->current_vol = -1; + if (st->update_tail) { /* queue the recently created array / added disk * as a metadata update */ - struct intel_super *super = st->sb; struct dl *d; int rv; /* determine if we are creating a volume or adding a disk */ - if (super->current_vol < 0) { + if (current_vol < 0) { /* in the add disk case we are running in mdmon * context, so don't close fd's */ return _add_disk(st); } else - rv = create_array(st); + rv = create_array(st, current_vol); for (d = super->disks; d ; d = d->next) { close(d->fd); @@ -3390,7 +3769,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st) struct dl *d; int idx; int skip; - __u32 s; __u32 ord; skip = 0; @@ -3402,11 +3780,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st) if (d == NULL) skip = 1; - - s = d ? d->disk.status : 0; - if (s & FAILED_DISK) - skip = 1; - if (!(s & USABLE_DISK)) + if (d && is_failed(&d->disk)) skip = 1; if (ord & IMSM_ORD_REBUILD) skip = 1; @@ -3513,8 +3887,7 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, insync = 2; disk = get_imsm_disk(super, idx); - if (!disk || disk->status & FAILED_DISK || - ord & IMSM_ORD_REBUILD) + if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD) insync--; /* no in-sync disks left in this mirror the @@ -3564,8 +3937,7 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev) idx = ord_to_idx(ord); disk = get_imsm_disk(super, idx); - if (!disk || disk->status & FAILED_DISK || - ord & IMSM_ORD_REBUILD) + if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD) failed++; } @@ -3624,10 +3996,11 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) return 0; ord = __le32_to_cpu(map->disk_ord_tbl[slot]); - if ((disk->status & FAILED_DISK) && (ord & IMSM_ORD_REBUILD)) + if (is_failed(disk) && (ord & IMSM_ORD_REBUILD)) return 0; disk->status |= FAILED_DISK; + disk->status &= ~CONFIGURED_DISK; set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD); if (~map->failed_disk_num == 0) map->failed_disk_num = slot; @@ -3824,7 +4197,7 @@ static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_a if (dl->index == i) break; - if (dl && dl->disk.status & FAILED_DISK) + if (dl && is_failed(&dl->disk)) dl = NULL; if (dl) @@ -3840,14 +4213,13 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, int idx = get_imsm_disk_idx(dev, slot); struct imsm_super *mpb = super->anchor; struct imsm_map *map; - unsigned long long esize; unsigned long long pos; struct mdinfo *d; struct extent *ex; int i, j; int found; __u32 array_start; - __u32 blocks; + __u32 array_end; struct dl *dl; for (dl = super->disks; dl; dl = dl->next) { @@ -3863,11 +4235,10 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, continue; /* skip in use or failed drives */ - if (dl->disk.status & FAILED_DISK || idx == dl->index || + if (is_failed(&dl->disk) || idx == dl->index || dl->index == -2) { dprintf("%x:%x status (failed: %d index: %d)\n", - dl->major, dl->minor, - (dl->disk.status & FAILED_DISK) == FAILED_DISK, idx); + dl->major, dl->minor, is_failed(&dl->disk), idx); continue; } @@ -3899,15 +4270,14 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, j = 0; pos = 0; array_start = __le32_to_cpu(map->pba_of_lba0); - blocks = __le32_to_cpu(map->blocks_per_member); + array_end = array_start + + __le32_to_cpu(map->blocks_per_member) - 1; do { /* check that we can start at pba_of_lba0 with * blocks_per_member of space */ - esize = ex[j].start - pos; - if (array_start >= pos && - array_start + blocks < ex[j].start) { + if (array_start >= pos && array_end < ex[j].start) { found = 1; break; } @@ -3921,9 +4291,8 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, free(ex); if (i < mpb->num_raid_devs) { - dprintf("%x:%x does not have %u at %u\n", - dl->major, dl->minor, - blocks, array_start); + dprintf("%x:%x does not have %u to %u available\n", + dl->major, dl->minor, array_start, array_end); /* No room */ continue; } @@ -4025,6 +4394,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, di->data_offset = __le32_to_cpu(map->pba_of_lba0); di->component_size = a->info.component_size; di->container_member = inst; + super->random = random32(); di->next = rv; rv = di; num_spares++; @@ -4170,7 +4540,7 @@ static void imsm_process_update(struct supertype *st, if (i == u->slot) continue; disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); - if (!disk || disk->status & FAILED_DISK) + if (!disk || is_failed(disk)) failed++; } @@ -4191,6 +4561,15 @@ static void imsm_process_update(struct supertype *st, set_imsm_ord_tbl_ent(map, u->slot, dl->index); set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD); + /* update the family_num to mark a new container + * generation, being careful to record the existing + * family_num in orig_family_num to clean up after + * earlier mdadm versions that neglected to set it. + */ + if (mpb->orig_family_num == 0) + mpb->orig_family_num = mpb->family_num; + mpb->family_num += super->random; + /* count arrays using the victim in the metadata */ found = 0; for (a = st->arrays; a ; a = a->next) { @@ -4504,6 +4883,7 @@ struct superswitch super_imsm = { #ifndef MDASSEMBLE .examine_super = examine_super_imsm, .brief_examine_super = brief_examine_super_imsm, + .brief_examine_subarrays = brief_examine_subarrays_imsm, .export_examine_super = export_examine_super_imsm, .detail_super = detail_super_imsm, .brief_detail_super = brief_detail_super_imsm,