X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=super-intel.c;h=3befc3d0712d21b576d6849bb74f465ce33e9e52;hp=864a9e9aeb15d559fa9968e55a5e1f4b1d2ee94b;hb=da1887895404506708387fa3781bf0df0a2664ff;hpb=cceebc67f1377644b02da0ab50bc7d482eeb9cab diff --git a/super-intel.c b/super-intel.c index 864a9e9a..3befc3d0 100644 --- a/super-intel.c +++ b/super-intel.c @@ -105,6 +105,7 @@ struct imsm_vol { #define MIGR_VERIFY 2 /* analagous to echo check > sync_action */ #define MIGR_GEN_MIGR 3 #define MIGR_STATE_CHANGE 4 +#define MIGR_REPAIR 5 __u8 migr_type; /* Initializing, Rebuilding, ... */ __u8 dirty; __u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */ @@ -193,6 +194,29 @@ struct bbm_log { static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" }; #endif +static __u8 migr_type(struct imsm_dev *dev) +{ + if (dev->vol.migr_type == MIGR_VERIFY && + dev->status & DEV_VERIFY_AND_FIX) + return MIGR_REPAIR; + else + return dev->vol.migr_type; +} + +static void set_migr_type(struct imsm_dev *dev, __u8 migr_type) +{ + /* for compatibility with older oroms convert MIGR_REPAIR, into + * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status + */ + if (migr_type == MIGR_REPAIR) { + dev->vol.migr_type = MIGR_VERIFY; + dev->status |= DEV_VERIFY_AND_FIX; + } else { + dev->vol.migr_type = migr_type; + dev->status &= ~DEV_VERIFY_AND_FIX; + } +} + static unsigned int sector_count(__u32 bytes) { return ((bytes + (512-1)) & (~(512-1))) / 512; @@ -233,6 +257,7 @@ struct intel_super { int fd; int extent_cnt; struct extent *e; /* for determining freespace @ create */ + int raiddisk; /* slot to fill in autolayout */ } *disks; struct dl *add; /* list of disks to add while mdmon active */ struct dl *missing; /* disks removed while we weren't looking */ @@ -450,6 +475,20 @@ static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord) map->disk_ord_tbl[slot] = __cpu_to_le32(ord); } +static int get_imsm_disk_slot(struct imsm_map *map, int 
idx) +{ + int slot; + __u32 ord; + + for (slot = 0; slot < map->num_members; slot++) { + ord = __le32_to_cpu(map->disk_ord_tbl[slot]); + if (ord_to_idx(ord) == idx) + return slot; + } + + return -1; +} + static int get_imsm_raid_level(struct imsm_map *map) { if (map->raid_level == 1) { @@ -476,18 +515,14 @@ static int cmp_extent(const void *av, const void *bv) static int count_memberships(struct dl *dl, struct intel_super *super) { int memberships = 0; - int i, j; + int i; for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); struct imsm_map *map = get_imsm_map(dev, 0); - for (j = 0; j < map->num_members; j++) { - __u32 index = get_imsm_disk_idx(dev, j); - - if (index == dl->index) - memberships++; - } + if (get_imsm_disk_slot(map, dl->index) >= 0) + memberships++; } return memberships; @@ -497,7 +532,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) { /* find a list of used extents on the given physical device */ struct extent *rv, *e; - int i, j; + int i; int memberships = count_memberships(dl, super); __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; @@ -510,14 +545,10 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) struct imsm_dev *dev = get_imsm_dev(super, i); struct imsm_map *map = get_imsm_map(dev, 0); - for (j = 0; j < map->num_members; j++) { - __u32 index = get_imsm_disk_idx(dev, j); - - if (index == dl->index) { - e->start = __le32_to_cpu(map->pba_of_lba0); - e->size = __le32_to_cpu(map->blocks_per_member); - e++; - } + if (get_imsm_disk_slot(map, dl->index) >= 0) { + e->start = __le32_to_cpu(map->pba_of_lba0); + e->size = __le32_to_cpu(map->blocks_per_member); + e++; } } qsort(rv, memberships, sizeof(*rv), cmp_extent); @@ -592,10 +623,8 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) printf(" UUID : %s\n", uuid); printf(" RAID Level : %d\n", get_imsm_raid_level(map)); printf(" Members : %d\n", 
map->num_members); - for (slot = 0; slot < map->num_members; slot++) - if (disk_idx== get_imsm_disk_idx(dev, slot)) - break; - if (slot < map->num_members) { + slot = get_imsm_disk_slot(map, disk_idx); + if (slot >= 0) { ord = get_imsm_ord_tbl_ent(dev, slot); printf(" This Slot : %d%s\n", slot, ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : ""); @@ -616,10 +645,23 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) printf(" Chunk Size : %u KiB\n", __le16_to_cpu(map->blocks_per_strip) / 2); printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks)); - printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle"); - if (dev->vol.migr_state) - printf(": %s", dev->vol.migr_type ? "rebuilding" : "initializing"); - printf("\n"); + printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle\n"); + if (dev->vol.migr_state) { + if (migr_type(dev) == MIGR_INIT) + printf(": initializing\n"); + else if (migr_type(dev) == MIGR_REBUILD) + printf(": rebuilding\n"); + else if (migr_type(dev) == MIGR_VERIFY) + printf(": check\n"); + else if (migr_type(dev) == MIGR_GEN_MIGR) + printf(": general migration\n"); + else if (migr_type(dev) == MIGR_STATE_CHANGE) + printf(": state change\n"); + else if (migr_type(dev) == MIGR_REPAIR) + printf(": repair\n"); + else + printf(": <unknown:%d>\n", migr_type(dev)); /* unnamed migration type: report its raw value */ + } printf(" Map State : %s", map_state_str[map->map_state]); if (dev->vol.migr_state) { struct imsm_map *map = get_imsm_map(dev, 1); @@ -674,7 +716,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num)); printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num)); getinfo_super_imsm(st, &info); - fname_from_uuid(st, &info, nbuf,'-'); + fname_from_uuid(st, &info, nbuf, ':'); printf(" UUID : %s\n", nbuf + 5); sum = __le32_to_cpu(mpb->check_sum); printf(" Checksum : %08x %s\n", sum, @@ -700,7 +742,7 @@ static void examine_super_imsm(struct supertype *st, char 
*homehost) super->current_vol = i; getinfo_super_imsm(st, &info); - fname_from_uuid(st, &info, nbuf, '-'); + fname_from_uuid(st, &info, nbuf, ':'); print_imsm_dev(dev, nbuf + 5, super->disks->index); } for (i = 0; i < mpb->num_disks; i++) { @@ -723,27 +765,42 @@ static void brief_examine_super_imsm(struct supertype *st) return; getinfo_super_imsm(st, &info); - fname_from_uuid(st, &info, nbuf,'-'); + fname_from_uuid(st, &info, nbuf, ':'); printf("ARRAY metadata=imsm auto=md UUID=%s\n", nbuf + 5); for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); super->current_vol = i; getinfo_super_imsm(st, &info); - fname_from_uuid(st, &info, nbuf1,'-'); + fname_from_uuid(st, &info, nbuf1, ':'); printf("ARRAY /dev/md/%.16s container=%s\n" " member=%d auto=mdp UUID=%s\n", dev->volume, nbuf + 5, i, nbuf1 + 5); } } +static void export_examine_super_imsm(struct supertype *st) +{ + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->anchor; + struct mdinfo info; + char nbuf[64]; + + getinfo_super_imsm(st, &info); + fname_from_uuid(st, &info, nbuf, ':'); + printf("MD_METADATA=imsm\n"); + printf("MD_LEVEL=container\n"); + printf("MD_UUID=%s\n", nbuf+5); + printf("MD_DEVICES=%u\n", mpb->num_disks); +} + static void detail_super_imsm(struct supertype *st, char *homehost) { struct mdinfo info; char nbuf[64]; getinfo_super_imsm(st, &info); - fname_from_uuid(st, &info, nbuf,'-'); + fname_from_uuid(st, &info, nbuf, ':'); printf("\n UUID : %s\n", nbuf + 5); } @@ -752,7 +809,7 @@ static void brief_detail_super_imsm(struct supertype *st) struct mdinfo info; char nbuf[64]; getinfo_super_imsm(st, &info); - fname_from_uuid(st, &info, nbuf,'-'); + fname_from_uuid(st, &info, nbuf, ':'); printf(" UUID=%s", nbuf + 5); } @@ -975,6 +1032,23 @@ static int detail_platform_imsm(int verbose, int enumerate_only) imsm_orom_has_raid1e(orom) ? " raid1e" : "", imsm_orom_has_raid10(orom) ? " raid10" : "", imsm_orom_has_raid5(orom) ? 
" raid5" : ""); + printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + imsm_orom_has_chunk(orom, 2) ? " 2k" : "", + imsm_orom_has_chunk(orom, 4) ? " 4k" : "", + imsm_orom_has_chunk(orom, 8) ? " 8k" : "", + imsm_orom_has_chunk(orom, 16) ? " 16k" : "", + imsm_orom_has_chunk(orom, 32) ? " 32k" : "", + imsm_orom_has_chunk(orom, 64) ? " 64k" : "", + imsm_orom_has_chunk(orom, 128) ? " 128k" : "", + imsm_orom_has_chunk(orom, 256) ? " 256k" : "", + imsm_orom_has_chunk(orom, 512) ? " 512k" : "", + imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "", + imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "", + imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "", + imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "", + imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "", + imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "", + imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : ""); printf(" Max Disks : %d\n", orom->tds); printf(" Max Volumes : %d\n", orom->vpa); printf(" I/O Controller : %s\n", hba_path); @@ -1115,7 +1189,11 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) struct intel_super *super = st->sb; struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); struct imsm_map *map = get_imsm_map(dev, 0); + struct dl *dl; + for (dl = super->disks; dl; dl = dl->next) + if (dl->raiddisk == info->disk.raid_disk) + break; info->container_member = super->current_vol; info->array.raid_disks = map->num_members; info->array.level = get_imsm_raid_level(map); @@ -1128,6 +1206,10 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) info->disk.major = 0; info->disk.minor = 0; + if (dl) { + info->disk.major = dl->major; + info->disk.minor = dl->minor; + } info->data_offset = __le32_to_cpu(map->pba_of_lba0); info->component_size = __le32_to_cpu(map->blocks_per_member); @@ -1136,7 +1218,8 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) 
info->resync_start = 0; else if (dev->vol.migr_state) - info->resync_start = __le32_to_cpu(dev->vol.curr_migr_unit); + /* FIXME add curr_migr_unit to resync_start conversion */ + info->resync_start = 0; else info->resync_start = ~0ULL; @@ -1152,6 +1235,32 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) uuid_from_super_imsm(st, info->uuid); } +/* check the config file to see if we can return a real uuid for this spare */ +static void fixup_container_spare_uuid(struct mdinfo *inf) +{ + struct mddev_ident_s *array_list; + + if (inf->array.level != LEVEL_CONTAINER || + memcmp(inf->uuid, uuid_match_any, sizeof(int[4])) != 0) + return; + + array_list = conf_get_ident(NULL); + + for (; array_list; array_list = array_list->next) { + if (array_list->uuid_set) { + struct supertype *_sst; /* spare supertype */ + struct supertype *_cst; /* container supertype */ + + _cst = array_list->st; + _sst = _cst ? _cst->ss->match_metadata_desc(inf->text_version) : NULL; /* NOTE(review): presumably st can be unset on a config entry that only has a uuid - confirm; such entries are skipped rather than dereferenced */ + if (_sst) { + memcpy(inf->uuid, array_list->uuid, sizeof(int[4])); + free(_sst); + break; + } + } + } +} static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) { @@ -1195,11 +1304,12 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->component_size = reserved; s = disk->status; info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0; + /* we don't change info->disk.raid_disk here because + * this state will be finalized in mdmon after we have + * found the 'most fresh' version of the metadata + */ + info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0; info->disk.state |= s & SPARE_DISK ? 
0 : (1 << MD_DISK_SYNC); - if (s & FAILED_DISK || super->disks->index == -2) { - info->disk.state |= 1 << MD_DISK_FAULTY; - info->disk.raid_disk = -2; - } } /* only call uuid_from_super_imsm when this disk is part of a populated container, @@ -1207,8 +1317,10 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) */ if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs) uuid_from_super_imsm(st, info->uuid); - else + else { memcpy(info->uuid, uuid_match_any, sizeof(int[4])); + fixup_container_spare_uuid(info); + } } static int update_super_imsm(struct supertype *st, struct mdinfo *info, @@ -1402,7 +1514,10 @@ static int imsm_read_serial(int fd, char *devname, int rv; int rsp_len; int len; - char *c, *rsp_buf; + char *dest; + char *src; + char *rsp_buf; + int i; memset(scsi_serial, 0, sizeof(scsi_serial)); @@ -1422,7 +1537,6 @@ static int imsm_read_serial(int fd, char *devname, return rv; } - /* trim leading whitespace */ rsp_len = scsi_serial[3]; if (!rsp_len) { if (devname) @@ -1432,24 +1546,33 @@ static int imsm_read_serial(int fd, char *devname, return 2; } rsp_buf = (char *) &scsi_serial[4]; - c = rsp_buf; - while (isspace(*c)) - c++; - /* truncate len to the end of rsp_buf if necessary */ - if (c + MAX_RAID_SERIAL_LEN > rsp_buf + rsp_len) - len = rsp_len - (c - rsp_buf); - else + /* trim all whitespace and non-printable characters and convert + * ':' to ';' + */ + for (i = 0, dest = rsp_buf; i < rsp_len; i++) { + src = &rsp_buf[i]; + if (*src > 0x20) { + /* ':' is reserved for use in placeholder serial + * numbers for missing disks + */ + if (*src == ':') + *dest++ = ';'; + else + *dest++ = *src; + } + } + len = dest - rsp_buf; + dest = rsp_buf; + + /* truncate leading characters */ + if (len > MAX_RAID_SERIAL_LEN) { + dest += len - MAX_RAID_SERIAL_LEN; len = MAX_RAID_SERIAL_LEN; + } - /* initialize the buffer and copy rsp_buf characters */ memset(serial, 0, MAX_RAID_SERIAL_LEN); - memcpy(serial, c, len); - - /* trim 
trailing whitespace starting with the last character copied */ - c = (char *) &serial[len - 1]; - while (isspace(*c) || *c == '\0') - *c-- = '\0'; + memcpy(serial, dest, len); return 0; } @@ -1568,28 +1691,50 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed) * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal * map1state=unitialized) - * 3/ Verify (Resync) (migr_state=1 migr_type=MIGR_REBUILD map0state=normal + * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal * map1state=normal) * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal * map1state=degraded) */ -static void migrate(struct imsm_dev *dev, __u8 to_state, int rebuild_resync) +static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type) { struct imsm_map *dest; struct imsm_map *src = get_imsm_map(dev, 0); dev->vol.migr_state = 1; - dev->vol.migr_type = rebuild_resync; + set_migr_type(dev, migr_type); dev->vol.curr_migr_unit = 0; dest = get_imsm_map(dev, 1); + /* duplicate and then set the target end state in map[0] */ memcpy(dest, src, sizeof_imsm_map(src)); + if (migr_type == MIGR_REBUILD) { + __u32 ord; + int i; + + for (i = 0; i < src->num_members; i++) { + ord = __le32_to_cpu(src->disk_ord_tbl[i]); + set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord)); + } + } + src->map_state = to_state; } static void end_migration(struct imsm_dev *dev, __u8 map_state) { struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state); + int i; + + /* merge any IMSM_ORD_REBUILD bits that were not successfully + * completed in the last migration. 
+ * + * FIXME add support for online capacity expansion and + * raid-level-migration + */ + for (i = 0; i < prev->num_members; i++) + map->disk_ord_tbl[i] |= prev->disk_ord_tbl[i]; dev->vol.migr_state = 0; dev->vol.curr_migr_unit = 0; @@ -1639,7 +1784,8 @@ static int parse_raid_devices(struct intel_super *super) if (posix_memalign(&buf, 512, len) != 0) return 1; - memcpy(buf, super->buf, len); + memcpy(buf, super->buf, super->len); + memset(buf + super->len, 0, len - super->len); free(super->buf); super->buf = buf; super->len = len; @@ -1724,6 +1870,17 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) sectors = mpb_sectors(anchor) - 1; free(anchor); if (!sectors) { + check_sum = __gen_imsm_checksum(super->anchor); + if (check_sum != __le32_to_cpu(super->anchor->check_sum)) { + if (devname) + fprintf(stderr, + Name ": IMSM checksum %x != %x on %s\n", + check_sum, + __le32_to_cpu(super->anchor->check_sum), + devname); + return 2; + } + rc = load_imsm_disk(fd, super, devname, 0); if (rc == 0) rc = parse_raid_devices(super); @@ -1754,7 +1911,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) Name ": IMSM checksum %x != %x on %s\n", check_sum, __le32_to_cpu(super->anchor->check_sum), devname); - return 2; + return 3; } /* FIXME the BBM log is disk specific so we cannot use this global @@ -1878,11 +2035,6 @@ static int find_missing(struct intel_super *super) dl = serial_to_dl(disk->serial, super); if (dl) continue; - /* ok we have a 'disk' without a live entry in - * super->disks - */ - if (disk->status & FAILED_DISK || !(disk->status & USABLE_DISK)) - continue; /* never mind, already marked */ dl = malloc(sizeof(*dl)); if (!dl) @@ -1894,6 +2046,7 @@ static int find_missing(struct intel_super *super) dl->index = i; serialcpy(dl->serial, disk->serial); dl->disk = *disk; + dl->e = NULL; dl->next = super->missing; super->missing = dl; } @@ -1912,9 +2065,16 @@ static int load_super_imsm_all(struct supertype *st, int 
fd, void **sbp, char nm[20]; int dfd; int rv; + int devnum = fd2devnum(fd); + int retry; + enum sysfs_read_flags flags; + + flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; + if (mdmon_running(devnum)) + flags |= SKIP_GONE_DEVS; - /* check if this disk is a member of an active array */ - sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); + /* check if 'fd' an opened container */ + sra = sysfs_read(fd, 0, flags); if (!sra) return 1; @@ -1936,6 +2096,15 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, return 2; } rv = load_imsm_mpb(dfd, super, NULL); + + /* retry the load if we might have raced against mdmon */ + if (rv == 3 && mdmon_running(devnum)) + for (retry = 0; retry < 3; retry++) { + usleep(3000); + rv = load_imsm_mpb(dfd, super, NULL); + if (rv != 3) + break; + } if (!keep_fd) close(dfd); if (rv == 0) { @@ -1949,7 +2118,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, } } else { free_imsm(super); - return 2; + return rv; } } @@ -1999,7 +2168,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, } *sbp = super; - st->container_dev = fd2devnum(fd); + st->container_dev = devnum; if (st->ss == NULL) { st->ss = &super_imsm; st->minor_version = 0; @@ -2060,13 +2229,12 @@ static __u16 info_to_blocks_per_strip(mdu_array_info_t *info) return info->chunk_size >> 9; } -static __u32 info_to_num_data_stripes(mdu_array_info_t *info) +static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains) { __u32 num_stripes; num_stripes = (info->size * 2) / info_to_blocks_per_strip(info); - if (info->level == 1) - num_stripes /= 2; + num_stripes /= num_domains; return num_stripes; } @@ -2143,6 +2311,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, int i; unsigned long long array_blocks; size_t size_old, size_new; + __u32 num_data_stripes; if (super->orom && mpb->num_raid_devs >= super->orom->vpa) { fprintf(stderr, Name": This 
imsm-container already has the " @@ -2211,14 +2380,14 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, dev->reserved_blocks = __cpu_to_le32(0); vol = &dev->vol; vol->migr_state = 0; - vol->migr_type = MIGR_INIT; + set_migr_type(dev, MIGR_INIT); vol->dirty = 0; vol->curr_migr_unit = 0; map = get_imsm_map(dev, 0); map->pba_of_lba0 = __cpu_to_le32(super->create_offset); map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info)); map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); - map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info)); + map->failed_disk_num = ~0; map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL; @@ -2232,8 +2401,10 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, map->num_domains = info->raid_disks / 2; } else { map->raid_level = info->level; - map->num_domains = !!map->raid_level; + map->num_domains = 1; } + num_data_stripes = info_to_num_data_stripes(info, map->num_domains); + map->num_data_stripes = __cpu_to_le32(num_data_stripes); map->num_members = info->raid_disks; for (i = 0; i < map->num_members; i++) { @@ -2319,10 +2490,19 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, return 1; } - for (dl = super->disks; dl ; dl = dl->next) - if (dl->major == dk->major && - dl->minor == dk->minor) - break; + if (fd == -1) { + /* we're doing autolayout so grab the pre-marked (in + * validate_geometry) raid_disk + */ + for (dl = super->disks; dl; dl = dl->next) + if (dl->raiddisk == dk->raid_disk) + break; + } else { + for (dl = super->disks; dl ; dl = dl->next) + if (dl->major == dk->major && + dl->minor == dk->minor) + break; + } if (!dl) { fprintf(stderr, Name ": %s is not a member of the same container\n", devname); @@ -2388,6 +2568,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, dd->index = -1; dd->devname = devname ? 
strdup(devname) : NULL; dd->fd = fd; + dd->e = NULL; rv = imsm_read_serial(fd, devname, dd->serial); if (rv) { fprintf(stderr, @@ -2715,7 +2896,7 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten int i, j; int start_extent; unsigned long long pos; - unsigned long long start; + unsigned long long start = 0; unsigned long long maxsize; unsigned long reserve; @@ -2972,6 +3153,78 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, return 1; } +static int reserve_space(struct supertype *st, int raiddisks, + unsigned long long size, int chunk, + unsigned long long *freesize) +{ + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->anchor; + struct dl *dl; + int i; + int extent_cnt; + struct extent *e; + unsigned long long maxsize; + unsigned long long minsize; + int cnt; + int used; + + /* find the largest common start free region of the possible disks */ + used = 0; + extent_cnt = 0; + cnt = 0; + for (dl = super->disks; dl; dl = dl->next) { + dl->raiddisk = -1; + + if (dl->index >= 0) + used++; + + /* don't activate new spares if we are orom constrained + * and there is already a volume active in the container + */ + if (super->orom && dl->index < 0 && mpb->num_raid_devs) + continue; + + e = get_extents(super, dl); + if (!e) + continue; + for (i = 1; e[i-1].size; i++) + ; + dl->e = e; + dl->extent_cnt = i; + extent_cnt += i; + cnt++; + } + + maxsize = merge_extents(super, extent_cnt); + minsize = size; + if (size == 0) + minsize = chunk; + + if (cnt < raiddisks || + (super->orom && used && used != raiddisks) || + maxsize < minsize) { + fprintf(stderr, Name ": not enough devices with space to create array.\n"); + return 0; /* No enough free spaces large enough */ + } + + if (size == 0) { + size = maxsize; + if (chunk) { + size /= chunk; + size *= chunk; + } + } + + cnt = 0; + for (dl = super->disks; dl; dl = dl->next) + if (dl->e) + dl->raiddisk = cnt++; + + *freesize = size; + + return 1; +} + 
static int validate_geometry_imsm(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, char *dev, unsigned long long *freesize, @@ -2993,9 +3246,15 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, if (!dev) { if (st->sb && freesize) { - /* Should do auto-layout here */ - fprintf(stderr, Name ": IMSM does not support auto-layout yet\n"); - return 0; + /* we are being asked to automatically layout a + * new volume based on the current contents of + * the container. If the the parameters can be + * satisfied reserve_space will record the disks, + * start offset, and size of the volume to be + * created. add_to_super and getinfo_super + * detect when autolayout is in progress. + */ + return reserve_space(st, raiddisks, size, chunk, freesize); } return 1; } @@ -3097,6 +3356,18 @@ static struct mdinfo *container_content_imsm(struct supertype *st) struct mdinfo *this; int slot; + /* do not publish arrays that are in the middle of an + * unsupported migration + */ + if (dev->vol.migr_state && + (migr_type(dev) == MIGR_GEN_MIGR || + migr_type(dev) == MIGR_STATE_CHANGE)) { + fprintf(stderr, Name ": cannot assemble volume '%.16s':" + " unsupported migration in progress\n", + dev->volume); + continue; + } + this = malloc(sizeof(*this)); memset(this, 0, sizeof(*this)); this->next = rest; @@ -3143,7 +3414,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st) info_d = malloc(sizeof(*info_d)); if (!info_d) { fprintf(stderr, Name ": failed to allocate disk" - " for volume %s\n", (char *) dev->volume); + " for volume %.16s\n", dev->volume); free(this); this = rest; break; @@ -3263,10 +3534,23 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev) int failed = 0; struct imsm_disk *disk; struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state); + __u32 ord; + int idx; - for (i = 0; i < map->num_members; i++) { - 
__u32 ord = get_imsm_ord_tbl_ent(dev, i); - int idx = ord_to_idx(ord); + /* at the beginning of migration we set IMSM_ORD_REBUILD on + * disks that are being rebuilt. New failures are recorded to + * map[0]. So we look through all the disks we started with and + * see if any failures are still present, or if any new ones + * have arrived + * + * FIXME add support for online capacity expansion and + * raid-level-migration + */ + for (i = 0; i < prev->num_members; i++) { + ord = __le32_to_cpu(prev->disk_ord_tbl[i]); + ord |= __le32_to_cpu(map->disk_ord_tbl[i]); + idx = ord_to_idx(ord); disk = get_imsm_disk(super, idx); if (!disk || disk->status & FAILED_DISK || @@ -3284,7 +3568,8 @@ static int is_resyncing(struct imsm_dev *dev) if (!dev->vol.migr_state) return 0; - if (dev->vol.migr_type == MIGR_INIT) + if (migr_type(dev) == MIGR_INIT || + migr_type(dev) == MIGR_REPAIR) return 1; migr_map = get_imsm_map(dev, 1); @@ -3302,7 +3587,7 @@ static int is_rebuilding(struct imsm_dev *dev) if (!dev->vol.migr_state) return 0; - if (dev->vol.migr_type != MIGR_REBUILD) + if (migr_type(dev) != MIGR_REBUILD) return 0; migr_map = get_imsm_map(dev, 1); @@ -3313,11 +3598,38 @@ static int is_rebuilding(struct imsm_dev *dev) return 0; } -static void mark_failure(struct imsm_disk *disk) +/* return true if we recorded new information */ +static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) { - if (disk->status & FAILED_DISK) - return; + __u32 ord; + int slot; + struct imsm_map *map; + + /* new failures are always set in map[0] */ + map = get_imsm_map(dev, 0); + + slot = get_imsm_disk_slot(map, idx); + if (slot < 0) + return 0; + + ord = __le32_to_cpu(map->disk_ord_tbl[slot]); + if ((disk->status & FAILED_DISK) && (ord & IMSM_ORD_REBUILD)) + return 0; + disk->status |= FAILED_DISK; + set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD); + if (map->failed_disk_num == 0xff) /* NOTE(review): assumes failed_disk_num is a one-byte field whose "none" value is stored as ~0 - confirm; comparing it to int ~0 (-1) can never match after integer promotion */ + map->failed_disk_num = slot; + return 1; +} + +static void mark_missing(struct imsm_dev 
*dev, struct imsm_disk *disk, int idx) +{ + mark_failure(dev, disk, idx); + + if (disk->scsi_id == __cpu_to_le32(~(__u32)0)) + return; + disk->scsi_id = __cpu_to_le32(~(__u32)0); memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1); } @@ -3343,7 +3655,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent) dprintf("imsm: mark missing\n"); end_migration(dev, map_state); for (dl = super->missing; dl; dl = dl->next) - mark_failure(&dl->disk); + mark_missing(dev, &dl->disk, dl->index); super->updates_pending++; } @@ -3355,7 +3667,8 @@ static int imsm_set_array_state(struct active_array *a, int consistent) if (is_resync_complete(a)) { /* complete intialization / resync, - * recovery is completed in ->set_disk + * recovery and interrupted recovery is completed in + * ->set_disk */ if (is_resyncing(dev)) { dprintf("imsm: mark resync done\n"); @@ -3365,20 +3678,14 @@ static int imsm_set_array_state(struct active_array *a, int consistent) } else if (!is_resyncing(dev) && !failed) { /* mark the start of the init process if nothing is failed */ dprintf("imsm: mark resync start (%llu)\n", a->resync_start); - if (map->map_state == IMSM_T_STATE_NORMAL) - migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REBUILD); - else + if (map->map_state == IMSM_T_STATE_UNINITIALIZED) migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT); + else + migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REPAIR); super->updates_pending++; } - /* check if we can update the migration checkpoint */ - if (dev->vol.migr_state && - __le32_to_cpu(dev->vol.curr_migr_unit) != a->resync_start) { - dprintf("imsm: checkpoint migration (%llu)\n", a->resync_start); - dev->vol.curr_migr_unit = __cpu_to_le32(a->resync_start); - super->updates_pending++; - } + /* FIXME check if we can update curr_migr_unit from resync_start */ /* mark dirty / clean */ if (dev->vol.dirty != !consistent) { @@ -3417,13 +3724,13 @@ static void imsm_set_disk(struct active_array *a, int n, int state) disk = get_imsm_disk(super, 
ord_to_idx(ord)); /* check for new failures */ - if ((state & DS_FAULTY) && !(disk->status & FAILED_DISK)) { - mark_failure(disk); - super->updates_pending++; + if (state & DS_FAULTY) { + if (mark_failure(dev, disk, ord_to_idx(ord))) + super->updates_pending++; } /* check if in_sync */ - if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) { + if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) { struct imsm_map *migr_map = get_imsm_map(dev, 1); set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord)); @@ -3436,6 +3743,8 @@ static void imsm_set_disk(struct active_array *a, int n, int state) /* check if recovery complete, newly degraded, or failed */ if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) { end_migration(dev, map_state); + map = get_imsm_map(dev, 0); + map->failed_disk_num = ~0; super->updates_pending++; } else if (map_state == IMSM_T_STATE_DEGRADED && map->map_state != map_state && @@ -3543,11 +3852,11 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, continue; /* skip in use or failed drives */ - if (dl->disk.status & FAILED_DISK || idx == dl->index) { - dprintf("%x:%x status ( %s%s)\n", - dl->major, dl->minor, - dl->disk.status & FAILED_DISK ? "failed " : "", - idx == dl->index ? 
"in use " : ""); + if (dl->disk.status & FAILED_DISK || idx == dl->index || + dl->index == -2) { + dprintf("%x:%x status (failed: %d index: %d)\n", + dl->major, dl->minor, + (dl->disk.status & FAILED_DISK) == FAILED_DISK, idx); continue; } @@ -3572,10 +3881,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, /* check if this disk is already a member of * this array */ - for (j = 0; j < map->num_members; j++) - if (get_imsm_disk_idx(dev, j) == dl->index) - break; - if (j < map->num_members) + if (get_imsm_disk_slot(map, dl->index) >= 0) continue; found = 0; @@ -3878,9 +4184,10 @@ static void imsm_process_update(struct supertype *st, found = 0; for (a = st->arrays; a ; a = a->next) { dev = get_imsm_dev(super, a->info.container_member); - for (i = 0; i < map->num_members; i++) - if (victim == get_imsm_disk_idx(dev, i)) - found++; + map = get_imsm_map(dev, 0); + + if (get_imsm_disk_slot(map, victim) >= 0) + found++; } /* delete the victim if it is no longer being @@ -4122,7 +4429,9 @@ static void imsm_prepare_update(struct supertype *st, free(super->next_buf); super->next_len = buf_len; - if (posix_memalign(&super->next_buf, 512, buf_len) != 0) + if (posix_memalign(&super->next_buf, 512, buf_len) == 0) + memset(super->next_buf, 0, buf_len); + else super->next_buf = NULL; } } @@ -4184,6 +4493,7 @@ struct superswitch super_imsm = { #ifndef MDASSEMBLE .examine_super = examine_super_imsm, .brief_examine_super = brief_examine_super_imsm, + .export_examine_super = export_examine_super_imsm, .detail_super = detail_super_imsm, .brief_detail_super = brief_detail_super_imsm, .write_init_super = write_init_super_imsm,