X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=0178f289f51f942a980fa125ec688fba8f9c8f4e;hb=bb025c2f22c9c0cdf2d77c7a0e8298b3e3972538;hp=f438044e82587daf40364c55b000d6de14482c34;hpb=64436f0628a14f4e979b93bea57aba4b4c6143e8;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index f438044e..0178f289 100644 --- a/super-intel.c +++ b/super-intel.c @@ -233,6 +233,10 @@ struct intel_dev { unsigned index; }; +enum action { + DISK_REMOVE = 1, + DISK_ADD +}; /* internal representation of IMSM metadata */ struct intel_super { union { @@ -258,8 +262,10 @@ struct intel_super { int extent_cnt; struct extent *e; /* for determining freespace @ create */ int raiddisk; /* slot to fill in autolayout */ + enum action action; } *disks; - struct dl *add; /* list of disks to add while mdmon active */ + struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon + active */ struct dl *missing; /* disks removed while we weren't looking */ struct bbm_log *bbm_log; const char *hba; /* device path of the raid controller for this metadata */ @@ -278,13 +284,22 @@ struct extent { unsigned long long start, size; }; +/* definitions of reshape process types */ +enum imsm_reshape_type { + CH_TAKEOVER, + CH_CHUNK_MIGR, + CH_LEVEL_MIGRATION +}; + /* definition of messages passed to imsm_process_update */ enum imsm_update_type { update_activate_spare, update_create_array, update_kill_array, update_rename_array, - update_add_disk, + update_add_remove_disk, + update_reshape_container_disks, + update_takeover }; struct imsm_update_activate_spare { @@ -295,6 +310,33 @@ struct imsm_update_activate_spare { struct imsm_update_activate_spare *next; }; +struct geo_params { + int dev_id; + char *dev_name; + long long size; + int level; + int layout; + int chunksize; + int raid_disks; +}; + +enum takeover_direction { + R10_TO_R0, + R0_TO_R10 +}; +struct imsm_update_takeover { + enum imsm_update_type type; + int subarray; + enum takeover_direction direction; +}; + 
+struct imsm_update_reshape { + enum imsm_update_type type; + int old_raid_disks; + int new_raid_disks; + int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */ +}; + struct disk_info { __u8 serial[MAX_RAID_SERIAL_LEN]; }; @@ -316,7 +358,7 @@ struct imsm_update_rename_array { int dev_idx; }; -struct imsm_update_add_disk { +struct imsm_update_add_remove_disk { enum imsm_update_type type; }; @@ -333,6 +375,7 @@ static struct supertype *match_metadata_desc_imsm(char *arg) if (!st) return NULL; memset(st, 0, sizeof(*st)); + st->container_dev = NoMdDev; st->ss = &super_imsm; st->max_devs = IMSM_MAX_DEVICES; st->minor_version = 0; @@ -357,15 +400,28 @@ static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index) return &mpb->disk[index]; } -/* retrieve a disk from the parsed metadata */ -static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) +/* retrieve the disk description based on a index of the disk + * in the sub-array + */ +static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index) { struct dl *d; for (d = super->disks; d; d = d->next) if (d->index == index) - return &d->disk; - + return d; + + return NULL; +} +/* retrieve a disk from the parsed metadata */ +static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index) +{ + struct dl *dl; + + dl = get_imsm_dl_disk(super, index); + if (dl) + return &dl->disk; + return NULL; } @@ -470,23 +526,35 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index) return NULL; } -static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot) +/* + * for second_map: + * == 0 get first map + * == 1 get second map + * == -1 than get map according to the current migr_state + */ +static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, + int slot, + int second_map) { struct imsm_map *map; - if (dev->vol.migr_state) - map = get_imsm_map(dev, 1); - else - map = get_imsm_map(dev, 0); + if (second_map == -1) { + if 
(dev->vol.migr_state) + map = get_imsm_map(dev, 1); + else + map = get_imsm_map(dev, 0); + } else { + map = get_imsm_map(dev, second_map); + } /* top byte identifies disk under rebuild */ return __le32_to_cpu(map->disk_ord_tbl[slot]); } #define ord_to_idx(ord) (((ord) << 8) >> 8) -static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot) +static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map) { - __u32 ord = get_imsm_ord_tbl_ent(dev, slot); + __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map); return ord_to_idx(ord); } @@ -646,6 +714,37 @@ static int is_failed(struct imsm_disk *disk) return (disk->status & FAILED_DISK) == FAILED_DISK; } +/* Return minimum size of a spare that can be used in this array*/ +static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st) +{ + struct intel_super *super = st->sb; + struct dl *dl; + struct extent *e; + int i; + unsigned long long rv = 0; + + if (!super) + return rv; + /* find first active disk in array */ + dl = super->disks; + while (dl && (is_failed(&dl->disk) || dl->index == -1)) + dl = dl->next; + if (!dl) + return rv; + /* find last lba used by subarrays */ + e = get_extents(super, dl); + if (!e) + return rv; + for (i = 0; e[i].size; i++) + continue; + if (i > 0) + rv = e[i-1].start + e[i-1].size; + free(e); + /* add the amount of space needed for metadata */ + rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + return rv * 512; +} + #ifndef MDASSEMBLE static __u64 blocks_per_migr_unit(struct imsm_dev *dev); @@ -654,22 +753,38 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) __u64 sz; int slot, i; struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map2 = get_imsm_map(dev, 1); __u32 ord; printf("\n"); printf("[%.16s]:\n", dev->volume); printf(" UUID : %s\n", uuid); - printf(" RAID Level : %d\n", get_imsm_raid_level(map)); - printf(" Members : %d\n", map->num_members); + printf(" RAID Level : %d", get_imsm_raid_level(map)); + if 
(map2) + printf(" <-- %d", get_imsm_raid_level(map2)); + printf("\n"); + printf(" Members : %d", map->num_members); + if (map2) + printf(" <-- %d", map2->num_members); + printf("\n"); printf(" Slots : ["); for (i = 0; i < map->num_members; i++) { - ord = get_imsm_ord_tbl_ent(dev, i); + ord = get_imsm_ord_tbl_ent(dev, i, 0); printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); } - printf("]\n"); + printf("]"); + if (map2) { + printf(" <-- ["); + for (i = 0; i < map2->num_members; i++) { + ord = get_imsm_ord_tbl_ent(dev, i, 1); + printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); + } + printf("]"); + } + printf("\n"); slot = get_imsm_disk_slot(map, disk_idx); if (slot >= 0) { - ord = get_imsm_ord_tbl_ent(dev, slot); + ord = get_imsm_ord_tbl_ent(dev, slot, -1); printf(" This Slot : %d%s\n", slot, ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : ""); } else @@ -686,8 +801,12 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) __le32_to_cpu(map->pba_of_lba0)); printf(" Num Stripes : %u\n", __le32_to_cpu(map->num_data_stripes)); - printf(" Chunk Size : %u KiB\n", + printf(" Chunk Size : %u KiB", __le16_to_cpu(map->blocks_per_strip) / 2); + if (map2) + printf(" <-- %u KiB", + __le16_to_cpu(map2->blocks_per_strip) / 2); + printf("\n"); printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks)); printf(" Migrate State : "); if (dev->vol.migr_state) { @@ -741,7 +860,7 @@ static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved) human_size(sz * 512)); } -static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info); +static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map); static void examine_super_imsm(struct supertype *st, char *homehost) { @@ -753,7 +872,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) char nbuf[64]; __u32 sum; __u32 reserved = imsm_reserved_sectors(super, super->disks); - + struct dl *dl; snprintf(str, MPB_SIG_LEN, "%s", mpb->sig); printf(" Magic : 
%s\n", str); @@ -762,7 +881,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num)); printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num)); printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num)); - getinfo_super_imsm(st, &info); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf(" UUID : %s\n", nbuf + 5); sum = __le32_to_cpu(mpb->check_sum); @@ -789,7 +908,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) struct imsm_dev *dev = __get_imsm_dev(mpb, i); super->current_vol = i; - getinfo_super_imsm(st, &info); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); print_imsm_dev(dev, nbuf + 5, super->disks->index); } @@ -798,6 +917,26 @@ static void examine_super_imsm(struct supertype *st, char *homehost) continue; print_imsm_disk(mpb, i, reserved); } + for (dl = super->disks ; dl; dl = dl->next) { + struct imsm_disk *disk; + char str[MAX_RAID_SERIAL_LEN + 1]; + __u64 sz; + + if (dl->index >= 0) + continue; + + disk = &dl->disk; + printf("\n"); + snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); + printf(" Disk Serial : %s\n", str); + printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", + is_configured(disk) ? " active" : "", + is_failed(disk) ? 
" failed" : ""); + printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); + sz = __le32_to_cpu(disk->total_blocks) - reserved; + printf(" Usable Size : %llu%s\n", (unsigned long long)sz, + human_size(sz * 512)); + } } static void brief_examine_super_imsm(struct supertype *st, int verbose) @@ -812,7 +951,7 @@ static void brief_examine_super_imsm(struct supertype *st, int verbose) return; } - getinfo_super_imsm(st, &info); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5); } @@ -829,13 +968,13 @@ static void brief_examine_subarrays_imsm(struct supertype *st, int verbose) if (!super->anchor->num_raid_devs) return; - getinfo_super_imsm(st, &info); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); super->current_vol = i; - getinfo_super_imsm(st, &info); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf1, ':'); printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n", dev->volume, nbuf + 5, i, nbuf1 + 5); @@ -849,7 +988,7 @@ static void export_examine_super_imsm(struct supertype *st) struct mdinfo info; char nbuf[64]; - getinfo_super_imsm(st, &info); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf("MD_METADATA=imsm\n"); printf("MD_LEVEL=container\n"); @@ -862,7 +1001,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost) struct mdinfo info; char nbuf[64]; - getinfo_super_imsm(st, &info); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf("\n UUID : %s\n", nbuf + 5); } @@ -871,7 +1010,7 @@ static void brief_detail_super_imsm(struct supertype *st) { struct mdinfo info; char nbuf[64]; - getinfo_super_imsm(st, &info); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf(" UUID=%s", nbuf + 5); } @@ -1307,12 +1446,12 @@ static 
__u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev) return num_stripes_per_unit_resync(dev); } -static __u8 imsm_num_data_members(struct imsm_dev *dev) +static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) { /* named 'imsm_' because raid0, raid1 and raid10 * counter-intuitively have the same number of data disks */ - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, second_map); switch (get_imsm_raid_level(map)) { case 0: @@ -1376,6 +1515,7 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) return 0; switch (migr_type(dev)) { + case MIGR_GEN_MIGR: case MIGR_VERIFY: case MIGR_REPAIR: case MIGR_INIT: { @@ -1395,7 +1535,7 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) */ stripes_per_unit = num_stripes_per_unit_resync(dev); migr_chunk = migr_strip_blocks_resync(dev); - disks = imsm_num_data_members(dev); + disks = imsm_num_data_members(dev, 0); blocks_per_unit = stripes_per_unit * migr_chunk * disks; stripe = __le32_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; @@ -1412,7 +1552,6 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) migr_chunk = migr_strip_blocks_rebuild(dev); return migr_chunk * stripes_per_unit; } - case MIGR_GEN_MIGR: case MIGR_STATE_CHANGE: default: return 0; @@ -1434,30 +1573,45 @@ static int imsm_level_to_layout(int level) return UnSet; } -static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) +static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap) { struct intel_super *super = st->sb; struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *prev_map = get_imsm_map(dev, 1); + struct imsm_map *map_to_analyse = map; struct dl *dl; char *devname; + int map_disks = info->array.raid_disks; + + if (prev_map) + map_to_analyse = prev_map; for (dl = super->disks; dl; dl = dl->next) if (dl->raiddisk == 
info->disk.raid_disk) break; info->container_member = super->current_vol; - info->array.raid_disks = map->num_members; - info->array.level = get_imsm_raid_level(map); + info->array.raid_disks = map_to_analyse->num_members; + info->array.level = get_imsm_raid_level(map_to_analyse); info->array.layout = imsm_level_to_layout(info->array.level); info->array.md_minor = -1; info->array.ctime = 0; info->array.utime = 0; - info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9; + info->array.chunk_size = + __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9; info->array.state = !dev->vol.dirty; info->custom_array_size = __le32_to_cpu(dev->size_high); info->custom_array_size <<= 32; info->custom_array_size |= __le32_to_cpu(dev->size_low); - + if (prev_map) { + info->new_level = get_imsm_raid_level(map); + info->new_layout = imsm_level_to_layout(info->new_level); + info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9; + } else { + info->new_level = UnSet; + info->new_layout = UnSet; + info->new_chunk = info->array.chunk_size; + } info->disk.major = 0; info->disk.minor = 0; if (dl) { @@ -1465,13 +1619,19 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) info->disk.minor = dl->minor; } - info->data_offset = __le32_to_cpu(map->pba_of_lba0); - info->component_size = __le32_to_cpu(map->blocks_per_member); + info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0); + info->component_size = + __le32_to_cpu(map_to_analyse->blocks_per_member); memset(info->uuid, 0, sizeof(info->uuid)); info->recovery_start = MaxSector; - info->reshape_active = 0; + info->reshape_active = (prev_map != NULL); + if (info->reshape_active) + info->delta_disks = map->num_members - prev_map->num_members; + else + info->delta_disks = 0; - if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) { + if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED || + dev->vol.dirty) { info->resync_start = 0; } else if (dev->vol.migr_state) { 
switch (migr_type(dev)) { @@ -1512,40 +1672,22 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) free(devname); info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */ uuid_from_super_imsm(st, info->uuid); -} -/* check the config file to see if we can return a real uuid for this spare */ -static void fixup_container_spare_uuid(struct mdinfo *inf) -{ - struct mddev_ident_s *array_list; - - if (inf->array.level != LEVEL_CONTAINER || - memcmp(inf->uuid, uuid_match_any, sizeof(int[4])) != 0) - return; - - array_list = conf_get_ident(NULL); - - for (; array_list; array_list = array_list->next) { - if (array_list->uuid_set) { - struct supertype *_sst; /* spare supertype */ - struct supertype *_cst; /* container supertype */ - - _cst = array_list->st; - if (_cst) - _sst = _cst->ss->match_metadata_desc(inf->text_version); - else - _sst = NULL; - - if (_sst) { - memcpy(inf->uuid, array_list->uuid, sizeof(int[4])); - free(_sst); - break; + if (dmap) { + int i, j; + for (i=0; i<map_disks; i++) { + dmap[i] = 0; + if (i < info->array.raid_disks) { + struct imsm_disk *dsk; + j = get_imsm_disk_idx(dev, i, -1); + dsk = get_imsm_disk(super, j); + if (dsk && (dsk->status & CONFIGURED_DISK)) + dmap[i] = 1; } } } } - static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed); static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev); @@ -1559,13 +1701,17 @@ static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index) return NULL; } -static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) +static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map) { struct intel_super *super = st->sb; struct imsm_disk *disk; + int map_disks = info->array.raid_disks; + int max_enough = -1; + int i; + struct imsm_super *mpb; if (super->current_vol >= 0) { - getinfo_super_imsm_volume(st, info); + getinfo_super_imsm_volume(st, info, map); return; } @@ -1594,51 +1740,47 @@ static void
getinfo_super_imsm(struct supertype *st, struct mdinfo *info) info->recovery_start = MaxSector; /* do we have the all the insync disks that we expect? */ - if (st->loaded_container) { - struct imsm_super *mpb = super->anchor; - int max_enough = -1, i; + mpb = super->anchor; - for (i = 0; i < mpb->num_raid_devs; i++) { - struct imsm_dev *dev = get_imsm_dev(super, i); - int failed, enough, j, missing = 0; - struct imsm_map *map; - __u8 state; + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + int failed, enough, j, missing = 0; + struct imsm_map *map; + __u8 state; - failed = imsm_count_failed(super, dev); - state = imsm_check_degraded(super, dev, failed); - map = get_imsm_map(dev, dev->vol.migr_state); + failed = imsm_count_failed(super, dev); + state = imsm_check_degraded(super, dev, failed); + map = get_imsm_map(dev, dev->vol.migr_state); - /* any newly missing disks? - * (catches single-degraded vs double-degraded) - */ - for (j = 0; j < map->num_members; j++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i); - __u32 idx = ord_to_idx(ord); + /* any newly missing disks? 
+ * (catches single-degraded vs double-degraded) + */ + for (j = 0; j < map->num_members; j++) { + __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); + __u32 idx = ord_to_idx(ord); - if (!(ord & IMSM_ORD_REBUILD) && - get_imsm_missing(super, idx)) { - missing = 1; - break; - } + if (!(ord & IMSM_ORD_REBUILD) && + get_imsm_missing(super, idx)) { + missing = 1; + break; } + } - if (state == IMSM_T_STATE_FAILED) - enough = -1; - else if (state == IMSM_T_STATE_DEGRADED && - (state != map->map_state || missing)) - enough = 0; - else /* we're normal, or already degraded */ - enough = 1; + if (state == IMSM_T_STATE_FAILED) + enough = -1; + else if (state == IMSM_T_STATE_DEGRADED && + (state != map->map_state || missing)) + enough = 0; + else /* we're normal, or already degraded */ + enough = 1; - /* in the missing/failed disk case check to see - * if at least one array is runnable - */ - max_enough = max(max_enough, enough); - } - dprintf("%s: enough: %d\n", __func__, max_enough); - info->container_enough = max_enough; - } else - info->container_enough = -1; + /* in the missing/failed disk case check to see + * if at least one array is runnable + */ + max_enough = max(max_enough, enough); + } + dprintf("%s: enough: %d\n", __func__, max_enough); + info->container_enough = max_enough; if (super->disks) { __u32 reserved = imsm_reserved_sectors(super, super->disks); @@ -1660,10 +1802,61 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) */ if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs) uuid_from_super_imsm(st, info->uuid); - else { - memcpy(info->uuid, uuid_match_any, sizeof(int[4])); - fixup_container_spare_uuid(info); + else + memcpy(info->uuid, uuid_zero, sizeof(uuid_zero)); + + /* I don't know how to compute 'map' on imsm, so use safe default */ + if (map) { + int i; + for (i = 0; i < map_disks; i++) + map[i] = 1; + } + +} + +/* allocates memory and fills disk in mdinfo structure + * for each disk in array */ +struct 
mdinfo *getinfo_super_disks_imsm(struct supertype *st) +{ + struct mdinfo *mddev = NULL; + struct intel_super *super = st->sb; + struct imsm_disk *disk; + int count = 0; + struct dl *dl; + if (!super || !super->disks) + return NULL; + dl = super->disks; + mddev = malloc(sizeof(*mddev)); + if (!mddev) { + fprintf(stderr, Name ": Failed to allocate memory.\n"); + return NULL; } + memset(mddev, 0, sizeof(*mddev)); + while (dl) { + struct mdinfo *tmp; + disk = &dl->disk; + tmp = malloc(sizeof(*tmp)); + if (!tmp) { + fprintf(stderr, Name ": Failed to allocate memory.\n"); + if (mddev) + sysfs_free(mddev); + return NULL; + } + memset(tmp, 0, sizeof(*tmp)); + if (mddev->devs) + tmp->next = mddev->devs; + mddev->devs = tmp; + tmp->disk.number = count++; + tmp->disk.major = dl->major; + tmp->disk.minor = dl->minor; + tmp->disk.state = is_configured(disk) ? + (1 << MD_DISK_ACTIVE) : 0; + tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0; + tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC); + tmp->disk.raid_disk = -1; + dl = dl->next; + } + return mddev; } static int update_super_imsm(struct supertype *st, struct mdinfo *info, @@ -1705,8 +1898,7 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info, mpb = super->anchor; if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private) - fprintf(stderr, - Name ": '--uuid' not supported for imsm metadata\n"); + rv = -1; else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) { mpb->orig_family_num = *((__u32 *) info->update_private); rv = 0; @@ -1727,9 +1919,7 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info, } else if (strcmp(update, "assemble") == 0) rv = 0; else - fprintf(stderr, - Name ": '--update=%s' not supported for imsm metadata\n", - update); + rv = -1; /* successful update? 
recompute checksum */ if (rv == 0) @@ -2086,7 +2276,8 @@ static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type) /* duplicate and then set the target end state in map[0] */ memcpy(dest, src, sizeof_imsm_map(src)); - if (migr_type == MIGR_REBUILD) { + if ((migr_type == MIGR_REBUILD) || + (migr_type == MIGR_GEN_MIGR)) { __u32 ord; int i; @@ -2103,18 +2294,26 @@ static void end_migration(struct imsm_dev *dev, __u8 map_state) { struct imsm_map *map = get_imsm_map(dev, 0); struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state); - int i; + int i, j; /* merge any IMSM_ORD_REBUILD bits that were not successfully * completed in the last migration. * - * FIXME add support for online capacity expansion and - * raid-level-migration + * FIXME add support for raid-level-migration */ for (i = 0; i < prev->num_members; i++) - map->disk_ord_tbl[i] |= prev->disk_ord_tbl[i]; + for (j = 0; j < map->num_members; j++) + /* during online capacity expansion + * disks position can be changed if takeover is used + */ + if (ord_to_idx(map->disk_ord_tbl[j]) == + ord_to_idx(prev->disk_ord_tbl[i])) { + map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i]; + break; + } dev->vol.migr_state = 0; + dev->vol.migr_type = 0; dev->vol.curr_migr_unit = 0; map->map_state = map_state; } @@ -2331,6 +2530,7 @@ static void __free_imsm_disk(struct dl *d) free(d); } + static void free_imsm_disks(struct intel_super *super) { struct dl *d; @@ -2763,7 +2963,7 @@ imsm_thunderdome(struct intel_super **super_list, int len) } static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, - char *devname, int keep_fd) + char *devname) { struct mdinfo *sra; struct intel_super *super_list = NULL; @@ -2799,22 +2999,20 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, err = 2; sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); - dfd = dev_open(nm, keep_fd ? 
O_RDWR : O_RDONLY); + dfd = dev_open(nm, O_RDWR); if (dfd < 0) goto error; - err = load_and_parse_mpb(dfd, s, NULL, keep_fd); + err = load_and_parse_mpb(dfd, s, NULL, 1); /* retry the load if we might have raced against mdmon */ if (err == 3 && mdmon_running(devnum)) for (retry = 0; retry < 3; retry++) { usleep(3000); - err = load_and_parse_mpb(dfd, s, NULL, keep_fd); + err = load_and_parse_mpb(dfd, s, NULL, 1); if (err != 3) break; } - if (!keep_fd) - close(dfd); if (err) goto error; } @@ -2831,25 +3029,6 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, err = 2; goto error; } - - if (st->subarray[0]) { - unsigned long val; - char *ep; - - err = 1; - val = strtoul(st->subarray, &ep, 10); - if (*ep != '\0') { - free_imsm(super); - goto error; - } - - if (val < super->anchor->num_raid_devs) - super->current_vol = val; - else { - free_imsm(super); - goto error; - } - } err = 0; error: @@ -2871,10 +3050,13 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; } - st->loaded_container = 1; - return 0; } + +static int load_container_imsm(struct supertype *st, int fd, char *devname) +{ + return load_super_imsm_all(st, fd, &st->sb, devname); +} #endif static int load_super_imsm(struct supertype *st, int fd, char *devname) @@ -2882,11 +3064,6 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) struct intel_super *super; int rv; -#ifndef MDASSEMBLE - if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0) - return 0; -#endif - if (test_partition(fd)) /* IMSM not allowed on partitions */ return 1; @@ -2912,32 +3089,12 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) return rv; } - if (st->subarray[0]) { - unsigned long val; - char *ep; - - val = strtoul(st->subarray, &ep, 10); - if (*ep != '\0') { - free_imsm(super); - return 1; - } - - if (val < super->anchor->num_raid_devs) - super->current_vol = val; - else { - 
free_imsm(super); - return 1; - } - } - st->sb = super; if (st->ss == NULL) { st->ss = &super_imsm; st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; } - st->loaded_container = 0; - return 0; } @@ -3090,7 +3247,6 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, if (!check_name(super, name, 0)) return 0; - sprintf(st->subarray, "%d", idx); dv = malloc(sizeof(*dv)); if (!dv) { fprintf(stderr, Name ": failed to allocate device list entry\n"); @@ -3269,7 +3425,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, /* Check the device has not already been added */ slot = get_imsm_disk_slot(map, dl->index); if (slot >= 0 && - (get_imsm_ord_tbl_ent(dev, slot) & IMSM_ORD_REBUILD) == 0) { + (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) { fprintf(stderr, Name ": %s has been included in this array twice\n", devname); return 1; @@ -3335,6 +3491,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, dd->devname = devname ? strdup(devname) : NULL; dd->fd = fd; dd->e = NULL; + dd->action = DISK_ADD; rv = imsm_read_serial(fd, devname, dd->serial); if (rv) { fprintf(stderr, @@ -3354,8 +3511,8 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, dd->disk.scsi_id = __cpu_to_le32(0); if (st->update_tail) { - dd->next = super->add; - super->add = dd; + dd->next = super->disk_mgmt_list; + super->disk_mgmt_list = dd; } else { dd->next = super->disks; super->disks = dd; @@ -3364,6 +3521,43 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, return 0; } + +static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk) +{ + struct intel_super *super = st->sb; + struct dl *dd; + + /* remove from super works only in mdmon - for communication + * manager - monitor. Check if communication memory buffer + * is prepared. 
+ */ + if (!st->update_tail) { + fprintf(stderr, + Name ": %s shall be used in mdmon context only" + "(line %d).\n", __func__, __LINE__); + return 1; + } + dd = malloc(sizeof(*dd)); + if (!dd) { + fprintf(stderr, + Name ": malloc failed %s:%d.\n", __func__, __LINE__); + return 1; + } + memset(dd, 0, sizeof(*dd)); + dd->major = dk->major; + dd->minor = dk->minor; + dd->index = -1; + dd->fd = -1; + dd->disk.status = SPARE_DISK; + dd->action = DISK_REMOVE; + + dd->next = super->disk_mgmt_list; + super->disk_mgmt_list = dd; + + + return 0; +} + static int store_imsm_mpb(int fd, struct imsm_super *mpb); static union { @@ -3417,8 +3611,9 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) return 0; } -static int write_super_imsm(struct intel_super *super, int doclose) +static int write_super_imsm(struct supertype *st, int doclose) { + struct intel_super *super = st->sb; struct imsm_super *mpb = super->anchor; struct dl *d; __u32 generation; @@ -3426,6 +3621,7 @@ static int write_super_imsm(struct intel_super *super, int doclose) int spares = 0; int i; __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk); + int num_disks = 0; /* 'generation' is incremented everytime the metadata is written */ generation = __le32_to_cpu(mpb->generation_num); @@ -3438,21 +3634,28 @@ static int write_super_imsm(struct intel_super *super, int doclose) if (mpb->orig_family_num == 0) mpb->orig_family_num = mpb->family_num; - mpb_size += sizeof(struct imsm_disk) * mpb->num_disks; for (d = super->disks; d; d = d->next) { if (d->index == -1) spares++; - else + else { mpb->disk[d->index] = d->disk; + num_disks++; + } } - for (d = super->missing; d; d = d->next) + for (d = super->missing; d; d = d->next) { mpb->disk[d->index] = d->disk; + num_disks++; + } + mpb->num_disks = num_disks; + mpb_size += sizeof(struct imsm_disk) * mpb->num_disks; for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = __get_imsm_dev(mpb, i); - - imsm_copy_dev(dev, 
get_imsm_dev(super, i)); - mpb_size += sizeof_imsm_dev(dev, 0); + struct imsm_dev *dev2 = get_imsm_dev(super, i); + if (dev && dev2) { + imsm_copy_dev(dev, dev2); + mpb_size += sizeof_imsm_dev(dev, 0); + } } mpb_size += __le32_to_cpu(mpb->bbm_log_size); mpb->mpb_size = __cpu_to_le32(mpb_size); @@ -3506,7 +3709,7 @@ static int create_array(struct supertype *st, int dev_idx) imsm_copy_dev(&u->dev, dev); inf = get_disk_info(u); for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(dev, i); + int idx = get_imsm_disk_idx(dev, i, -1); disk = get_imsm_disk(super, idx); serialcpy(inf[i].serial, disk->serial); @@ -3516,13 +3719,13 @@ static int create_array(struct supertype *st, int dev_idx) return 0; } -static int _add_disk(struct supertype *st) +static int mgmt_disk(struct supertype *st) { struct intel_super *super = st->sb; size_t len; - struct imsm_update_add_disk *u; + struct imsm_update_add_remove_disk *u; - if (!super->add) + if (!super->disk_mgmt_list) return 0; len = sizeof(*u); @@ -3533,7 +3736,7 @@ static int _add_disk(struct supertype *st) return 1; } - u->type = update_add_disk; + u->type = update_add_remove_disk; append_metadata_update(st, u, len); return 0; @@ -3550,29 +3753,23 @@ static int write_init_super_imsm(struct supertype *st) if (st->update_tail) { /* queue the recently created array / added disk * as a metadata update */ - struct dl *d; int rv; /* determine if we are creating a volume or adding a disk */ if (current_vol < 0) { - /* in the add disk case we are running in mdmon - * context, so don't close fd's + /* in the mgmt (add/remove) disk case we are running + * in mdmon context, so don't close fd's */ - return _add_disk(st); + return mgmt_disk(st); } else rv = create_array(st, current_vol); - for (d = super->disks; d ; d = d->next) { - close(d->fd); - d->fd = -1; - } - return rv; } else { struct dl *d; for (d = super->disks; d; d = d->next) Kill(d->devname, NULL, 0, 1, 1); - return write_super_imsm(st->sb, 1); + return 
write_super_imsm(st, 1); } } #endif @@ -4104,7 +4301,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, */ struct intel_super *super; - if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) { + if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) { st->sb = super; st->container_dev = fd2devnum(cfd); close(cfd); @@ -4122,14 +4319,19 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, return 0; } -static int default_chunk_imsm(struct supertype *st) +static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk) { struct intel_super *super = st->sb; - if (!super->orom) - return 0; + if (level && *level == UnSet) + *level = LEVEL_CONTAINER; - return imsm_orom_default_chunk(super->orom); + if (level && layout && *layout == UnSet) + *layout = imsm_level_to_layout(*level); + + if (chunk && (*chunk == UnSet || *chunk == 0) && + super && super->orom) + *chunk = imsm_orom_default_chunk(super->orom); } static void handle_missing(struct intel_super *super, struct imsm_dev *dev); @@ -4208,19 +4410,19 @@ static int kill_subarray_imsm(struct supertype *st) return 0; } -static int update_subarray_imsm(struct supertype *st, char *update, mddev_ident_t ident) +static int update_subarray_imsm(struct supertype *st, char *subarray, + char *update, struct mddev_ident *ident) { /* update the subarray currently referenced by ->current_vol */ struct intel_super *super = st->sb; struct imsm_super *mpb = super->anchor; - if (super->current_vol < 0) - return 2; - if (strcmp(update, "name") == 0) { char *name = ident->name; + char *ep; + int vol; - if (is_subarray_active(st->subarray, st->devname)) { + if (is_subarray_active(subarray, st->devname)) { fprintf(stderr, Name ": Unable to update name of active subarray\n"); return 2; @@ -4229,20 +4431,24 @@ static int update_subarray_imsm(struct supertype *st, char *update, mddev_ident_ if (!check_name(super, name, 0)) return 2; + vol 
= strtoul(subarray, &ep, 10); + if (*ep != '\0' || vol >= super->anchor->num_raid_devs) + return 2; + if (st->update_tail) { struct imsm_update_rename_array *u = malloc(sizeof(*u)); if (!u) return 2; u->type = update_rename_array; - u->dev_idx = super->current_vol; + u->dev_idx = vol; snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name); append_metadata_update(st, u, sizeof(*u)); } else { struct imsm_dev *dev; int i; - dev = get_imsm_dev(super, super->current_vol); + dev = get_imsm_dev(super, vol); snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name); for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); @@ -4257,6 +4463,17 @@ static int update_subarray_imsm(struct supertype *st, char *update, mddev_ident_ } #endif /* MDASSEMBLE */ +static int is_gen_migration(struct imsm_dev *dev) +{ + if (!dev->vol.migr_state) + return 0; + + if (migr_type(dev) == MIGR_GEN_MIGR) + return 1; + + return 0; +} + static int is_rebuilding(struct imsm_dev *dev) { struct imsm_map *migr_map; @@ -4306,11 +4523,12 @@ static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array) } -static struct mdinfo *container_content_imsm(struct supertype *st) +static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray) { /* Given a container loaded by load_super_imsm_all, * extract information about all the arrays into * an mdinfo tree. + * If 'subarray' is given, just extract info about that array. * * For each imsm_dev create an mdinfo, fill it in, * then look for matching devices in super->disks @@ -4319,27 +4537,34 @@ static struct mdinfo *container_content_imsm(struct supertype *st) struct intel_super *super = st->sb; struct imsm_super *mpb = super->anchor; struct mdinfo *rest = NULL; - int i; + unsigned int i; + int bbm_errors = 0; - /* do not assemble arrays that might have bad blocks */ - if (imsm_bbm_log_size(super->anchor)) { - fprintf(stderr, Name ": BBM log found in metadata. 
" - "Cannot activate array(s).\n"); - return NULL; - } + /* check for bad blocks */ + if (imsm_bbm_log_size(super->anchor)) + bbm_errors = 1; for (i = 0; i < mpb->num_raid_devs; i++) { - struct imsm_dev *dev = get_imsm_dev(super, i); - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_dev *dev; + struct imsm_map *map; + struct imsm_map *map2; struct mdinfo *this; int slot; + char *ep; + + if (subarray && + (i != strtoul(subarray, &ep, 10) || *ep != '\0')) + continue; + + dev = get_imsm_dev(super, i); + map = get_imsm_map(dev, 0); + map2 = get_imsm_map(dev, 1); /* do not publish arrays that are in the middle of an * unsupported migration */ if (dev->vol.migr_state && - (migr_type(dev) == MIGR_GEN_MIGR || - migr_type(dev) == MIGR_STATE_CHANGE)) { + (migr_type(dev) == MIGR_STATE_CHANGE)) { fprintf(stderr, Name ": cannot assemble volume '%.16s':" " unsupported migration in progress\n", dev->volume); @@ -4356,7 +4581,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st) this->next = rest; super->current_vol = i; - getinfo_super_imsm_volume(st, this); + getinfo_super_imsm_volume(st, this, NULL); for (slot = 0 ; slot < map->num_members; slot++) { unsigned long long recovery_start; struct mdinfo *info_d; @@ -4366,8 +4591,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st) __u32 ord; skip = 0; - idx = get_imsm_disk_idx(dev, slot); - ord = get_imsm_ord_tbl_ent(dev, slot); + idx = get_imsm_disk_idx(dev, slot, 0); + ord = get_imsm_ord_tbl_ent(dev, slot, 0); for (d = super->disks; d ; d = d->next) if (d->index == idx) break; @@ -4415,7 +4640,17 @@ static struct mdinfo *container_content_imsm(struct supertype *st) info_d->disk.minor = d->minor; info_d->disk.raid_disk = slot; info_d->recovery_start = recovery_start; - + if (map2) { + if (slot < map2->num_members) + info_d->disk.state = (1 << MD_DISK_ACTIVE); + else + this->array.spare_disks++; + } else { + if (slot < map->num_members) + info_d->disk.state = (1 << MD_DISK_ACTIVE); + 
else + this->array.spare_disks++; + } if (info_d->recovery_start == MaxSector) this->array.working_disks++; @@ -4428,6 +4663,10 @@ static struct mdinfo *container_content_imsm(struct supertype *st) rest = this; } + /* if array has bad blocks, set suitable bit in array status */ + if (bbm_errors) + rest->array.state |= (1<num_members; i++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i); + __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); int idx = ord_to_idx(ord); struct imsm_disk *disk; @@ -4559,9 +4798,13 @@ static int is_resyncing(struct imsm_dev *dev) migr_type(dev) == MIGR_REPAIR) return 1; + if (migr_type(dev) == MIGR_GEN_MIGR) + return 0; + migr_map = get_imsm_map(dev, 1); - if (migr_map->map_state == IMSM_T_STATE_NORMAL) + if ((migr_map->map_state == IMSM_T_STATE_NORMAL) && + (dev->vol.migr_type != MIGR_GEN_MIGR)) return 1; else return 0; @@ -4622,22 +4865,150 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev) super->updates_pending++; } -/* Handle dirty -> clean transititions and resync. Degraded and rebuild - * states are handled in imsm_set_disk() with one exception, when a - * resync is stopped due to a new failure this routine will set the - * 'degraded' state for the array. - */ -static int imsm_set_array_state(struct active_array *a, int consistent) +static void imsm_set_disk(struct active_array *a, int n, int state); + +static void imsm_progress_container_reshape(struct intel_super *super) { - int inst = a->info.container_member; - struct intel_super *super = a->container->sb; - struct imsm_dev *dev = get_imsm_dev(super, inst); - struct imsm_map *map = get_imsm_map(dev, 0); - int failed = imsm_count_failed(super, dev); - __u8 map_state = imsm_check_degraded(super, dev, failed); - __u32 blocks_per_unit; + /* if no device has a migr_state, but some device has a + * different number of members than the previous device, start + * changing the number of devices in this device to match + * previous. 
+ */ + struct imsm_super *mpb = super->anchor; + int prev_disks = -1; + int i; - /* before we activate this array handle any missing disks */ + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map2; + int prev_num_members; + int used_disks; + + if (dev->vol.migr_state) + return; + + if (prev_disks == -1) + prev_disks = map->num_members; + if (prev_disks == map->num_members) + continue; + + /* OK, this array needs to enter reshape mode. + * i.e it needs a migr_state + */ + + prev_num_members = map->num_members; + map->num_members = prev_disks; + dev->vol.migr_state = 1; + dev->vol.curr_migr_unit = 0; + dev->vol.migr_type = MIGR_GEN_MIGR; + for (i = prev_num_members; + i < map->num_members; i++) + set_imsm_ord_tbl_ent(map, i, i); + map2 = get_imsm_map(dev, 1); + /* Copy the current map */ + memcpy(map2, map, sizeof_imsm_map(map)); + map2->num_members = prev_num_members; + + /* calculate new size + */ + used_disks = imsm_num_data_members(dev, 0); + if (used_disks) { + unsigned long long array_blocks; + + array_blocks = + map->blocks_per_member + * used_disks; + /* round array size down to closest MB + */ + array_blocks = (array_blocks + >> SECT_PER_MB_SHIFT) + << SECT_PER_MB_SHIFT; + dev->size_low = + __cpu_to_le32((__u32)array_blocks); + dev->size_high = + __cpu_to_le32( + (__u32)(array_blocks >> 32)); + } + super->updates_pending++; + } +} + +/* Handle dirty -> clean transititions, resync and reshape. Degraded and rebuild + * states are handled in imsm_set_disk() with one exception, when a + * resync is stopped due to a new failure this routine will set the + * 'degraded' state for the array. 
+ */ +static int imsm_set_array_state(struct active_array *a, int consistent) +{ + int inst = a->info.container_member; + struct intel_super *super = a->container->sb; + struct imsm_dev *dev = get_imsm_dev(super, inst); + struct imsm_map *map = get_imsm_map(dev, 0); + int failed = imsm_count_failed(super, dev); + __u8 map_state = imsm_check_degraded(super, dev, failed); + __u32 blocks_per_unit; + + if (dev->vol.migr_state && + dev->vol.migr_type == MIGR_GEN_MIGR) { + /* array state change is blocked due to reshape action + * We might need to + * - abort the reshape (if last_checkpoint is 0 and action!= reshape) + * - finish the reshape (if last_checkpoint is big and action != reshape) + * - update curr_migr_unit + */ + if (a->curr_action == reshape) { + /* still reshaping, maybe update curr_migr_unit */ + long long blocks_per_unit = blocks_per_migr_unit(dev); + long long unit = a->last_checkpoint; + if (blocks_per_unit) { + unit /= blocks_per_unit; + if (unit > + __le32_to_cpu(dev->vol.curr_migr_unit)) { + dev->vol.curr_migr_unit = + __cpu_to_le32(unit); + super->updates_pending++; + } + } + return 0; + } else { + if (a->last_checkpoint == 0 && a->prev_action == reshape) { + /* for some reason we aborted the reshape. 
+ * Better clean up + */ + struct imsm_map *map2 = get_imsm_map(dev, 1); + dev->vol.migr_state = 0; + dev->vol.migr_type = 0; + dev->vol.curr_migr_unit = 0; + memcpy(map, map2, sizeof_imsm_map(map2)); + super->updates_pending++; + } + if (a->last_checkpoint >= a->info.component_size) { + unsigned long long array_blocks; + int used_disks; + /* it seems the reshape is all done */ + dev->vol.migr_state = 0; + dev->vol.migr_type = 0; + dev->vol.curr_migr_unit = 0; + + used_disks = imsm_num_data_members(dev, -1); + array_blocks = map->blocks_per_member * used_disks; + /* round array size down to closest MB */ + array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) + << SECT_PER_MB_SHIFT; + dev->size_low = __cpu_to_le32((__u32) array_blocks); + dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32)); + a->info.custom_array_size = array_blocks; + a->check_reshape = 1; /* encourage manager to update + * array size + */ + super->updates_pending++; + imsm_progress_container_reshape(super); + } + } + } + + /* before we activate this array handle any missing disks */ if (consistent == 2) handle_missing(super, dev); @@ -4697,6 +5068,16 @@ static int imsm_set_array_state(struct active_array *a, int consistent) dev->vol.dirty = 1; super->updates_pending++; } + + /* finalize online capacity expansion/reshape */ + if ((a->curr_action != reshape) && + (a->prev_action == reshape)) { + struct mdinfo *mdi; + + for (mdi = a->info.devs; mdi; mdi = mdi->next) + imsm_set_disk(a, mdi->disk.raid_disk, mdi->curr_state); + } + return consistent; } @@ -4720,7 +5101,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) dprintf("imsm: set_disk %d:%x\n", n, state); - ord = get_imsm_ord_tbl_ent(dev, n); + ord = get_imsm_ord_tbl_ent(dev, n, -1); disk = get_imsm_disk(super, ord_to_idx(ord)); /* check for new failures */ @@ -4760,6 +5141,23 @@ static void imsm_set_disk(struct active_array *a, int n, int state) end_migration(dev, map_state); super->updates_pending++; 
a->last_checkpoint = 0; + } else if (is_gen_migration(dev)) { + dprintf("imsm: Detected General Migration in state: "); + if (map_state == IMSM_T_STATE_NORMAL) { + end_migration(dev, map_state); + map = get_imsm_map(dev, 0); + map->failed_disk_num = ~0; + dprintf("normal\n"); + } else { + if (map_state == IMSM_T_STATE_DEGRADED) { + printf("degraded\n"); + end_migration(dev, map_state); + } else { + dprintf("failed\n"); + } + map->map_state = map_state; + } + super->updates_pending++; } } @@ -4799,10 +5197,11 @@ static void imsm_sync_metadata(struct supertype *container) { struct intel_super *super = container->sb; + dprintf("sync metadata: %d\n", super->updates_pending); if (!super->updates_pending) return; - write_super_imsm(super, 0); + write_super_imsm(container, 0); super->updates_pending = 0; } @@ -4810,7 +5209,7 @@ static void imsm_sync_metadata(struct supertype *container) static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - int i = get_imsm_disk_idx(dev, idx); + int i = get_imsm_disk_idx(dev, idx, -1); struct dl *dl; for (dl = super->disks; dl; dl = dl->next) @@ -4827,10 +5226,11 @@ static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_a } static struct dl *imsm_add_spare(struct intel_super *super, int slot, - struct active_array *a, int activate_new) + struct active_array *a, int activate_new, + struct mdinfo *additional_test_list) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - int idx = get_imsm_disk_idx(dev, slot); + int idx = get_imsm_disk_idx(dev, slot, -1); struct imsm_super *mpb = super->anchor; struct imsm_map *map; unsigned long long pos; @@ -4841,6 +5241,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, __u32 array_start = 0; __u32 array_end = 0; struct dl *dl; + struct mdinfo *test_list; for (dl = super->disks; dl; dl = dl->next) { /* If in this array, skip */ @@ 
-4848,11 +5249,24 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, if (d->state_fd >= 0 && d->disk.major == dl->major && d->disk.minor == dl->minor) { - dprintf("%x:%x already in array\n", dl->major, dl->minor); + dprintf("%x:%x already in array\n", + dl->major, dl->minor); break; } if (d) continue; + test_list = additional_test_list; + while (test_list) { + if (test_list->disk.major == dl->major && + test_list->disk.minor == dl->minor) { + dprintf("%x:%x already in additional test list\n", + dl->major, dl->minor); + break; + } + test_list = test_list->next; + } + if (test_list) + continue; /* skip in use or failed drives */ if (is_failed(&dl->disk) || idx == dl->index || @@ -4922,6 +5336,45 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, return dl; } + +static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed) +{ + struct imsm_dev *dev2; + struct imsm_map *map; + struct dl *idisk; + int slot; + int idx; + __u8 state; + + dev2 = get_imsm_dev(cont->sb, dev_idx); + if (dev2) { + state = imsm_check_degraded(cont->sb, dev2, failed); + if (state == IMSM_T_STATE_FAILED) { + map = get_imsm_map(dev2, 0); + if (!map) + return 1; + for (slot = 0; slot < map->num_members; slot++) { + /* + * Check if failed disks are deleted from intel + * disk list or are marked to be deleted + */ + idx = get_imsm_disk_idx(dev2, slot, -1); + idisk = get_imsm_dl_disk(cont->sb, idx); + /* + * Do not rebuild the array if failed disks + * from failed sub-array are not removed from + * container. 
+ */ + if (idisk && + is_failed(&idisk->disk) && + (idisk->action != DISK_REMOVE)) + return 0; + } + } + } + return 1; +} + static struct mdinfo *imsm_activate_spare(struct active_array *a, struct metadata_update **updates) { @@ -4949,6 +5402,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, struct imsm_update_activate_spare *u; int num_spares = 0; int i; + int allowed; for (d = a->info.devs ; d ; d = d->next) { if ((d->curr_state & DS_FAULTY) && @@ -4961,9 +5415,41 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n", inst, failed, a->info.array.raid_disks, a->info.array.level); + + if (dev->vol.migr_state && + dev->vol.migr_type == MIGR_GEN_MIGR) + /* No repair during migration */ + return NULL; + + if (a->info.array.level == 4) + /* No repair for takeovered array + * imsm doesn't support raid4 + */ + return NULL; + if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED) return NULL; + /* + * If there are any failed disks check state of the other volume. + * Block rebuild if the another one is failed until failed disks + * are removed from container. 
+ */ + if (failed) { + dprintf("found failed disks in %s, check if there another" + "failed sub-array.\n", + dev->volume); + /* check if states of the other volumes allow for rebuild */ + for (i = 0; i < super->anchor->num_raid_devs; i++) { + if (i != inst) { + allowed = imsm_rebuild_allowed(a->container, + i, failed); + if (!allowed) + return NULL; + } + } + } + /* For each slot, if it is not working, find a spare */ for (i = 0; i < a->info.array.raid_disks; i++) { for (d = a->info.devs ; d ; d = d->next) @@ -4982,9 +5468,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, */ dl = imsm_readd(super, i, a); if (!dl) - dl = imsm_add_spare(super, i, a, 0); + dl = imsm_add_spare(super, i, a, 0, NULL); if (!dl) - dl = imsm_add_spare(super, i, a, 1); + dl = imsm_add_spare(super, i, a, 1, NULL); if (!dl) continue; @@ -5051,6 +5537,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, } mu->space = NULL; + mu->space_list = NULL; mu->len = sizeof(struct imsm_update_activate_spare) * num_spares; mu->next = *updates; u = (struct imsm_update_activate_spare *) mu->buf; @@ -5081,7 +5568,7 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_ int j; for (i = 0; i < map->num_members; i++) { - disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1)); for (j = 0; j < new_map->num_members; j++) if (serialcmp(disk->serial, inf[j].serial) == 0) return 1; @@ -5090,18 +5577,267 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_ return 0; } + +static struct dl *get_disk_super(struct intel_super *super, int major, int minor) +{ + struct dl *dl = NULL; + for (dl = super->disks; dl; dl = dl->next) + if ((dl->major == major) && (dl->minor == minor)) + return dl; + return NULL; +} + +static int remove_disk_super(struct intel_super *super, int major, int minor) +{ + struct dl *prev = NULL; + struct dl *dl; + + prev = NULL; + for (dl = 
super->disks; dl; dl = dl->next) { + if ((dl->major == major) && (dl->minor == minor)) { + /* remove */ + if (prev) + prev->next = dl->next; + else + super->disks = dl->next; + dl->next = NULL; + __free_imsm_disk(dl); + dprintf("%s: removed %x:%x\n", + __func__, major, minor); + break; + } + prev = dl; + } + return 0; +} + static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index); +static int add_remove_disk_update(struct intel_super *super) +{ + int check_degraded = 0; + struct dl *disk = NULL; + /* add/remove some spares to/from the metadata/contrainer */ + while (super->disk_mgmt_list) { + struct dl *disk_cfg; + + disk_cfg = super->disk_mgmt_list; + super->disk_mgmt_list = disk_cfg->next; + disk_cfg->next = NULL; + + if (disk_cfg->action == DISK_ADD) { + disk_cfg->next = super->disks; + super->disks = disk_cfg; + check_degraded = 1; + dprintf("%s: added %x:%x\n", + __func__, disk_cfg->major, + disk_cfg->minor); + } else if (disk_cfg->action == DISK_REMOVE) { + dprintf("Disk remove action processed: %x.%x\n", + disk_cfg->major, disk_cfg->minor); + disk = get_disk_super(super, + disk_cfg->major, + disk_cfg->minor); + if (disk) { + /* store action status */ + disk->action = DISK_REMOVE; + /* remove spare disks only */ + if (disk->index == -1) { + remove_disk_super(super, + disk_cfg->major, + disk_cfg->minor); + } + } + /* release allocate disk structure */ + __free_imsm_disk(disk_cfg); + } + } + return check_degraded; +} + +static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, + struct intel_super *super, + void ***space_list) +{ + struct dl *new_disk; + struct intel_dev *id; + int i; + int delta_disks = u->new_raid_disks - u->old_raid_disks; + int disk_count = u->old_raid_disks; + void **tofree = NULL; + int devices_to_reshape = 1; + struct imsm_super *mpb = super->anchor; + int ret_val = 0; + + dprintf("imsm: imsm_process_update() for update_reshape\n"); + + /* enable spares to use in array */ + for (i = 0; i < 
delta_disks; i++) { + new_disk = get_disk_super(super, + major(u->new_disks[i]), + minor(u->new_disks[i])); + dprintf("imsm: imsm_process_update(): new disk " + "for reshape is: %i:%i (%p, index = %i)\n", + major(u->new_disks[i]), minor(u->new_disks[i]), + new_disk, new_disk->index); + if ((new_disk == NULL) || + ((new_disk->index >= 0) && + (new_disk->index < u->old_raid_disks))) + goto update_reshape_exit; + new_disk->index = disk_count++; + /* slot to fill in autolayout + */ + new_disk->raiddisk = new_disk->index; + new_disk->disk.status |= + CONFIGURED_DISK; + new_disk->disk.status &= ~SPARE_DISK; + } + + dprintf("imsm: process_update(): update_reshape: volume set" + " mpb->num_raid_devs = %i\n", mpb->num_raid_devs); + /* manage changes in volume + */ + for (id = super->devlist ; id; id = id->next) { + void **sp = *space_list; + struct imsm_dev *newdev; + struct imsm_map *newmap, *oldmap; + + if (!sp) + continue; + *space_list = *sp; + newdev = (void*)sp; + /* Copy the dev, but not (all of) the map */ + memcpy(newdev, id->dev, sizeof(*newdev)); + oldmap = get_imsm_map(id->dev, 0); + newmap = get_imsm_map(newdev, 0); + /* Copy the current map */ + memcpy(newmap, oldmap, sizeof_imsm_map(oldmap)); + /* update one device only + */ + if (devices_to_reshape) { + int used_disks; + + dprintf("process_update(): modifying " + "subdev: %i\n", id->index); + devices_to_reshape--; + newdev->vol.migr_state = 1; + newdev->vol.curr_migr_unit = 0; + newdev->vol.migr_type = MIGR_GEN_MIGR; + newmap->num_members = u->new_raid_disks; + for (i = 0; i < delta_disks; i++) { + set_imsm_ord_tbl_ent(newmap, + u->old_raid_disks + i, + u->old_raid_disks + i); + } + /* New map is correct, now need to save old map + */ + newmap = get_imsm_map(newdev, 1); + memcpy(newmap, oldmap, sizeof_imsm_map(oldmap)); + + /* calculate new size + */ + used_disks = imsm_num_data_members(newdev, 0); + if (used_disks) { + unsigned long long array_blocks; + + array_blocks = + newmap->blocks_per_member * 
used_disks; + /* round array size down to closest MB + */ + array_blocks = (array_blocks + >> SECT_PER_MB_SHIFT) + << SECT_PER_MB_SHIFT; + newdev->size_low = + __cpu_to_le32((__u32)array_blocks); + newdev->size_high = + __cpu_to_le32((__u32)(array_blocks >> 32)); + } + } + + sp = (void **)id->dev; + id->dev = newdev; + *sp = tofree; + tofree = sp; + } + if (tofree) + *space_list = tofree; + ret_val = 1; + +update_reshape_exit: + + return ret_val; +} + +static int apply_takeover_update(struct imsm_update_takeover *u, + struct intel_super *super) +{ + struct imsm_dev *dev = NULL; + struct imsm_map *map; + struct dl *dm, *du; + struct intel_dev *dv; + + for (dv = super->devlist; dv; dv = dv->next) + if (dv->index == (unsigned int)u->subarray) { + dev = dv->dev; + break; + } + + if (dev == NULL) + return 0; + + map = get_imsm_map(dev, 0); + + if (u->direction == R10_TO_R0) { + /* iterate through devices to mark removed disks as spare */ + for (dm = super->disks; dm; dm = dm->next) { + if (dm->disk.status & FAILED_DISK) { + int idx = dm->index; + /* update indexes on the disk list */ +/* FIXME this loop-with-the-loop looks wrong, I'm not convinced + the index values will end up being correct.... 
NB */ + for (du = super->disks; du; du = du->next) + if (du->index > idx) + du->index--; + /* mark as spare disk */ + dm->disk.status = SPARE_DISK; + dm->index = -1; + } + } + + /* update map */ + map->num_members = map->num_members / 2; + map->map_state = IMSM_T_STATE_NORMAL; + map->num_domains = 1; + map->raid_level = 0; + map->failed_disk_num = -1; + } + + /* update disk order table */ + for (du = super->disks; du; du = du->next) + if (du->index >= 0) + set_imsm_ord_tbl_ent(map, du->index, du->index); + + return 1; +} + static void imsm_process_update(struct supertype *st, struct metadata_update *update) { /** * crack open the metadata_update envelope to find the update record * update can be one of: - * update_activate_spare - a spare device has replaced a failed + * update_reshape_container_disks - all the arrays in the container + * are being reshaped to have more devices. We need to mark + * the arrays for general migration and convert selected spares + * into active devices. + * update_activate_spare - a spare device has replaced a failed * device in an array, update the disk_ord_tbl. 
If this disk is * present in all member arrays then also clear the SPARE_DISK * flag + * update_create_array + * update_kill_array + * update_rename_array + * update_add_remove_disk */ struct intel_super *super = st->sb; struct imsm_super *mpb; @@ -5126,6 +5862,20 @@ static void imsm_process_update(struct supertype *st, mpb = super->anchor; switch (type) { + case update_takeover: { + struct imsm_update_takeover *u = (void *)update->buf; + if (apply_takeover_update(u, super)) + super->updates_pending++; + break; + } + + case update_reshape_container_disks: { + struct imsm_update_reshape *u = (void *)update->buf; + if (apply_reshape_container_disks_update( + u, super, &update->space_list)) + super->updates_pending++; + break; + } case update_activate_spare: { struct imsm_update_activate_spare *u = (void *) update->buf; struct imsm_dev *dev = get_imsm_dev(super, u->array); @@ -5137,7 +5887,7 @@ static void imsm_process_update(struct supertype *st, struct dl *dl; unsigned int found; int failed; - int victim = get_imsm_disk_idx(dev, u->slot); + int victim = get_imsm_disk_idx(dev, u->slot, -1); int i; for (dl = super->disks; dl; dl = dl->next) @@ -5160,7 +5910,8 @@ static void imsm_process_update(struct supertype *st, for (i = 0; i < map->num_members; i++) { if (i == u->slot) continue; - disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + disk = get_imsm_disk(super, + get_imsm_disk_idx(dev, i, -1)); if (!disk || is_failed(disk)) failed++; } @@ -5401,31 +6152,24 @@ static void imsm_process_update(struct supertype *st, super->updates_pending++; break; } - case update_add_disk: - + case update_add_remove_disk: { /* we may be able to repair some arrays if disks are - * being added */ - if (super->add) { + * being added, check teh status of add_remove_disk + * if discs has been added. 
+ */ + if (add_remove_disk_update(super)) { struct active_array *a; super->updates_pending++; - for (a = st->arrays; a; a = a->next) + for (a = st->arrays; a; a = a->next) a->check_degraded = 1; } - /* add some spares to the metadata */ - while (super->add) { - struct dl *al; - - al = super->add; - super->add = al->next; - al->next = super->disks; - super->disks = al; - dprintf("%s: added %x:%x\n", - __func__, al->major, al->minor); - } - break; } + default: + fprintf(stderr, "error: unsuported process update type:" + "(type: %d)\n", type); + } } static void imsm_prepare_update(struct supertype *st, @@ -5445,6 +6189,39 @@ static void imsm_prepare_update(struct supertype *st, size_t len = 0; switch (type) { + case update_reshape_container_disks: { + /* Every raid device in the container is about to + * gain some more devices, and we will enter a + * reconfiguration. + * So each 'imsm_map' will be bigger, and the imsm_vol + * will now hold 2 of them. + * Thus we need new 'struct imsm_dev' allocations sized + * as sizeof_imsm_dev but with more devices in both maps. 
+ */ + struct imsm_update_reshape *u = (void *)update->buf; + struct intel_dev *dl; + void **space_tail = (void**)&update->space_list; + + dprintf("imsm: imsm_prepare_update() for update_reshape\n"); + + for (dl = super->devlist; dl; dl = dl->next) { + int size = sizeof_imsm_dev(dl->dev, 1); + void *s; + if (u->new_raid_disks > u->old_raid_disks) + size += sizeof(__u32)*2* + (u->new_raid_disks - u->old_raid_disks); + s = malloc(size); + if (!s) + break; + *space_tail = s; + space_tail = s; + *space_tail = NULL; + } + + len = disks_to_mpb_size(u->new_raid_disks); + dprintf("New anchor length is %llu\n", (unsigned long long)len); + break; + } case update_create_array: { struct imsm_update_create_array *u = (void *) update->buf; struct intel_dev *dv; @@ -5540,7 +6317,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind /* update ord entries being careful not to propagate * ord-flags to the first map */ - ord = get_imsm_ord_tbl_ent(dev, j); + ord = get_imsm_ord_tbl_ent(dev, j, -1); if (ord_to_idx(ord) <= index) continue; @@ -5564,6 +6341,524 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind } #endif /* MDASSEMBLE */ +static char disk_by_path[] = "/dev/disk/by-path/"; + +static const char *imsm_get_disk_controller_domain(const char *path) +{ + struct sys_dev *list, *hba = NULL; + char disk_path[PATH_MAX]; + int ahci = 0; + char *dpath = NULL; + + list = find_driver_devices("pci", "ahci"); + for (hba = list; hba; hba = hba->next) + if (devpath_to_vendor(hba->path) == 0x8086) + break; + + if (hba) { + struct stat st; + + strncpy(disk_path, disk_by_path, PATH_MAX - 1); + strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1); + if (stat(disk_path, &st) == 0) { + dpath = devt_to_devpath(st.st_rdev); + if (dpath) + ahci = path_attached_to_hba(dpath, hba->path); + } + } + dprintf("path: %s(%s) hba: %s attached: %d\n", + path, dpath, (hba) ? 
hba->path : "NULL", ahci); + free_sys_dev(&list); + if (ahci) + return "ahci"; + else + return NULL; +} +
+/* Look up the md device number of a member array.
+ * 'subdev' is formatted as a decimal string and passed, together with
+ * 'container', to mdstat_by_subdev().
+ * Returns 0 and stores the device number in *minor on success,
+ * -1 (leaving *minor untouched) when no mdstat entry is found.
+ */
+static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
+{
+	char subdev_name[20];
+	struct mdstat_ent *mdstat;
+
+	/* bounded formatting: never write past subdev_name */
+	snprintf(subdev_name, sizeof(subdev_name), "%d", subdev);
+	mdstat = mdstat_by_subdev(subdev_name, container);
+	if (!mdstat)
+		return -1;
+
+	*minor = mdstat->devnum;
+	free_mdstat(mdstat);
+	return 0;
+}
+
+/* Check whether a container-wide reshape described by 'geo' may be started.
+ * Parameters:
+ *	st		- supertype of the container
+ *	geo		- requested geometry; only raid_disks may be set
+ *	old_raid_disks	- in: 0, or the disk count every member must have;
+ *			  out: raid_disks of the members that were checked
+ * Returns: 1 if every member array passed all checks (and there is at
+ *	    least one member), 0 otherwise.
+ */
+static int imsm_reshape_is_allowed_on_container(struct supertype *st,
+						struct geo_params *geo,
+						int *old_raid_disks)
+{
+	/* currently we only support increasing the number of devices
+	 * for a container. This increases the number of device for each
+	 * member array. They must all be RAID0 or RAID5.
+	 */
+	int ret_val = 0;
+	struct mdinfo *info, *member;
+	int devices_that_can_grow = 0;
+
+	dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
+		"st->devnum = (%i)\n",
+		st->devnum);
+
+	if (geo->size != -1 ||
+	    geo->level != UnSet ||
+	    geo->layout != UnSet ||
+	    geo->chunksize != 0 ||
+	    geo->raid_disks == UnSet) {
+		dprintf("imsm: Container operation is allowed for "
+			"raid disks number change only.\n");
+		return ret_val;
+	}
+
+	info = container_content_imsm(st, NULL);
+	for (member = info; member; member = member->next) {
+		int result;
+		int minor;
+
+		dprintf("imsm: checking device_num: %i\n",
+			member->container_member);
+
+		if (geo->raid_disks <= member->array.raid_disks) {
+			/* we work on container for Online Capacity Expansion
+			 * only so raid_disks has to grow (an unchanged count
+			 * is not a reshape either)
+			 */
+			dprintf("imsm: for container operation raid disks "
+				"increase is required\n");
+			break;
+		}
+
+		/* every check below looks at the member being visited,
+		 * not at the head of the list
+		 */
+		if ((member->array.level != 0) &&
+		    (member->array.level != 5)) {
+			/* we cannot use this container with other raid level
+			 */
+			dprintf("imsm: for container operation wrong"
+				" raid level (%i) detected\n",
+				member->array.level);
+			break;
+		} else {
+			/* check for platform support
+			 * for this raid level configuration
+			 */
+			struct intel_super *super = st->sb;
+			if (!is_raid_level_supported(super->orom,
+						     member->array.level,
+						     geo->raid_disks)) {
+				dprintf("platform does not support raid%d with"
+					" %d disk%s\n",
+					member->array.level,
+					geo->raid_disks,
+					geo->raid_disks > 1 ? "s" : "");
+				break;
+			}
+		}
+
+		/* all members have to agree on the current disk count */
+		if (*old_raid_disks &&
+		    member->array.raid_disks != *old_raid_disks)
+			break;
+		*old_raid_disks = member->array.raid_disks;
+
+		/* All raid5 and raid0 volumes in container
+		 * have to be ready for Online Capacity Expansion
+		 * so they need to be assembled. We have already
+		 * checked that no recovery etc is happening.
+		 */
+		result = imsm_find_array_minor_by_subdev(member->container_member,
+							 st->container_dev,
+							 &minor);
+		if (result < 0) {
+			dprintf("imsm: cannot find array\n");
+			break;
+		}
+		devices_that_can_grow++;
+	}
+	sysfs_free(info);
+	/* member is NULL only when the loop ran to the end without a break */
+	if (!member && devices_that_can_grow)
+		ret_val = 1;
+
+	if (ret_val)
+		dprintf("\tContainer operation allowed\n");
+	else
+		dprintf("\tError: %i\n", ret_val);
+
+	return ret_val;
+}
+
+/* Function: get_spares_for_grow
+ * Description: Allocates memory and creates list of spare devices
+ *		available in container. Checks if spare drive size is acceptable.
+ * Parameters: Pointer to the supertype structure
+ * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
+ *		NULL if fail
+ */
+static struct mdinfo *get_spares_for_grow(struct supertype *st)
+{
+	unsigned long long min_size = min_acceptable_spare_size_imsm(st);
+	return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
+}
+
+/******************************************************************************
+ * function: imsm_create_metadata_update_for_reshape
+ * Function creates update for whole IMSM container.
+ *
+ * Returns the size in bytes of the update stored in *updatep on success,
+ * or 0 on failure, in which case *updatep is not written.
+ ******************************************************************************/
+static int imsm_create_metadata_update_for_reshape(
+	struct supertype *st,
+	struct geo_params *geo,
+	int old_raid_disks,
+	struct imsm_update_reshape **updatep)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_super *mpb = super->anchor;
+	int update_memory_size = 0;
+	struct imsm_update_reshape *u = NULL;
+	struct mdinfo *spares = NULL;
+	/* -1 can never equal delta_disks (> 0 below), so every early
+	 * 'goto abort' is reported as an error
+	 */
+	int i = -1;
+	int delta_disks = 0;
+	struct mdinfo *dev;
+
+	dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
+		geo->raid_disks);
+
+	delta_disks = geo->raid_disks - old_raid_disks;
+	if (delta_disks <= 0) {
+		/* nothing to add; also keeps the size computation below
+		 * from shrinking the allocation under the struct size
+		 */
+		dprintf("imsm: ERROR: raid disks number has to grow.\n");
+		return 0;
+	}
+
+	/* size of all update data without anchor */
+	update_memory_size = sizeof(struct imsm_update_reshape);
+
+	/* now add space for spare disks that we need to add.
+	 * The structure already holds one new_disks[] slot.
+	 */
+	update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
+
+	u = calloc(1, update_memory_size);
+	if (u == NULL) {
+		dprintf("error: "
+			"cannot get memory for imsm_update_reshape update\n");
+		return 0;
+	}
+	u->type = update_reshape_container_disks;
+	u->old_raid_disks = old_raid_disks;
+	u->new_raid_disks = geo->raid_disks;
+
+	/* now get spare disks list
+	 */
+	spares = get_spares_for_grow(st);
+
+	if (spares == NULL
+	    || delta_disks > spares->array.spare_disks) {
+		dprintf("imsm: ERROR: Cannot get spare devices.\n");
+		goto abort;
+	}
+
+	/* we have got spares
+	 * update disk list in imsm_disk list table in anchor
+	 */
+	dprintf("imsm: %i spares are available.\n\n",
+		spares->array.spare_disks);
+
+	dev = spares->devs;
+	for (i = 0; i < delta_disks; i++) {
+		struct dl *dl;
+
+		if (dev == NULL)
+			break;
+		u->new_disks[i] = makedev(dev->disk.major,
+					  dev->disk.minor);
+		dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
+		/* NOTE(review): get_disk_super() presumably returns NULL
+		 * when the disk is unknown - confirm; never dereference it
+		 * unchecked
+		 */
+		if (dl == NULL)
+			break;
+		dl->index = mpb->num_disks;
+		mpb->num_disks++;
+		dev = dev->next;
+	}
+
+abort:
+	/* free spares
+	 */
+	sysfs_free(spares);
+
+	dprintf("imsm: reshape update preparation :");
+	if (i == delta_disks) {
+		dprintf(" OK\n");
+		*updatep = u;
+		return update_memory_size;
+	}
+	free(u);
+	dprintf(" Error\n");
+
+	return 0;
+}
+
+/* Apply a metadata update to the local copy of the metadata.
+ * Wraps 'buf'/'len' in a metadata_update and runs it through
+ * imsm_prepare_update() and imsm_process_update(), then frees whatever
+ * is still chained on the update's space_list.
+ * 'buf' itself is not freed here.
+ */
+static void imsm_update_metadata_locally(struct supertype *st,
+					 void *buf, int len)
+{
+	struct metadata_update mu;
+
+	mu.buf = buf;
+	mu.len = len;
+	mu.space = NULL;
+	mu.space_list = NULL;
+	mu.next = NULL;
+	imsm_prepare_update(st, &mu);
+	imsm_process_update(st, &mu);
+
+	/* each space_list element stores the pointer to the next one
+	 * in its first word
+	 */
+	while (mu.space_list) {
+		void **space = mu.space_list;
+		mu.space_list = *space;
+		free(space);
+	}
+}
+
+/***************************************************************************
+* Function:	imsm_analyze_change
+* Description:	Function analyze change for single volume
+*		and validate if transition is supported
+*		Unset level, layout and chunksize in 'geo' are filled in
+*		from the current volume.
+* Parameters:	Geometry parameters, supertype structure
+* Returns:	Operation type code on success, -1 if fail
+****************************************************************************/
+enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
+					    struct geo_params *geo)
+{
+	struct mdinfo info;
+	int change = -1;
+	int check_devs = 0;
+
+	getinfo_super_imsm_volume(st, &info, NULL);
+
+	if ((geo->level != info.array.level) &&
+	    (geo->level >= 0) &&
+	    (geo->level != UnSet)) {
+		switch (info.array.level) {
+		case 0:
+			if (geo->level == 5) {
+				change = CH_LEVEL_MIGRATION;
+				check_devs = 1;
+			}
+			if (geo->level == 10) {
+				change = CH_TAKEOVER;
+				check_devs = 1;
+			}
+			break;
+		case 5:
+			/* only raid5 -> raid0 is a supported migration */
+			if (geo->level == 0)
+				change = CH_LEVEL_MIGRATION;
+			break;
+		case 10:
+			if (geo->level == 0) {
+				change = CH_TAKEOVER;
+				check_devs = 1;
+			}
+			break;
+		default:
+			/* no level change supported from other levels */
+			break;
+		}
+		if (change == -1) {
+			fprintf(stderr,
+				Name " Error. Level Migration from %d to %d "
+				"not supported!\n",
+				info.array.level, geo->level);
+			goto analyse_change_exit;
+		}
+	} else
+		geo->level = info.array.level;
+
+	if ((geo->layout != info.array.layout)
+	    && ((geo->layout != UnSet) && (geo->layout != -1))) {
+		change = CH_LEVEL_MIGRATION;
+		if ((info.array.layout == 0)
+		    && (info.array.level == 5)
+		    && (geo->layout == 5)) {
+			/* reshape 5 -> 4 */
+		} else if ((info.array.layout == 5)
+			   && (info.array.level == 5)
+			   && (geo->layout == 0)) {
+			/* reshape 4 -> 5 */
+			geo->layout = 0;
+			geo->level = 5;
+		} else {
+			fprintf(stderr,
+				Name " Error. Layout Migration from %d to %d "
+				"not supported!\n",
+				info.array.layout, geo->layout);
+			change = -1;
+			goto analyse_change_exit;
+		}
+	} else
+		geo->layout = info.array.layout;
+
+	if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
+	    && (geo->chunksize != info.array.chunk_size))
+		change = CH_CHUNK_MIGR;
+	else
+		geo->chunksize = info.array.chunk_size;
+
+	if (!validate_geometry_imsm(st,
+				    geo->level,
+				    geo->layout,
+				    geo->raid_disks,
+				    (geo->chunksize / 1024),
+				    geo->size,
+				    0, 0, 1))
+		change = -1;
+
+	if (check_devs) {
+		struct intel_super *super = st->sb;
+		struct imsm_super *mpb = super->anchor;
+
+		if (mpb->num_raid_devs > 1) {
+			fprintf(stderr,
+				Name " Error. Cannot perform operation on %s"
+				"- for this operation it MUST be single "
+				"array in container\n",
+				geo->dev_name);
+			change = -1;
+		}
+	}
+
+analyse_change_exit:
+
+	return change;
+}
+
+/* Build an update_takeover record for the current volume and apply it.
+ * The direction is derived from the target level in 'geo':
+ * 0 means raid10 -> raid0, 10 means raid0 -> raid10.
+ * The update is applied locally and, when st->update_tail is set,
+ * queued with append_metadata_update(); otherwise it is freed here.
+ * Returns 0 on success, 1 on allocation failure or unsupported level.
+ */
+int imsm_takeover(struct supertype *st, struct geo_params *geo)
+{
+	struct intel_super *super = st->sb;
+	struct imsm_update_takeover *u;
+	enum takeover_direction direction;
+
+	/* settle the direction first so it is never left uninitialized */
+	if (geo->level == 0)
+		direction = R10_TO_R0;	/* 10->0 transition */
+	else if (geo->level == 10)
+		direction = R0_TO_R10;	/* 0->10 transition */
+	else
+		return 1;
+
+	u = malloc(sizeof(struct imsm_update_takeover));
+	if (u == NULL)
+		return 1;
+
+	u->type = update_takeover;
+	u->subarray = super->current_vol;
+	u->direction = direction;
+
+	/* update metadata locally */
+	imsm_update_metadata_locally(st, u,
+				     sizeof(struct imsm_update_takeover));
+	/* and possibly remotely */
+	if (st->update_tail)
+		append_metadata_update(st, u,
+				       sizeof(struct imsm_update_takeover));
+	else
+		free(u);
+
+	return 0;
+}
+
+/* Entry point for reshape requests (superswitch .reshape_super).
+ * When st refers to the container itself, only a change of the number
+ * of raid disks is handled; otherwise the requested volume is located
+ * and the change is classified by imsm_analyze_change().
+ * 'backup' and 'verbose' are not used here.
+ * Returns 0 on success, 1 on failure or when experimental() is 0.
+ */
+static int imsm_reshape_super(struct supertype *st, long long size, int level,
+			      int layout, int chunksize, int raid_disks,
+			      char *backup, char *dev, int verbose)
+{
+	int ret_val = 1;
+	struct geo_params geo;
+
+	dprintf("imsm: reshape_super called.\n");
+
+	/* memset(dest, value, length) */
+	memset(&geo, 0, sizeof(geo));
+
+	geo.dev_name = dev;
+	geo.dev_id = st->devnum;
+	geo.size = size;
+	geo.level = level;
+	geo.layout = layout;
+	geo.chunksize = chunksize;
+	geo.raid_disks = raid_disks;
+
+	dprintf("\tfor level : %i\n", geo.level);
+	dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
+
+	if (experimental() == 0)
+		return ret_val;
+
+	if (st->container_dev == st->devnum) {
+		/* On container level we can only increase number of devices. */
+		int old_raid_disks = 0;
+
+		dprintf("imsm: info: Container operation\n");
+		if (imsm_reshape_is_allowed_on_container(
+			    st, &geo, &old_raid_disks)) {
+			struct imsm_update_reshape *u = NULL;
+			int len;
+
+			len = imsm_create_metadata_update_for_reshape(
+				st, &geo, old_raid_disks, &u);
+
+			if (len <= 0) {
+				dprintf("imsm: Cannot prepare update\n");
+				goto exit_imsm_reshape_super;
+			}
+
+			ret_val = 0;
+			/* update metadata locally */
+			imsm_update_metadata_locally(st, u, len);
+			/* and possibly remotely */
+			if (st->update_tail)
+				append_metadata_update(st, u, len);
+			else
+				free(u);
+
+		} else {
+			fprintf(stderr, Name "imsm: Operation is not allowed "
+				"on this container\n");
+		}
+	} else {
+		/* On volume level we support following operations
+		 * - takeover: raid10 -> raid0; raid0 -> raid10
+		 * - chunk size migration
+		 * - migration: raid5 -> raid0; raid0 -> raid5
+		 */
+		struct intel_super *super = st->sb;
+		struct intel_dev *vol = super->devlist;
+		int change;
+
+		dprintf("imsm: info: Volume operation\n");
+		/* find requested device */
+		while (vol) {
+			int devnum;
+
+			/* devnum is only valid when the lookup succeeded */
+			if (imsm_find_array_minor_by_subdev(vol->index,
+							    st->container_dev,
+							    &devnum) == 0 &&
+			    devnum == geo.dev_id)
+				break;
+			vol = vol->next;
+		}
+		if (vol == NULL) {
+			fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
+				geo.dev_name, geo.dev_id);
+			goto exit_imsm_reshape_super;
+		}
+		super->current_vol = vol->index;
+		change = imsm_analyze_change(st, &geo);
+		switch (change) {
+		case CH_TAKEOVER:
+			ret_val = imsm_takeover(st, &geo);
+			break;
+		case CH_CHUNK_MIGR:
+			ret_val = 0;
+			break;
+		case CH_LEVEL_MIGRATION:
+			ret_val = 0;
+			break;
+		default:
+			ret_val = 1;
+		}
+	}
+
+exit_imsm_reshape_super:
+	dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
+	return ret_val;
+}
+
+/* superswitch .manage_reshape hook: hands every argument unchanged to
+ * child_monitor() and returns its result.
+ */
+static int imsm_manage_reshape(
+	int afd, struct mdinfo *sra, struct reshape *reshape,
+	struct supertype *st, unsigned long stripes,
+	int *fds, unsigned long long *offsets,
+	int dests, int *destfd, unsigned long long *destoffsets)
+{
+	/* Just use child_monitor for now */
+	return child_monitor(
+		afd, sra, reshape, st, stripes,
+		fds, offsets, dests, destfd, destoffsets);
+}
+
 struct superswitch super_imsm = { #ifndef MDASSEMBLE .examine_super = examine_super_imsm, @@ -5574,18 +6869,21 @@ struct superswitch super_imsm = { .brief_detail_super = brief_detail_super_imsm, .write_init_super = write_init_super_imsm, .validate_geometry = validate_geometry_imsm, - .default_chunk = default_chunk_imsm, .add_to_super = add_to_super_imsm, + .remove_from_super = remove_from_super_imsm, .detail_platform = detail_platform_imsm, .kill_subarray = kill_subarray_imsm, .update_subarray = update_subarray_imsm, + .load_container = load_container_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, .getinfo_super = getinfo_super_imsm, + .getinfo_super_disks = getinfo_super_disks_imsm, .update_super = update_super_imsm, .avail_size = avail_size_imsm, + .min_acceptable_spare_size = min_acceptable_spare_size_imsm, .compare_super = compare_super_imsm, @@ -5595,7 +6893,10 @@ struct superswitch super_imsm = { .free_super = free_super_imsm, .match_metadata_desc = match_metadata_desc_imsm, .container_content = container_content_imsm, - .default_layout = imsm_level_to_layout, + .default_geometry = default_geometry_imsm, + .get_disk_controller_domain = imsm_get_disk_controller_domain, + .reshape_super = imsm_reshape_super, + .manage_reshape = imsm_manage_reshape, .external = 1, .name = "imsm",