X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=0178f289f51f942a980fa125ec688fba8f9c8f4e;hb=bb025c2f22c9c0cdf2d77c7a0e8298b3e3972538;hp=68382513a910c89766f253dbdb43795d2a6388db;hpb=78b10e663c35a301406facbda3f5be02973e2ba4;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index 68382513..0178f289 100644 --- a/super-intel.c +++ b/super-intel.c @@ -284,6 +284,13 @@ struct extent { unsigned long long start, size; }; +/* definitions of reshape process types */ +enum imsm_reshape_type { + CH_TAKEOVER, + CH_CHUNK_MIGR, + CH_LEVEL_MIGRATION +}; + /* definition of messages passed to imsm_process_update */ enum imsm_update_type { update_activate_spare, @@ -292,6 +299,7 @@ enum imsm_update_type { update_rename_array, update_add_remove_disk, update_reshape_container_disks, + update_takeover }; struct imsm_update_activate_spare { @@ -312,12 +320,21 @@ struct geo_params { int raid_disks; }; +enum takeover_direction { + R10_TO_R0, + R0_TO_R10 +}; +struct imsm_update_takeover { + enum imsm_update_type type; + int subarray; + enum takeover_direction direction; +}; struct imsm_update_reshape { enum imsm_update_type type; int old_raid_disks; int new_raid_disks; - int new_disks[1]; /* new_raid_disk - old_raid_disks makedev number */ + int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */ }; struct disk_info { @@ -509,23 +526,35 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index) return NULL; } -static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot) +/* + * for second_map: + * == 0 get first map + * == 1 get second map + * == -1 than get map according to the current migr_state + */ +static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, + int slot, + int second_map) { struct imsm_map *map; - if (dev->vol.migr_state) - map = get_imsm_map(dev, 1); - else - map = get_imsm_map(dev, 0); + if (second_map == -1) { + if (dev->vol.migr_state) + map = get_imsm_map(dev, 1); + else + map = get_imsm_map(dev, 0); + } else { + map = get_imsm_map(dev, second_map); + } /* top byte identifies disk under rebuild */ return __le32_to_cpu(map->disk_ord_tbl[slot]); } #define ord_to_idx(ord) (((ord) << 8) >> 8) -static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot) +static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map) { - __u32 ord = get_imsm_ord_tbl_ent(dev, slot); + __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map); return ord_to_idx(ord); } @@ -724,22 +753,38 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) __u64 sz; int slot, i; struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map2 = get_imsm_map(dev, 1); __u32 ord; printf("\n"); printf("[%.16s]:\n", dev->volume); printf(" UUID : %s\n", uuid); - printf(" RAID Level : %d\n", get_imsm_raid_level(map)); - printf(" Members : %d\n", map->num_members); + printf(" RAID Level : %d", get_imsm_raid_level(map)); + if (map2) + printf(" <-- %d", get_imsm_raid_level(map2)); + printf("\n"); + printf(" Members : %d", map->num_members); + if (map2) + printf(" <-- %d", map2->num_members); + printf("\n"); printf(" Slots : ["); for (i = 0; i < map->num_members; i++) { - ord = get_imsm_ord_tbl_ent(dev, i); + ord = get_imsm_ord_tbl_ent(dev, i, 0); printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); } - printf("]\n"); + printf("]"); + if (map2) { + printf(" <-- ["); + for (i = 0; i < map2->num_members; i++) { + ord = get_imsm_ord_tbl_ent(dev, i, 1); + printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); + } + printf("]"); + } + printf("\n"); slot = get_imsm_disk_slot(map, disk_idx); if (slot >= 0) { - ord = get_imsm_ord_tbl_ent(dev, slot); + ord = get_imsm_ord_tbl_ent(dev, slot, -1); printf(" This Slot : %d%s\n", slot, ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : ""); } else @@ -756,8 +801,12 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) __le32_to_cpu(map->pba_of_lba0)); printf(" Num Stripes : %u\n", __le32_to_cpu(map->num_data_stripes)); - printf(" Chunk Size : %u KiB\n", + printf(" Chunk Size : %u KiB", __le16_to_cpu(map->blocks_per_strip) / 2); + if (map2) + printf(" <-- %u KiB", + __le16_to_cpu(map2->blocks_per_strip) / 2); + printf("\n"); printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks)); printf(" Migrate State : "); if (dev->vol.migr_state) { @@ -1397,12 +1446,12 @@ static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev) return num_stripes_per_unit_resync(dev); } -static __u8 imsm_num_data_members(struct imsm_dev *dev) +static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) { /* named 'imsm_' because raid0, raid1 and raid10 * counter-intuitively have the same number of data disks */ - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, second_map); switch (get_imsm_raid_level(map)) { case 0: @@ -1466,6 +1515,7 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) return 0; switch (migr_type(dev)) { + case MIGR_GEN_MIGR: case MIGR_VERIFY: case MIGR_REPAIR: case MIGR_INIT: { @@ -1485,7 +1535,7 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) */ stripes_per_unit = num_stripes_per_unit_resync(dev); migr_chunk = migr_strip_blocks_resync(dev); - disks = imsm_num_data_members(dev); + disks = imsm_num_data_members(dev, 0); blocks_per_unit = stripes_per_unit * migr_chunk * disks; stripe = __le32_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; @@ -1502,7 +1552,6 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) migr_chunk = migr_strip_blocks_rebuild(dev); return migr_chunk * stripes_per_unit; } - case MIGR_GEN_MIGR: case MIGR_STATE_CHANGE: default: return 0; @@ -1529,26 +1578,40 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, struct intel_super *super = st->sb; struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *prev_map = get_imsm_map(dev, 1); + struct imsm_map *map_to_analyse = map; struct dl *dl; char *devname; int map_disks = info->array.raid_disks; + if (prev_map) + map_to_analyse = prev_map; + for (dl = super->disks; dl; dl = dl->next) if (dl->raiddisk == info->disk.raid_disk) break; info->container_member = super->current_vol; - info->array.raid_disks = map->num_members; - info->array.level = get_imsm_raid_level(map); + info->array.raid_disks = map_to_analyse->num_members; + info->array.level = get_imsm_raid_level(map_to_analyse); info->array.layout = imsm_level_to_layout(info->array.level); info->array.md_minor = -1; info->array.ctime = 0; info->array.utime = 0; - info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9; + info->array.chunk_size = + __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9; info->array.state = !dev->vol.dirty; info->custom_array_size = __le32_to_cpu(dev->size_high); info->custom_array_size <<= 32; info->custom_array_size |= __le32_to_cpu(dev->size_low); - + if (prev_map) { + info->new_level = get_imsm_raid_level(map); + info->new_layout = imsm_level_to_layout(info->new_level); + info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9; + } else { + info->new_level = UnSet; + info->new_layout = UnSet; + info->new_chunk = info->array.chunk_size; + } info->disk.major = 0; info->disk.minor = 0; if (dl) { @@ -1556,13 +1619,19 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->disk.minor = dl->minor; } - info->data_offset = __le32_to_cpu(map->pba_of_lba0); - info->component_size = __le32_to_cpu(map->blocks_per_member); + info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0); + info->component_size = + __le32_to_cpu(map_to_analyse->blocks_per_member); memset(info->uuid, 0, sizeof(info->uuid)); info->recovery_start = MaxSector; - info->reshape_active = 0; + info->reshape_active = (prev_map != NULL); + if (info->reshape_active) + info->delta_disks = map->num_members - prev_map->num_members; + else + info->delta_disks = 0; - if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) { + if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED || + dev->vol.dirty) { info->resync_start = 0; } else if (dev->vol.migr_state) { switch (migr_type(dev)) { @@ -1610,47 +1679,15 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, dmap[i] = 0; if (i < info->array.raid_disks) { struct imsm_disk *dsk; - j = get_imsm_disk_idx(dev, i); + j = get_imsm_disk_idx(dev, i, -1); dsk = get_imsm_disk(super, j); if (dsk && (dsk->status & CONFIGURED_DISK)) dmap[i] = 1; } } } -} - -/* check the config file to see if we can return a real uuid for this spare */ -static void fixup_container_spare_uuid(struct mdinfo *inf) -{ - struct mddev_ident *array_list; - - if (inf->array.level != LEVEL_CONTAINER || - memcmp(inf->uuid, uuid_match_any, sizeof(int[4])) != 0) - return; - - array_list = conf_get_ident(NULL); - - for (; array_list; array_list = array_list->next) { - if (array_list->uuid_set) { - struct supertype *_sst; /* spare supertype */ - struct supertype *_cst; /* container supertype */ - - _cst = array_list->st; - if (_cst) - _sst = _cst->ss->match_metadata_desc(inf->text_version); - else - _sst = NULL; - - if (_sst) { - memcpy(inf->uuid, array_list->uuid, sizeof(int[4])); - free(_sst); - break; - } - } - } } - static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed); static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev); @@ -1719,7 +1756,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * * (catches single-degraded vs double-degraded) */ for (j = 0; j < map->num_members; j++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i); + __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); __u32 idx = ord_to_idx(ord); if (!(ord & IMSM_ORD_REBUILD) && @@ -1765,10 +1802,8 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * */ if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs) uuid_from_super_imsm(st, info->uuid); - else { - memcpy(info->uuid, uuid_match_any, sizeof(int[4])); - fixup_container_spare_uuid(info); - } + else + memcpy(info->uuid, uuid_zero, sizeof(uuid_zero)); /* I don't know how to compute 'map' on imsm, so use safe default */ if (map) { @@ -3029,11 +3064,6 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) struct intel_super *super; int rv; -#ifndef MDASSEMBLE - if (load_super_imsm_all(st, fd, &st->sb, devname) == 0) - return 0; -#endif - if (test_partition(fd)) /* IMSM not allowed on partitions */ return 1; @@ -3395,7 +3425,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, /* Check the device has not already been added */ slot = get_imsm_disk_slot(map, dl->index); if (slot >= 0 && - (get_imsm_ord_tbl_ent(dev, slot) & IMSM_ORD_REBUILD) == 0) { + (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) { fprintf(stderr, Name ": %s has been included in this array twice\n", devname); return 1; @@ -3679,7 +3709,7 @@ static int create_array(struct supertype *st, int dev_idx) imsm_copy_dev(&u->dev, dev); inf = get_disk_info(u); for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(dev, i); + int idx = get_imsm_disk_idx(dev, i, -1); disk = get_imsm_disk(super, idx); serialcpy(inf[i].serial, disk->serial); @@ -3723,7 +3753,6 @@ static int write_init_super_imsm(struct supertype *st) if (st->update_tail) { /* queue the recently created array / added disk * as a metadata update */ - struct dl *d; int rv; /* determine if we are creating a volume or adding a disk */ @@ -3735,11 +3764,6 @@ static int write_init_super_imsm(struct supertype *st) } else rv = create_array(st, current_vol); - for (d = super->disks; d ; d = d->next) { - close(d->fd); - d->fd = -1; - } - return rv; } else { struct dl *d; @@ -4514,17 +4538,16 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra struct imsm_super *mpb = super->anchor; struct mdinfo *rest = NULL; unsigned int i; + int bbm_errors = 0; - /* do not assemble arrays that might have bad blocks */ - if (imsm_bbm_log_size(super->anchor)) { - fprintf(stderr, Name ": BBM log found in metadata. " - "Cannot activate array(s).\n"); - return NULL; - } + /* check for bad blocks */ + if (imsm_bbm_log_size(super->anchor)) + bbm_errors = 1; for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev; struct imsm_map *map; + struct imsm_map *map2; struct mdinfo *this; int slot; char *ep; @@ -4535,6 +4558,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra dev = get_imsm_dev(super, i); map = get_imsm_map(dev, 0); + map2 = get_imsm_map(dev, 1); /* do not publish arrays that are in the middle of an * unsupported migration @@ -4567,8 +4591,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra __u32 ord; skip = 0; - idx = get_imsm_disk_idx(dev, slot); - ord = get_imsm_ord_tbl_ent(dev, slot); + idx = get_imsm_disk_idx(dev, slot, 0); + ord = get_imsm_ord_tbl_ent(dev, slot, 0); for (d = super->disks; d ; d = d->next) if (d->index == idx) break; @@ -4616,7 +4640,17 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra info_d->disk.minor = d->minor; info_d->disk.raid_disk = slot; info_d->recovery_start = recovery_start; - + if (map2) { + if (slot < map2->num_members) + info_d->disk.state = (1 << MD_DISK_ACTIVE); + else + this->array.spare_disks++; + } else { + if (slot < map->num_members) + info_d->disk.state = (1 << MD_DISK_ACTIVE); + else + this->array.spare_disks++; + } if (info_d->recovery_start == MaxSector) this->array.working_disks++; @@ -4629,6 +4663,10 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra rest = this; } + /* if array has bad blocks, set suitable bit in array status */ + if (bbm_errors) + rest->array.state |= (1<num_members; i++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i); + __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); int idx = ord_to_idx(ord); struct imsm_disk *disk; @@ -4760,9 +4798,13 @@ static int is_resyncing(struct imsm_dev *dev) migr_type(dev) == MIGR_REPAIR) return 1; + if (migr_type(dev) == MIGR_GEN_MIGR) + return 0; + migr_map = get_imsm_map(dev, 1); - if (migr_map->map_state == IMSM_T_STATE_NORMAL) + if ((migr_map->map_state == IMSM_T_STATE_NORMAL) && + (dev->vol.migr_type != MIGR_GEN_MIGR)) return 1; else return 0; @@ -4825,7 +4867,74 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev) static void imsm_set_disk(struct active_array *a, int n, int state); -/* Handle dirty -> clean transititions and resync. Degraded and rebuild +static void imsm_progress_container_reshape(struct intel_super *super) +{ + /* if no device has a migr_state, but some device has a + * different number of members than the previous device, start + * changing the number of devices in this device to match + * previous. + */ + struct imsm_super *mpb = super->anchor; + int prev_disks = -1; + int i; + + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map2; + int prev_num_members; + int used_disks; + + if (dev->vol.migr_state) + return; + + if (prev_disks == -1) + prev_disks = map->num_members; + if (prev_disks == map->num_members) + continue; + + /* OK, this array needs to enter reshape mode. + * i.e it needs a migr_state + */ + + prev_num_members = map->num_members; + map->num_members = prev_disks; + dev->vol.migr_state = 1; + dev->vol.curr_migr_unit = 0; + dev->vol.migr_type = MIGR_GEN_MIGR; + for (i = prev_num_members; + i < map->num_members; i++) + set_imsm_ord_tbl_ent(map, i, i); + map2 = get_imsm_map(dev, 1); + /* Copy the current map */ + memcpy(map2, map, sizeof_imsm_map(map)); + map2->num_members = prev_num_members; + + /* calculate new size + */ + used_disks = imsm_num_data_members(dev, 0); + if (used_disks) { + unsigned long long array_blocks; + + array_blocks = + map->blocks_per_member + * used_disks; + /* round array size down to closest MB + */ + array_blocks = (array_blocks + >> SECT_PER_MB_SHIFT) + << SECT_PER_MB_SHIFT; + dev->size_low = + __cpu_to_le32((__u32)array_blocks); + dev->size_high = + __cpu_to_le32( + (__u32)(array_blocks >> 32)); + } + super->updates_pending++; + } +} + +/* Handle dirty -> clean transititions, resync and reshape. Degraded and rebuild * states are handled in imsm_set_disk() with one exception, when a * resync is stopped due to a new failure this routine will set the * 'degraded' state for the array. @@ -4840,6 +4949,65 @@ static int imsm_set_array_state(struct active_array *a, int consistent) __u8 map_state = imsm_check_degraded(super, dev, failed); __u32 blocks_per_unit; + if (dev->vol.migr_state && + dev->vol.migr_type == MIGR_GEN_MIGR) { + /* array state change is blocked due to reshape action + * We might need to + * - abort the reshape (if last_checkpoint is 0 and action!= reshape) + * - finish the reshape (if last_checkpoint is big and action != reshape) + * - update curr_migr_unit + */ + if (a->curr_action == reshape) { + /* still reshaping, maybe update curr_migr_unit */ + long long blocks_per_unit = blocks_per_migr_unit(dev); + long long unit = a->last_checkpoint; + if (blocks_per_unit) { + unit /= blocks_per_unit; + if (unit > + __le32_to_cpu(dev->vol.curr_migr_unit)) { + dev->vol.curr_migr_unit = + __cpu_to_le32(unit); + super->updates_pending++; + } + } + return 0; + } else { + if (a->last_checkpoint == 0 && a->prev_action == reshape) { + /* for some reason we aborted the reshape. + * Better clean up + */ + struct imsm_map *map2 = get_imsm_map(dev, 1); + dev->vol.migr_state = 0; + dev->vol.migr_type = 0; + dev->vol.curr_migr_unit = 0; + memcpy(map, map2, sizeof_imsm_map(map2)); + super->updates_pending++; + } + if (a->last_checkpoint >= a->info.component_size) { + unsigned long long array_blocks; + int used_disks; + /* it seems the reshape is all done */ + dev->vol.migr_state = 0; + dev->vol.migr_type = 0; + dev->vol.curr_migr_unit = 0; + + used_disks = imsm_num_data_members(dev, -1); + array_blocks = map->blocks_per_member * used_disks; + /* round array size down to closest MB */ + array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) + << SECT_PER_MB_SHIFT; + dev->size_low = __cpu_to_le32((__u32) array_blocks); + dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32)); + a->info.custom_array_size = array_blocks; + a->check_reshape = 1; /* encourage manager to update + * array size + */ + super->updates_pending++; + imsm_progress_container_reshape(super); + } + } + } + /* before we activate this array handle any missing disks */ if (consistent == 2) handle_missing(super, dev); @@ -4933,7 +5101,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) dprintf("imsm: set_disk %d:%x\n", n, state); - ord = get_imsm_ord_tbl_ent(dev, n); + ord = get_imsm_ord_tbl_ent(dev, n, -1); disk = get_imsm_disk(super, ord_to_idx(ord)); /* check for new failures */ @@ -5041,7 +5209,7 @@ static void imsm_sync_metadata(struct supertype *container) static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - int i = get_imsm_disk_idx(dev, idx); + int i = get_imsm_disk_idx(dev, idx, -1); struct dl *dl; for (dl = super->disks; dl; dl = dl->next) @@ -5062,7 +5230,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct mdinfo *additional_test_list) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - int idx = get_imsm_disk_idx(dev, slot); + int idx = get_imsm_disk_idx(dev, slot, -1); struct imsm_super *mpb = super->anchor; struct imsm_map *map; unsigned long long pos; @@ -5190,7 +5358,7 @@ static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed) * Check if failed disks are deleted from intel * disk list or are marked to be deleted */ - idx = get_imsm_disk_idx(dev2, slot); + idx = get_imsm_disk_idx(dev2, slot, -1); idisk = get_imsm_dl_disk(cont->sb, idx); /* * Do not rebuild the array if failed disks @@ -5247,6 +5415,18 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n", inst, failed, a->info.array.raid_disks, a->info.array.level); + + if (dev->vol.migr_state && + dev->vol.migr_type == MIGR_GEN_MIGR) + /* No repair during migration */ + return NULL; + + if (a->info.array.level == 4) + /* No repair for takeovered array + * imsm doesn't support raid4 + */ + return NULL; + if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED) return NULL; @@ -5357,6 +5537,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, } mu->space = NULL; + mu->space_list = NULL; mu->len = sizeof(struct imsm_update_activate_spare) * num_spares; mu->next = *updates; u = (struct imsm_update_activate_spare *) mu->buf; @@ -5387,7 +5568,7 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_ int j; for (i = 0; i < map->num_members; i++) { - disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1)); for (j = 0; j < new_map->num_members; j++) if (serialcmp(disk->serial, inf[j].serial) == 0) return 1; @@ -5474,16 +5655,189 @@ static int add_remove_disk_update(struct intel_super *super) return check_degraded; } +static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, + struct intel_super *super, + void ***space_list) +{ + struct dl *new_disk; + struct intel_dev *id; + int i; + int delta_disks = u->new_raid_disks - u->old_raid_disks; + int disk_count = u->old_raid_disks; + void **tofree = NULL; + int devices_to_reshape = 1; + struct imsm_super *mpb = super->anchor; + int ret_val = 0; + + dprintf("imsm: imsm_process_update() for update_reshape\n"); + + /* enable spares to use in array */ + for (i = 0; i < delta_disks; i++) { + new_disk = get_disk_super(super, + major(u->new_disks[i]), + minor(u->new_disks[i])); + dprintf("imsm: imsm_process_update(): new disk " + "for reshape is: %i:%i (%p, index = %i)\n", + major(u->new_disks[i]), minor(u->new_disks[i]), + new_disk, new_disk->index); + if ((new_disk == NULL) || + ((new_disk->index >= 0) && + (new_disk->index < u->old_raid_disks))) + goto update_reshape_exit; + new_disk->index = disk_count++; + /* slot to fill in autolayout + */ + new_disk->raiddisk = new_disk->index; + new_disk->disk.status |= + CONFIGURED_DISK; + new_disk->disk.status &= ~SPARE_DISK; + } + + dprintf("imsm: process_update(): update_reshape: volume set" + " mpb->num_raid_devs = %i\n", mpb->num_raid_devs); + /* manage changes in volume + */ + for (id = super->devlist ; id; id = id->next) { + void **sp = *space_list; + struct imsm_dev *newdev; + struct imsm_map *newmap, *oldmap; + + if (!sp) + continue; + *space_list = *sp; + newdev = (void*)sp; + /* Copy the dev, but not (all of) the map */ + memcpy(newdev, id->dev, sizeof(*newdev)); + oldmap = get_imsm_map(id->dev, 0); + newmap = get_imsm_map(newdev, 0); + /* Copy the current map */ + memcpy(newmap, oldmap, sizeof_imsm_map(oldmap)); + /* update one device only + */ + if (devices_to_reshape) { + int used_disks; + + dprintf("process_update(): modifying " + "subdev: %i\n", id->index); + devices_to_reshape--; + newdev->vol.migr_state = 1; + newdev->vol.curr_migr_unit = 0; + newdev->vol.migr_type = MIGR_GEN_MIGR; + newmap->num_members = u->new_raid_disks; + for (i = 0; i < delta_disks; i++) { + set_imsm_ord_tbl_ent(newmap, + u->old_raid_disks + i, + u->old_raid_disks + i); + } + /* New map is correct, now need to save old map + */ + newmap = get_imsm_map(newdev, 1); + memcpy(newmap, oldmap, sizeof_imsm_map(oldmap)); + + /* calculate new size + */ + used_disks = imsm_num_data_members(newdev, 0); + if (used_disks) { + unsigned long long array_blocks; + + array_blocks = + newmap->blocks_per_member * used_disks; + /* round array size down to closest MB + */ + array_blocks = (array_blocks + >> SECT_PER_MB_SHIFT) + << SECT_PER_MB_SHIFT; + newdev->size_low = + __cpu_to_le32((__u32)array_blocks); + newdev->size_high = + __cpu_to_le32((__u32)(array_blocks >> 32)); + } + } + + sp = (void **)id->dev; + id->dev = newdev; + *sp = tofree; + tofree = sp; + } + if (tofree) + *space_list = tofree; + ret_val = 1; + +update_reshape_exit: + + return ret_val; +} + +static int apply_takeover_update(struct imsm_update_takeover *u, + struct intel_super *super) +{ + struct imsm_dev *dev = NULL; + struct imsm_map *map; + struct dl *dm, *du; + struct intel_dev *dv; + + for (dv = super->devlist; dv; dv = dv->next) + if (dv->index == (unsigned int)u->subarray) { + dev = dv->dev; + break; + } + + if (dev == NULL) + return 0; + + map = get_imsm_map(dev, 0); + + if (u->direction == R10_TO_R0) { + /* iterate through devices to mark removed disks as spare */ + for (dm = super->disks; dm; dm = dm->next) { + if (dm->disk.status & FAILED_DISK) { + int idx = dm->index; + /* update indexes on the disk list */ +/* FIXME this loop-with-the-loop looks wrong, I'm not convinced + the index values will end up being correct.... NB */ + for (du = super->disks; du; du = du->next) + if (du->index > idx) + du->index--; + /* mark as spare disk */ + dm->disk.status = SPARE_DISK; + dm->index = -1; + } + } + + /* update map */ + map->num_members = map->num_members / 2; + map->map_state = IMSM_T_STATE_NORMAL; + map->num_domains = 1; + map->raid_level = 0; + map->failed_disk_num = -1; + } + + /* update disk order table */ + for (du = super->disks; du; du = du->next) + if (du->index >= 0) + set_imsm_ord_tbl_ent(map, du->index, du->index); + + return 1; +} + static void imsm_process_update(struct supertype *st, struct metadata_update *update) { /** * crack open the metadata_update envelope to find the update record * update can be one of: - * update_activate_spare - a spare device has replaced a failed + * update_reshape_container_disks - all the arrays in the container + * are being reshaped to have more devices. We need to mark + * the arrays for general migration and convert selected spares + * into active devices. + * update_activate_spare - a spare device has replaced a failed * device in an array, update the disk_ord_tbl. If this disk is * present in all member arrays then also clear the SPARE_DISK * flag + * update_create_array + * update_kill_array + * update_rename_array + * update_add_remove_disk */ struct intel_super *super = st->sb; struct imsm_super *mpb; @@ -5508,7 +5862,18 @@ static void imsm_process_update(struct supertype *st, mpb = super->anchor; switch (type) { + case update_takeover: { + struct imsm_update_takeover *u = (void *)update->buf; + if (apply_takeover_update(u, super)) + super->updates_pending++; + break; + } + case update_reshape_container_disks: { + struct imsm_update_reshape *u = (void *)update->buf; + if (apply_reshape_container_disks_update( + u, super, &update->space_list)) + super->updates_pending++; break; } case update_activate_spare: { @@ -5522,7 +5887,7 @@ static void imsm_process_update(struct supertype *st, struct dl *dl; unsigned int found; int failed; - int victim = get_imsm_disk_idx(dev, u->slot); + int victim = get_imsm_disk_idx(dev, u->slot, -1); int i; for (dl = super->disks; dl; dl = dl->next) @@ -5545,7 +5910,8 @@ static void imsm_process_update(struct supertype *st, for (i = 0; i < map->num_members; i++) { if (i == u->slot) continue; - disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + disk = get_imsm_disk(super, + get_imsm_disk_idx(dev, i, -1)); if (!disk || is_failed(disk)) failed++; } @@ -5824,6 +6190,36 @@ static void imsm_prepare_update(struct supertype *st, switch (type) { case update_reshape_container_disks: { + /* Every raid device in the container is about to + * gain some more devices, and we will enter a + * reconfiguration. + * So each 'imsm_map' will be bigger, and the imsm_vol + * will now hold 2 of them. + * Thus we need new 'struct imsm_dev' allocations sized + * as sizeof_imsm_dev but with more devices in both maps. + */ + struct imsm_update_reshape *u = (void *)update->buf; + struct intel_dev *dl; + void **space_tail = (void**)&update->space_list; + + dprintf("imsm: imsm_prepare_update() for update_reshape\n"); + + for (dl = super->devlist; dl; dl = dl->next) { + int size = sizeof_imsm_dev(dl->dev, 1); + void *s; + if (u->new_raid_disks > u->old_raid_disks) + size += sizeof(__u32)*2* + (u->new_raid_disks - u->old_raid_disks); + s = malloc(size); + if (!s) + break; + *space_tail = s; + space_tail = s; + *space_tail = NULL; + } + + len = disks_to_mpb_size(u->new_raid_disks); + dprintf("New anchor length is %llu\n", (unsigned long long)len); break; } case update_create_array: { @@ -5921,7 +6317,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind /* update ord entries being careful not to propagate * ord-flags to the first map */ - ord = get_imsm_ord_tbl_ent(dev, j); + ord = get_imsm_ord_tbl_ent(dev, j, -1); if (ord_to_idx(ord) <= index) continue; @@ -5998,6 +6394,10 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, struct geo_params *geo, int *old_raid_disks) { + /* currently we only support increasing the number of devices + * for a container. This increases the number of device for each + * member array. They must all be RAID0 or RAID5. + */ int ret_val = 0; struct mdinfo *info, *member; int devices_that_can_grow = 0; @@ -6037,7 +6437,7 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, (info->array.level != 5)) { /* we cannot use this container with other raid level */ - dprintf("imsm: for container operation wrong"\ + dprintf("imsm: for container operation wrong" " raid level (%i) detected\n", info->array.level); break; @@ -6049,7 +6449,7 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, if (!is_raid_level_supported(super->orom, member->array.level, geo->raid_disks)) { - dprintf("platform does not support raid%d with"\ + dprintf("platform does not support raid%d with" " %d disk%s\n", info->array.level, geo->raid_disks, @@ -6098,42 +6498,8 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, */ static struct mdinfo *get_spares_for_grow(struct supertype *st) { - dev_t dev = 0; - struct mdinfo *disks, *d, **dp; unsigned long long min_size = min_acceptable_spare_size_imsm(st); - - /* get list of alldisks in container */ - disks = getinfo_super_disks_imsm(st); - - if (!disks) - return NULL; - /* find spare devices on the list */ - dp = &disks->devs; - disks->array.spare_disks = 0; - while (*dp) { - int found = 0; - d = *dp; - if (d->disk.state == 0) { - /* check if size is acceptable */ - unsigned long long dev_size; - dev = makedev(d->disk.major,d->disk.minor); - if (min_size && - dev_size_from_id(dev, &dev_size) && - dev_size >= min_size) { - dev = 0; - found = 1; - } - } - if (found) { - dp = &d->next; - disks->array.spare_disks++; - } else { - *dp = d->next; - d->next = NULL; - sysfs_free(d); - } - } - return disks; + return container_choose_spares(st, min_size, NULL, NULL, NULL, 0); } /****************************************************************************** @@ -6154,6 +6520,7 @@ static int imsm_create_metadata_update_for_reshape( struct mdinfo *spares = NULL; int i; int delta_disks = 0; + struct mdinfo *dev; dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n", geo->raid_disks); @@ -6192,27 +6559,18 @@ static int imsm_create_metadata_update_for_reshape( dprintf("imsm: %i spares are available.\n\n", spares->array.spare_disks); + dev = spares->devs; for (i = 0; i < delta_disks; i++) { - struct mdinfo *dev = spares->devs; struct dl *dl; + if (dev == NULL) + break; u->new_disks[i] = makedev(dev->disk.major, dev->disk.minor); dl = get_disk_super(super, dev->disk.major, dev->disk.minor); - dl->index = mpb->num_disks++; - } - /* Now update the metadata so that container_content will find - * the new devices - */ - for (i = 0; i < mpb->num_raid_devs; i++) { - int d; - struct imsm_dev *dev = get_imsm_dev(super, i); - struct imsm_map *map = get_imsm_map(dev, 0); - map->num_members = geo->raid_disks; - for (d = 0; d < delta_disks; d++) { - set_imsm_ord_tbl_ent(map, old_raid_disks + d, - mpb->num_disks - delta_disks + d); - } + dl->index = mpb->num_disks; + mpb->num_disks++; + dev = dev->next; } abort: @@ -6220,25 +6578,180 @@ abort: */ sysfs_free(spares); + dprintf("imsm: reshape update preparation :"); if (i == delta_disks) { + dprintf(" OK\n"); *updatep = u; return update_memory_size; } free(u); + dprintf(" Error\n"); return 0; } +static void imsm_update_metadata_locally(struct supertype *st, + void *buf, int len) +{ + struct metadata_update mu; + + mu.buf = buf; + mu.len = len; + mu.space = NULL; + mu.space_list = NULL; + mu.next = NULL; + imsm_prepare_update(st, &mu); + imsm_process_update(st, &mu); + + while (mu.space_list) { + void **space = mu.space_list; + mu.space_list = *space; + free(space); + } +} + +/*************************************************************************** +* Function: imsm_analyze_change +* Description: Function analyze change for single volume +* and validate if transition is supported +* Parameters: Geometry parameters, supertype structure +* Returns: Operation type code on success, -1 if fail +****************************************************************************/ +enum imsm_reshape_type imsm_analyze_change(struct supertype *st, + struct geo_params *geo) +{ + struct mdinfo info; + int change = -1; + int check_devs = 0; + + getinfo_super_imsm_volume(st, &info, NULL); + + if ((geo->level != info.array.level) && + (geo->level >= 0) && + (geo->level != UnSet)) { + switch (info.array.level) { + case 0: + if (geo->level == 5) { + change = CH_LEVEL_MIGRATION; + check_devs = 1; + } + if (geo->level == 10) { + change = CH_TAKEOVER; + check_devs = 1; + } + break; + case 5: + if (geo->level != 0) + change = CH_LEVEL_MIGRATION; + break; + case 10: + if (geo->level == 0) { + change = CH_TAKEOVER; + check_devs = 1; + } + break; + } + if (change == -1) { + fprintf(stderr, + Name " Error. Level Migration from %d to %d " + "not supported!\n", + info.array.level, geo->level); + goto analyse_change_exit; + } + } else + geo->level = info.array.level; + + if ((geo->layout != info.array.layout) + && ((geo->layout != UnSet) && (geo->layout != -1))) { + change = CH_LEVEL_MIGRATION; + if ((info.array.layout == 0) + && (info.array.level == 5) + && (geo->layout == 5)) { + /* reshape 5 -> 4 */ + } else if ((info.array.layout == 5) + && (info.array.level == 5) + && (geo->layout == 0)) { + /* reshape 4 -> 5 */ + geo->layout = 0; + geo->level = 5; + } else { + fprintf(stderr, + Name " Error. Layout Migration from %d to %d " + "not supported!\n", + info.array.layout, geo->layout); + change = -1; + goto analyse_change_exit; + } + } else + geo->layout = info.array.layout; + + if ((geo->chunksize > 0) && (geo->chunksize != UnSet) + && (geo->chunksize != info.array.chunk_size)) + change = CH_CHUNK_MIGR; + else + geo->chunksize = info.array.chunk_size; + + if (!validate_geometry_imsm(st, + geo->level, + geo->layout, + geo->raid_disks, + (geo->chunksize / 1024), + geo->size, + 0, 0, 1)) + change = -1; + + if (check_devs) { + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->anchor; + + if (mpb->num_raid_devs > 1) { + fprintf(stderr, + Name " Error. Cannot perform operation on %s" + "- for this operation it MUST be single " + "array in container\n", + geo->dev_name); + change = -1; + } + } + +analyse_change_exit: + + return change; +} + +int imsm_takeover(struct supertype *st, struct geo_params *geo) +{ + struct intel_super *super = st->sb; + struct imsm_update_takeover *u; + + u = malloc(sizeof(struct imsm_update_takeover)); + if (u == NULL) + return 1; + + u->type = update_takeover; + u->subarray = super->current_vol; + + /* 10->0 transition */ + if (geo->level == 0) + u->direction = R10_TO_R0; + + /* update metadata locally */ + imsm_update_metadata_locally(st, u, + sizeof(struct imsm_update_takeover)); + /* and possibly remotely */ + if (st->update_tail) + append_metadata_update(st, u, + sizeof(struct imsm_update_takeover)); + else + free(u); + + return 0; +} static int imsm_reshape_super(struct supertype *st, long long size, int level, int layout, int chunksize, int raid_disks, - char *backup, char *dev, int verbouse) + char *backup, char *dev, int verbose) { - /* currently we only support increasing the number of devices - * for a container. This increases the number of device for each - * member array. They must all be RAID0 or RAID5. - */ - int ret_val = 1; struct geo_params geo; @@ -6247,6 +6760,7 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, memset(&geo, sizeof(struct geo_params), 0); geo.dev_name = dev; + geo.dev_id = st->devnum; geo.size = size; geo.level = level; geo.layout = layout; @@ -6259,11 +6773,9 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, if (experimental() == 0) return ret_val; - /* verify reshape conditions - * on container level we can only increase number of devices. */ if (st->container_dev == st->devnum) { - /* check for delta_disks > 0 - *and supported raid levels 0 and 5 only in container */ + /* On container level we can only increase number of devices. */ + dprintf("imsm: info: Container operation\n"); int old_raid_disks = 0; if (imsm_reshape_is_allowed_on_container( st, &geo, &old_raid_disks)) { @@ -6273,22 +6785,80 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, len = imsm_create_metadata_update_for_reshape( st, &geo, old_raid_disks, &u); - if (len) { - ret_val = 0; + if (len <= 0) { + dprintf("imsm: Cannot prepare update\n"); + goto exit_imsm_reshape_super; + } + + ret_val = 0; + /* update metadata locally */ + imsm_update_metadata_locally(st, u, len); + /* and possibly remotely */ + if (st->update_tail) append_metadata_update(st, u, len); - } else - dprintf("imsm: Cannot prepare "\ - "update\n"); - } else - dprintf("imsm: Operation is not allowed "\ + else + free(u); + + } else { + fprintf(stderr, Name "imsm: Operation is not allowed " "on this container\n"); - } else - dprintf("imsm: not a container operation\n"); + } + } else { + /* On volume level we support following operations + * - takeover: raid10 -> raid0; raid0 -> raid10 + * - chunk size migration + * - migration: raid5 -> raid0; raid0 -> raid5 + */ + struct intel_super *super = st->sb; + struct intel_dev *dev = super->devlist; + int change, devnum; + dprintf("imsm: info: Volume operation\n"); + /* find requested device */ + while (dev) { + imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum); + if (devnum == geo.dev_id) + break; + dev = dev->next; + } + if (dev == NULL) { + fprintf(stderr, Name " Cannot find %s (%i) subarray\n", + geo.dev_name, geo.dev_id); + goto exit_imsm_reshape_super; + } + super->current_vol = dev->index; + change = imsm_analyze_change(st, &geo); + switch (change) { + case CH_TAKEOVER: + ret_val = imsm_takeover(st, &geo); + break; + case CH_CHUNK_MIGR: + ret_val = 0; + break; + case CH_LEVEL_MIGRATION: + ret_val = 0; + break; + default: + ret_val = 1; + } + } +exit_imsm_reshape_super: dprintf("imsm: reshape_super Exit code = %i\n", ret_val); return ret_val; } +static int imsm_manage_reshape( + int afd, struct mdinfo *sra, struct reshape *reshape, + struct supertype *st, unsigned long stripes, + int *fds, unsigned long long *offsets, + int dests, int *destfd, unsigned long long *destoffsets) +{ + /* Just use child_monitor for now */ + return child_monitor( + afd, sra, reshape, st, stripes, + fds, offsets, dests, destfd, destoffsets); +} + struct superswitch super_imsm = { #ifndef MDASSEMBLE .examine_super = examine_super_imsm, @@ -6326,6 +6896,7 @@ struct superswitch super_imsm = { .default_geometry = default_geometry_imsm, .get_disk_controller_domain = imsm_get_disk_controller_domain, .reshape_super = imsm_reshape_super, + .manage_reshape = imsm_manage_reshape, .external = 1, .name = "imsm",