X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=3525dae6681dff82483ca5e2d9fb679f2cf93413;hb=c9aaf5effb5e4afe5dd1e4d08188140bc596808e;hp=d5bab1750ab6fc8586b9ae70bdcaae3dd8386e6d;hpb=c47b0ff69a50d6b74e3ad39e31826b5b90b370df;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index d5bab175..3525dae6 100644 --- a/super-intel.c +++ b/super-intel.c @@ -41,18 +41,54 @@ #define MAX_SIGNATURE_LENGTH 32 #define MAX_RAID_SERIAL_LEN 16 -#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000) -#define MPB_ATTRIB_PM __cpu_to_le32(0x40000000) -#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000) -#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001) -#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002) -#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004) -#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008) -#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010) -#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020) +/* supports RAID0 */ +#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001) +/* supports RAID1 */ +#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002) +/* supports RAID10 */ +#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004) +/* supports RAID1E */ +#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008) +/* supports RAID5 */ +#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010) +/* supports RAID CNG */ +#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020) +/* supports expanded stripe sizes of 256K, 512K and 1MB */ +#define MPB_ATTRIB_EXP_STRIPE_SIZE __cpu_to_le32(0x00000040) + +/* The OROM Support RST Caching of Volumes */ +#define MPB_ATTRIB_NVM __cpu_to_le32(0x02000000) +/* The OROM supports creating disks greater than 2TB */ +#define MPB_ATTRIB_2TB_DISK __cpu_to_le32(0x04000000) +/* The OROM supports Bad Block Management */ +#define MPB_ATTRIB_BBM __cpu_to_le32(0x08000000) + +/* THe OROM Supports NVM Caching of Volumes */ +#define MPB_ATTRIB_NEVER_USE2 __cpu_to_le32(0x10000000) +/* The OROM supports creating volumes greater than 2TB */ +#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000) +/* originally for PMP, now it's wasted b/c. Never use this bit! */ +#define MPB_ATTRIB_NEVER_USE __cpu_to_le32(0x40000000) +/* Verify MPB contents against checksum after reading MPB */ +#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000) + +/* Define all supported attributes that have to be accepted by mdadm + */ +#define MPB_ATTRIB_SUPPORTED (MPB_ATTRIB_CHECKSUM_VERIFY | \ + MPB_ATTRIB_2TB | \ + MPB_ATTRIB_2TB_DISK | \ + MPB_ATTRIB_RAID0 | \ + MPB_ATTRIB_RAID1 | \ + MPB_ATTRIB_RAID10 | \ + MPB_ATTRIB_RAID5 | \ + MPB_ATTRIB_EXP_STRIPE_SIZE) + +/* Define attributes that are unused but not harmful */ +#define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE) #define MPB_SECTOR_CNT 2210 #define IMSM_RESERVED_SECTORS 4096 +#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056 #define SECT_PER_MB_SHIFT 11 /* Disk configuration info. */ @@ -309,7 +345,7 @@ struct intel_super { struct extent *e; /* for determining freespace @ create */ int raiddisk; /* slot to fill in autolayout */ enum action action; - } *disks; + } *disks, *current_disk; struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon active */ struct dl *missing; /* disks removed while we weren't looking */ @@ -345,7 +381,8 @@ enum imsm_update_type { update_add_remove_disk, update_reshape_container_disks, update_reshape_migration, - update_takeover + update_takeover, + update_general_migration_checkpoint, }; struct imsm_update_activate_spare { @@ -398,6 +435,11 @@ struct imsm_update_reshape_migration { int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */ }; +struct imsm_update_general_migration_checkpoint { + enum imsm_update_type type; + __u32 curr_migr_unit; +}; + struct disk_info { __u8 serial[MAX_RAID_SERIAL_LEN]; }; @@ -557,7 +599,7 @@ static __u8 *get_imsm_version(struct imsm_super *mpb) { return &mpb->sig[MPB_SIG_LEN]; } -#endif +#endif /* retrieve a disk directly from the anchor when the anchor is known to be * up-to-date, currently only at load time @@ -786,13 +828,24 @@ static int count_memberships(struct dl *dl, struct intel_super *super) return memberships; } +static __u32 imsm_min_reserved_sectors(struct intel_super *super); + static struct extent *get_extents(struct intel_super *super, struct dl *dl) { /* find a list of used extents on the given physical device */ struct extent *rv, *e; int i; int memberships = count_memberships(dl, super); - __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + __u32 reservation; + + /* trim the reserved area for spares, so they can join any array + * regardless of whether the OROM has assigned sectors from the + * IMSM_RESERVED_SECTORS region + */ + if (dl->index == -1) + reservation = imsm_min_reserved_sectors(super); + else + reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; rv = malloc(sizeof(struct extent) * (memberships + 1)); if (!rv) @@ -883,6 +936,51 @@ static int is_failed(struct imsm_disk *disk) return (disk->status & FAILED_DISK) == FAILED_DISK; } +/* try to determine how much space is reserved for metadata from + * the last get_extents() entry on the smallest active disk, + * otherwise fallback to the default + */ +static __u32 imsm_min_reserved_sectors(struct intel_super *super) +{ + struct extent *e; + int i; + __u32 min_active, remainder; + __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + struct dl *dl, *dl_min = NULL; + + if (!super) + return rv; + + min_active = 0; + for (dl = super->disks; dl; dl = dl->next) { + if (dl->index < 0) + continue; + if (dl->disk.total_blocks < min_active || min_active == 0) { + dl_min = dl; + min_active = dl->disk.total_blocks; + } + } + if (!dl_min) + return rv; + + /* find last lba used by subarrays on the smallest active disk */ + e = get_extents(super, dl_min); + if (!e) + return rv; + for (i = 0; e[i].size; i++) + continue; + + remainder = min_active - e[i].start; + free(e); + + /* to give priority to recovery we should not require full + IMSM_RESERVED_SECTORS from the spare */ + rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION; + + /* if real reservation is smaller use that value */ + return (remainder < rv) ? remainder : rv; +} + /* Return minimum size of a spare that can be used in this array*/ static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st) { @@ -909,8 +1007,10 @@ static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st) if (i > 0) rv = e[i-1].start + e[i-1].size; free(e); + /* add the amount of space needed for metadata */ - rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + rv = rv + imsm_min_reserved_sectors(super); + return rv * 512; } @@ -1018,18 +1118,20 @@ static void print_imsm_dev(struct intel_super *super, printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean"); } -static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved) +static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved) { - struct imsm_disk *disk = __get_imsm_disk(mpb, index); char str[MAX_RAID_SERIAL_LEN + 1]; __u64 sz; - if (index < 0 || !disk) + if (index < -1 || !disk) return; printf("\n"); snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); - printf(" Disk%02d Serial : %s\n", index, str); + if (index >= 0) + printf(" Disk%02d Serial : %s\n", index, str); + else + printf(" Disk Serial : %s\n", str); printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", is_configured(disk) ? " active" : "", is_failed(disk) ? " failed" : ""); @@ -1089,7 +1191,95 @@ void examine_migr_rec_imsm(struct intel_super *super) break; } } +#endif /* MDASSEMBLE */ +/******************************************************************************* + * function: imsm_check_attributes + * Description: Function checks if features represented by attributes flags + * are supported by mdadm. + * Parameters: + * attributes - Attributes read from metadata + * Returns: + * 0 - passed attributes contains unsupported features flags + * 1 - all features are supported + ******************************************************************************/ +static int imsm_check_attributes(__u32 attributes) +{ + int ret_val = 1; + __u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff; + + not_supported &= ~MPB_ATTRIB_IGNORED; + + not_supported &= attributes; + if (not_supported) { + fprintf(stderr, Name "(IMSM): Unsupported attributes : %x\n", + (unsigned)__le32_to_cpu(not_supported)); + if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) { + dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n"); + not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY; + } + if (not_supported & MPB_ATTRIB_2TB) { + dprintf("\t\tMPB_ATTRIB_2TB\n"); + not_supported ^= MPB_ATTRIB_2TB; + } + if (not_supported & MPB_ATTRIB_RAID0) { + dprintf("\t\tMPB_ATTRIB_RAID0\n"); + not_supported ^= MPB_ATTRIB_RAID0; + } + if (not_supported & MPB_ATTRIB_RAID1) { + dprintf("\t\tMPB_ATTRIB_RAID1\n"); + not_supported ^= MPB_ATTRIB_RAID1; + } + if (not_supported & MPB_ATTRIB_RAID10) { + dprintf("\t\tMPB_ATTRIB_RAID10\n"); + not_supported ^= MPB_ATTRIB_RAID10; + } + if (not_supported & MPB_ATTRIB_RAID1E) { + dprintf("\t\tMPB_ATTRIB_RAID1E\n"); + not_supported ^= MPB_ATTRIB_RAID1E; + } + if (not_supported & MPB_ATTRIB_RAID5) { + dprintf("\t\tMPB_ATTRIB_RAID5\n"); + not_supported ^= MPB_ATTRIB_RAID5; + } + if (not_supported & MPB_ATTRIB_RAIDCNG) { + dprintf("\t\tMPB_ATTRIB_RAIDCNG\n"); + not_supported ^= MPB_ATTRIB_RAIDCNG; + } + if (not_supported & MPB_ATTRIB_BBM) { + dprintf("\t\tMPB_ATTRIB_BBM\n"); + not_supported ^= MPB_ATTRIB_BBM; + } + if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) { + dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY (== MPB_ATTRIB_LEGACY)\n"); + not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY; + } + if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) { + dprintf("\t\tMPB_ATTRIB_EXP_STRIP_SIZE\n"); + not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE; + } + if (not_supported & MPB_ATTRIB_2TB_DISK) { + dprintf("\t\tMPB_ATTRIB_2TB_DISK\n"); + not_supported ^= MPB_ATTRIB_2TB_DISK; + } + if (not_supported & MPB_ATTRIB_NEVER_USE2) { + dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n"); + not_supported ^= MPB_ATTRIB_NEVER_USE2; + } + if (not_supported & MPB_ATTRIB_NEVER_USE) { + dprintf("\t\tMPB_ATTRIB_NEVER_USE\n"); + not_supported ^= MPB_ATTRIB_NEVER_USE; + } + + if (not_supported) + dprintf(Name "(IMSM): Unknown attributes : %x\n", not_supported); + + ret_val = 0; + } + + return ret_val; +} +#ifndef MDASSEMBLE static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map); static void examine_super_imsm(struct supertype *st, char *homehost) @@ -1111,6 +1301,11 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num)); printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num)); printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num)); + printf(" Attributes : "); + if (imsm_check_attributes(mpb->attributes)) + printf("All supported\n"); + else + printf("not supported\n"); getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf(" UUID : %s\n", nbuf + 5); @@ -1120,7 +1315,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" MPB Sectors : %d\n", mpb_sectors(mpb)); printf(" Disks : %d\n", mpb->num_disks); printf(" RAID Devices : %d\n", mpb->num_raid_devs); - print_imsm_disk(mpb, super->disks->index, reserved); + print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved); if (super->bbm_log) { struct bbm_log *log = super->bbm_log; @@ -1145,28 +1340,12 @@ static void examine_super_imsm(struct supertype *st, char *homehost) for (i = 0; i < mpb->num_disks; i++) { if (i == super->disks->index) continue; - print_imsm_disk(mpb, i, reserved); + print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved); } - for (dl = super->disks ; dl; dl = dl->next) { - struct imsm_disk *disk; - char str[MAX_RAID_SERIAL_LEN + 1]; - __u64 sz; - - if (dl->index >= 0) - continue; - disk = &dl->disk; - printf("\n"); - snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); - printf(" Disk Serial : %s\n", str); - printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", - is_configured(disk) ? " active" : "", - is_failed(disk) ? " failed" : ""); - printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); - sz = __le32_to_cpu(disk->total_blocks) - reserved; - printf(" Usable Size : %llu%s\n", (unsigned long long)sz, - human_size(sz * 512)); - } + for (dl = super->disks; dl; dl = dl->next) + if (dl->index == -1) + print_imsm_disk(&dl->disk, -1, reserved); examine_migr_rec_imsm(super); } @@ -1390,9 +1569,9 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b fd2devname(fd, buf); printf(" Port%d : %s", port, buf); if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0) - printf(" (%s)\n", buf); + printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf); else - printf("()\n"); + printf(" ()\n"); } close(fd); free(path); @@ -1413,8 +1592,6 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b return err; } - - static void print_found_intel_controllers(struct sys_dev *elem) { for (; elem; elem = elem->next) { @@ -1816,7 +1993,7 @@ static __u64 blocks_per_migr_unit(struct intel_super *super, migr_chunk = migr_strip_blocks_resync(dev); disks = imsm_num_data_members(dev, 0); blocks_per_unit = stripes_per_unit * migr_chunk * disks; - stripe = __le32_to_cpu(map->blocks_per_strip) * disks; + stripe = __le16_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; block_rel = blocks_per_unit - segment * stripe; parity_depth = parity_segment_depth(dev); @@ -1938,6 +2115,52 @@ out: return retval; } +#ifndef MDASSEMBLE +/******************************************************************************* + * function: imsm_create_metadata_checkpoint_update + * Description: It creates update for checkpoint change. + * Parameters: + * super : imsm internal array info + * u : pointer to prepared update + * Returns: + * Uptate length. + * If length is equal to 0, input pointer u contains no update + ******************************************************************************/ +static int imsm_create_metadata_checkpoint_update( + struct intel_super *super, + struct imsm_update_general_migration_checkpoint **u) +{ + + int update_memory_size = 0; + + dprintf("imsm_create_metadata_checkpoint_update(enter)\n"); + + if (u == NULL) + return 0; + *u = NULL; + + /* size of all update data without anchor */ + update_memory_size = + sizeof(struct imsm_update_general_migration_checkpoint); + + *u = calloc(1, update_memory_size); + if (*u == NULL) { + dprintf("error: cannot get memory for " + "imsm_create_metadata_checkpoint_update update\n"); + return 0; + } + (*u)->type = update_general_migration_checkpoint; + (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit); + dprintf("imsm_create_metadata_checkpoint_update: prepared for %u\n", + (*u)->curr_migr_unit); + + return update_memory_size; +} + + +static void imsm_update_metadata_locally(struct supertype *st, + void *buf, int len); + /******************************************************************************* * Function: write_imsm_migr_rec * Description: Function writes imsm migration record @@ -1956,6 +2179,8 @@ static int write_imsm_migr_rec(struct supertype *st) int fd = -1; int retval = -1; struct dl *sd; + int len; + struct imsm_update_general_migration_checkpoint *u; for (sd = super->disks ; sd ; sd = sd->next) { /* write to 2 first slots only */ @@ -1981,6 +2206,26 @@ static int write_imsm_migr_rec(struct supertype *st) close(fd); fd = -1; } + /* update checkpoint information in metadata */ + len = imsm_create_metadata_checkpoint_update(super, &u); + + if (len <= 0) { + dprintf("imsm: Cannot prepare update\n"); + goto out; + } + /* update metadata locally */ + imsm_update_metadata_locally(st, u, len); + /* and possibly remotely */ + if (st->update_tail) { + append_metadata_update(st, u, len); + /* during reshape we do all work inside metadata handler + * manage_reshape(), so metadata update has to be triggered + * insida it + */ + flush_metadata_updates(st); + st->update_tail = &st->updates; + } else + free(u); retval = 0; out: @@ -1988,6 +2233,31 @@ static int write_imsm_migr_rec(struct supertype *st) close(fd); return retval; } +#endif /* MDASSEMBLE */ + +/* spare/missing disks activations are not allowe when + * array/container performs reshape operation, because + * all arrays in container works on the same disks set + */ +int imsm_reshape_blocks_arrays_changes(struct intel_super *super) +{ + int rv = 0; + struct intel_dev *i_dev; + struct imsm_dev *dev; + + /* check whole container + */ + for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) { + dev = i_dev->dev; + if (is_gen_migration(dev)) { + /* No repair during any migration in container + */ + rv = 1; + break; + } + } + return rv; +} static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap) { @@ -2006,9 +2276,8 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, if (prev_map) map_to_analyse = prev_map; - for (dl = super->disks; dl; dl = dl->next) - if (dl->raiddisk == info->disk.raid_disk) - break; + dl = super->current_disk; + info->container_member = super->current_vol; info->array.raid_disks = map->num_members; info->array.level = get_imsm_raid_level(map_to_analyse); @@ -2022,7 +2291,10 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->custom_array_size = __le32_to_cpu(dev->size_high); info->custom_array_size <<= 32; info->custom_array_size |= __le32_to_cpu(dev->size_low); - if (prev_map && map->map_state == prev_map->map_state) { + info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); + + if (prev_map && map->map_state == prev_map->map_state && + (migr_type(dev) == MIGR_GEN_MIGR)) { info->reshape_active = 1; info->new_level = get_imsm_raid_level(map); info->new_layout = imsm_level_to_layout(info->new_level); @@ -2032,7 +2304,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, /* this needs to be applied to every array * in the container. */ - info->reshape_active = 2; + info->reshape_active = CONTAINER_RESHAPE; } /* We shape information that we give to md might have to be * modify to cope with md's requirement for reshaping arrays. @@ -2054,7 +2326,6 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, /* conversion is happening as RAID5 */ info->array.level = 5; info->array.layout = ALGORITHM_PARITY_N; - info->array.raid_disks += 1; info->delta_disks -= 1; break; default: @@ -2070,11 +2341,13 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->new_chunk = info->array.chunk_size; info->delta_disks = 0; } - info->disk.major = 0; - info->disk.minor = 0; + if (dl) { info->disk.major = dl->major; info->disk.minor = dl->minor; + info->disk.number = dl->index; + info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse, + dl->index); } info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0); @@ -2100,8 +2373,9 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->reshape_progress = 0; info->resync_start = MaxSector; - if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED || - dev->vol.dirty) { + if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED || + dev->vol.dirty) && + imsm_reshape_blocks_arrays_changes(super) == 0) { info->resync_start = 0; } if (dev->vol.migr_state) { @@ -2122,11 +2396,20 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, unsigned long long array_blocks; int used_disks; + if (__le32_to_cpu(migr_rec->ascending_migr) && + (units < + (__le32_to_cpu(migr_rec->num_migr_units)-1)) && + (super->migr_rec->rec_status == + __cpu_to_le32(UNIT_SRC_IN_CP_AREA))) + units++; + info->reshape_progress = blocks_per_unit * units; dprintf("IMSM: General Migration checkpoint : %llu " "(%llu) -> read reshape progress : %llu\n", - units, blocks_per_unit, info->reshape_progress); + (unsigned long long)units, + (unsigned long long)blocks_per_unit, + info->reshape_progress); used_disks = imsm_num_data_members(dev, 1); if (used_disks > 0) { @@ -2233,6 +2516,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * info->disk.state = 0; info->name[0] = 0; info->recovery_start = MaxSector; + info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); /* do we have the all the insync disks that we expect? */ mpb = super->anchor; @@ -2578,7 +2862,7 @@ static void fd2devname(int fd, char *name) sprintf(path, "/sys/dev/block/%d:%d", major(st.st_rdev), minor(st.st_rdev)); - rv = readlink(path, dname, sizeof(dname)); + rv = readlink(path, dname, sizeof(dname)-1); if (rv <= 0) return; @@ -2670,7 +2954,6 @@ static void serialcpy(__u8 *dest, __u8 *src) strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN); } -#ifndef MDASSEMBLE static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super) { struct dl *dl; @@ -2681,7 +2964,6 @@ static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super) return dl; } -#endif static struct imsm_disk * __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx) @@ -2828,7 +3110,7 @@ static void end_migration(struct imsm_dev *dev, __u8 map_state) } dev->vol.migr_state = 0; - dev->vol.migr_type = 0; + set_migr_type(dev, 0); dev->vol.curr_migr_unit = 0; map->map_state = map_state; } @@ -2966,8 +3248,8 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) { if (devname) - fprintf(stderr, - Name ": Cannot seek to anchor block on %s: %s\n", + fprintf(stderr, Name + ": Cannot seek to anchor block on %s: %s\n", devname, strerror(errno)); return 1; } @@ -3239,7 +3521,6 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de return 0; } -#ifndef MDASSEMBLE /* find_missing - helper routine for load_super_imsm_all that identifies * disks that have disappeared from the system. This routine relies on * the mpb being uptodate, which it is at load time. @@ -3275,6 +3556,7 @@ static int find_missing(struct intel_super *super) return 0; } +#ifndef MDASSEMBLE static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list) { struct intel_disk *idisk = disk_list; @@ -3764,16 +4046,17 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) } /* load migration record */ - load_imsm_migr_rec(super, NULL); - - /* Check for unsupported migration features */ - if (check_mpb_migr_compatibility(super) != 0) { - fprintf(stderr, Name ": Unsupported migration detected"); - if (devname) - fprintf(stderr, " on %s\n", devname); - else - fprintf(stderr, " (IMSM).\n"); - return 3; + if (load_imsm_migr_rec(super, NULL) == 0) { + /* Check for unsupported migration features */ + if (check_mpb_migr_compatibility(super) != 0) { + fprintf(stderr, + Name ": Unsupported migration detected"); + if (devname) + fprintf(stderr, " on %s\n", devname); + else + fprintf(stderr, " (IMSM).\n"); + return 3; + } } return 0; @@ -3927,12 +4210,40 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, memset(mpb_new + size_old, 0, size_round - size_old); } super->current_vol = idx; - /* when creating the first raid device in this container set num_disks - * to zero, i.e. delete this spare and add raid member devices in - * add_to_super_imsm_volume() + + /* handle 'failed_disks' by either: + * a) create dummy disk entries in the table if this the first + * volume in the array. We add them here as this is the only + * opportunity to add them. add_to_super_imsm_volume() + * handles the non-failed disks and continues incrementing + * mpb->num_disks. + * b) validate that 'failed_disks' matches the current number + * of missing disks if the container is populated */ - if (super->current_vol == 0) + if (super->current_vol == 0) { mpb->num_disks = 0; + for (i = 0; i < info->failed_disks; i++) { + struct imsm_disk *disk; + + mpb->num_disks++; + disk = __get_imsm_disk(mpb, i); + disk->status = CONFIGURED_DISK | FAILED_DISK; + disk->scsi_id = __cpu_to_le32(~(__u32)0); + snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN, + "missing:%d", i); + } + find_missing(super); + } else { + int missing = 0; + struct dl *d; + + for (d = super->missing; d; d = d->next) + missing++; + if (info->failed_disks > missing) { + fprintf(stderr, Name": unable to add 'missing' disk to container\n"); + return 0; + } + } if (!check_name(super, name, 0)) return 0; @@ -3964,15 +4275,14 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, vol = &dev->vol; vol->migr_state = 0; set_migr_type(dev, MIGR_INIT); - vol->dirty = 0; + vol->dirty = !info->state; vol->curr_migr_unit = 0; map = get_imsm_map(dev, 0); map->pba_of_lba0 = __cpu_to_le32(super->create_offset); map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info)); map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); map->failed_disk_num = ~0; - map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED : - IMSM_T_STATE_NORMAL; + map->map_state = info->failed_disks ? IMSM_T_STATE_DEGRADED : IMSM_T_STATE_NORMAL; map->ddf = 1; if (info->level == 1 && info->raid_disks > 2) { @@ -4080,9 +4390,10 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, { struct intel_super *super = st->sb; struct imsm_super *mpb = super->anchor; - struct dl *dl; + struct imsm_disk *_disk; struct imsm_dev *dev; struct imsm_map *map; + struct dl *dl, *df; int slot; dev = get_imsm_dev(super, super->current_vol); @@ -4126,15 +4437,40 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, devname); return 1; } - set_imsm_ord_tbl_ent(map, dk->number, dl->index); + set_imsm_ord_tbl_ent(map, dk->raid_disk, dl->index); dl->disk.status = CONFIGURED_DISK; + /* update size of 'missing' disks to be at least as large as the + * largest acitve member (we only have dummy missing disks when + * creating the first volume) + */ + if (super->current_vol == 0) { + for (df = super->missing; df; df = df->next) { + if (dl->disk.total_blocks > df->disk.total_blocks) + df->disk.total_blocks = dl->disk.total_blocks; + _disk = __get_imsm_disk(mpb, df->index); + *_disk = df->disk; + } + } + + /* refresh unset/failed slots to point to valid 'missing' entries */ + for (df = super->missing; df; df = df->next) + for (slot = 0; slot < mpb->num_disks; slot++) { + __u32 ord = get_imsm_ord_tbl_ent(dev, slot, -1); + + if ((ord & IMSM_ORD_REBUILD) == 0) + continue; + set_imsm_ord_tbl_ent(map, slot, df->index | IMSM_ORD_REBUILD); + dprintf("set slot:%d to missing disk:%d\n", slot, df->index); + break; + } + /* if we are creating the first raid device update the family number */ if (super->current_vol == 0) { __u32 sum; struct imsm_dev *_dev = __get_imsm_dev(mpb, 0); - struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index); + _disk = __get_imsm_disk(mpb, dl->index); if (!_dev || !_disk) { fprintf(stderr, Name ": BUG mpb setup error\n"); return 1; @@ -4146,10 +4482,41 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, mpb->family_num = __cpu_to_le32(sum); mpb->orig_family_num = mpb->family_num; } - + super->current_disk = dl; return 0; } +/* mark_spare() + * Function marks disk as spare and restores disk serial + * in case it was previously marked as failed by takeover operation + * reruns: + * -1 : critical error + * 0 : disk is marked as spare but serial is not set + * 1 : success + */ +int mark_spare(struct dl *disk) +{ + __u8 serial[MAX_RAID_SERIAL_LEN]; + int ret_val = -1; + + if (!disk) + return ret_val; + + ret_val = 0; + if (!imsm_read_serial(disk->fd, NULL, serial)) { + /* Restore disk serial number, because takeover marks disk + * as failed and adds to serial ':0' before it becomes + * a spare disk. + */ + serialcpy(disk->serial, serial); + serialcpy(disk->disk.serial, serial); + ret_val = 1; + } + disk->disk.status = SPARE_DISK; + disk->index = -1; + + return ret_val; +} static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) @@ -4187,7 +4554,6 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, memset(dd, 0, sizeof(*dd)); dd->major = major(stb.st_rdev); dd->minor = minor(stb.st_rdev); - dd->index = -1; dd->devname = devname ? strdup(devname) : NULL; dd->fd = fd; dd->e = NULL; @@ -4204,7 +4570,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, size /= 512; serialcpy(dd->disk.serial, dd->serial); dd->disk.total_blocks = __cpu_to_le32(size); - dd->disk.status = SPARE_DISK; + mark_spare(dd); if (sysfs_disk_to_scsi_id(fd, &id) == 0) dd->disk.scsi_id = __cpu_to_le32(id); else @@ -4247,9 +4613,8 @@ static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk) memset(dd, 0, sizeof(*dd)); dd->major = dk->major; dd->minor = dk->minor; - dd->index = -1; dd->fd = -1; - dd->disk.status = SPARE_DISK; + mark_spare(dd); dd->action = DISK_REMOVE; dd->next = super->disk_mgmt_list; @@ -4312,8 +4677,6 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) return 0; } -static int is_gen_migration(struct imsm_dev *dev); - static int write_super_imsm(struct supertype *st, int doclose) { struct intel_super *super = st->sb; @@ -4375,7 +4738,7 @@ static int write_super_imsm(struct supertype *st, int doclose) /* write the mpb for disks that compose raid devices */ for (d = super->disks; d ; d = d->next) { - if (d->index < 0) + if (d->index < 0 || is_failed(&d->disk)) continue; if (store_imsm_mpb(d->fd, mpb)) fprintf(stderr, "%s: failed for device %d:%d %s\n", @@ -4385,7 +4748,8 @@ static int write_super_imsm(struct supertype *st, int doclose) get_dev_size(d->fd, NULL, &dsize); if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) { - write(d->fd, super->migr_rec_buf, 512); + if (write(d->fd, super->migr_rec_buf, 512) != 512) + perror("Write migr_rec failed"); } } if (doclose) { @@ -4712,43 +5076,44 @@ static int is_raid_level_supported(const struct imsm_orom *orom, int level, int return 0; } +static int imsm_default_chunk(const struct imsm_orom *orom) +{ + /* up to 512 if the plaform supports it, otherwise the platform max. + * 128 if no platform detected + */ + int fs = max(7, orom ? fls(orom->sss) : 0); + + return min(512, (1 << fs)); +} #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg)) -/* - * validate volume parameters with OROM/EFI capabilities - */ static int validate_geometry_imsm_orom(struct intel_super *super, int level, int layout, int raiddisks, int *chunk, int verbose) { -#if DEBUG - verbose = 1; -#endif - /* validate container capabilities */ - if (super->orom && raiddisks > super->orom->tds) { - if (verbose) - fprintf(stderr, Name ": %d exceeds maximum number of" - " platform supported disks: %d\n", - raiddisks, super->orom->tds); + /* check/set platform and metadata limits/defaults */ + if (super->orom && raiddisks > super->orom->dpa) { + pr_vrb(": platform supports a maximum of %d disks per array\n", + super->orom->dpa); return 0; } /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */ - if (super->orom && (!is_raid_level_supported(super->orom, level, - raiddisks))) { + if (!is_raid_level_supported(super->orom, level, raiddisks)) { pr_vrb(": platform does not support raid%d with %d disk%s\n", level, raiddisks, raiddisks > 1 ? "s" : ""); return 0; } - if (super->orom && level != 1) { - if (chunk && (*chunk == 0 || *chunk == UnSet)) - *chunk = imsm_orom_default_chunk(super->orom); - else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) { - pr_vrb(": platform does not support a chunk size of: " - "%d\n", *chunk); - return 0; - } + + if (chunk && (*chunk == 0 || *chunk == UnSet)) + *chunk = imsm_default_chunk(super->orom); + + if (super->orom && chunk && !imsm_orom_has_chunk(super->orom, *chunk)) { + pr_vrb(": platform does not support a chunk size of: " + "%d\n", *chunk); + return 0; } + if (layout != imsm_level_to_layout(level)) { if (level == 5) pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n"); @@ -4784,6 +5149,12 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, if (!super) return 0; + if (mpb->num_raid_devs > 0 && mpb->num_disks != raiddisks) { + fprintf(stderr, Name ": the option-rom requires all " + "member disks to be a member of all volumes.\n"); + return 0; + } + if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) { fprintf(stderr, Name ": RAID gemetry validation failed. " "Cannot proceed with the action(s).\n"); @@ -5077,7 +5448,8 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, return validate_geometry_imsm_volume(st, level, layout, raiddisks, chunk, size, dev, - freesize, verbose); + freesize, 1) + ? 1 : -1; } } @@ -5098,9 +5470,8 @@ static void default_geometry_imsm(struct supertype *st, int *level, int *layout, if (level && layout && *layout == UnSet) *layout = imsm_level_to_layout(*level); - if (chunk && (*chunk == UnSet || *chunk == 0) && - super && super->orom) - *chunk = imsm_orom_default_chunk(super->orom); + if (chunk && (*chunk == UnSet || *chunk == 0)) + *chunk = imsm_default_chunk(super->orom); } static void handle_missing(struct intel_super *super, struct imsm_dev *dev); @@ -5168,10 +5539,8 @@ static int kill_subarray_imsm(struct supertype *st) struct dl *d; for (d = super->disks; d; d = d->next) - if (d->index > -2) { - d->index = -1; - d->disk.status = SPARE_DISK; - } + if (d->index > -2) + mark_spare(d); } super->updates_pending++; @@ -5233,6 +5602,9 @@ static int update_subarray_imsm(struct supertype *st, char *subarray, static int is_gen_migration(struct imsm_dev *dev) { + if (dev == NULL) + return 0; + if (!dev->vol.migr_state) return 0; @@ -5293,6 +5665,9 @@ static void update_recovery_start(struct intel_super *super, rebuild->recovery_start = units * blocks_per_migr_unit(super, dev); } +#ifndef MDASSEMBLE +static int recover_backup_imsm(struct supertype *st, struct mdinfo *info); +#endif static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray) { @@ -5309,13 +5684,24 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra struct imsm_super *mpb = super->anchor; struct mdinfo *rest = NULL; unsigned int i; - int bbm_errors = 0; + int sb_errors = 0; struct dl *d; int spare_disks = 0; + /* do not assemble arrays when not all attributes are supported */ + if (imsm_check_attributes(mpb->attributes) == 0) { + sb_errors = 1; + fprintf(stderr, Name ": Unsupported attributes in IMSM metadata." + "Arrays activation is blocked.\n"); + } + /* check for bad blocks */ - if (imsm_bbm_log_size(super->anchor)) - bbm_errors = 1; + if (imsm_bbm_log_size(super->anchor)) { + fprintf(stderr, Name ": BBM log found in IMSM metadata." + "Arrays activation is blocked.\n"); + sb_errors = 1; + } + /* count spare devices, not used in maps */ @@ -5354,29 +5740,39 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra */ chunk = __le16_to_cpu(map->blocks_per_strip) >> 1; -#ifndef MDASSEMBLE - if (!validate_geometry_imsm_orom(super, - get_imsm_raid_level(map), /* RAID level */ - imsm_level_to_layout(get_imsm_raid_level(map)), - map->num_members, /* raid disks */ - &chunk, - 1 /* verbose */)) { - fprintf(stderr, Name ": RAID gemetry validation failed. " - "Cannot proceed with the action(s).\n"); - continue; - } -#endif /* MDASSEMBLE */ this = malloc(sizeof(*this)); if (!this) { fprintf(stderr, Name ": failed to allocate %zu bytes\n", sizeof(*this)); break; } - memset(this, 0, sizeof(*this)); - this->next = rest; super->current_vol = i; getinfo_super_imsm_volume(st, this, NULL); + this->next = rest; +#ifndef MDASSEMBLE + /* mdadm does not support all metadata features- set the bit in all arrays state */ + if (!validate_geometry_imsm_orom(super, + get_imsm_raid_level(map), /* RAID level */ + imsm_level_to_layout(get_imsm_raid_level(map)), + map->num_members, /* raid disks */ + &chunk, + 1 /* verbose */)) { + fprintf(stderr, Name ": IMSM RAID gemetry validation failed. " + "Array %s activation is blocked.\n", + dev->volume); + this->array.state |= + (1<array.state |= + (1<num_members; slot++) { unsigned long long recovery_start; struct mdinfo *info_d; @@ -5456,12 +5852,14 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra /* now that the disk list is up-to-date fixup recovery_start */ update_recovery_start(super, dev, this); this->array.spare_disks += spare_disks; - rest = this; - } - /* if array has bad blocks, set suitable bit in array status */ - if (bbm_errors) - rest->array.state |= (1<reshape_active == 1) + recover_backup_imsm(st, this); +#endif + rest = this; + } return rest; } @@ -5612,6 +6010,8 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) __u32 ord; int slot; struct imsm_map *map; + char buf[MAX_RAID_SERIAL_LEN+3]; + unsigned int len, shift = 0; /* new failures are always set in map[0] */ map = get_imsm_map(dev, 0); @@ -5624,6 +6024,13 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) if (is_failed(disk) && (ord & IMSM_ORD_REBUILD)) return 0; + memcpy(buf, disk->serial, MAX_RAID_SERIAL_LEN); + buf[MAX_RAID_SERIAL_LEN] = '\000'; + strcat(buf, ":0"); + if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN) + shift = len - MAX_RAID_SERIAL_LEN + 1; + strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN); + disk->status |= FAILED_DISK; set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD); if (map->failed_disk_num == 0xff) @@ -5728,7 +6135,7 @@ static void imsm_progress_container_reshape(struct intel_super *super) map->num_members = prev_disks; dev->vol.migr_state = 1; dev->vol.curr_migr_unit = 0; - dev->vol.migr_type = MIGR_GEN_MIGR; + set_migr_type(dev, MIGR_GEN_MIGR); for (i = prev_num_members; i < map->num_members; i++) set_imsm_ord_tbl_ent(map, i, i); @@ -5771,14 +6178,18 @@ static int imsm_set_array_state(struct active_array *a, int consistent) } else { if (a->last_checkpoint == 0 && a->prev_action == reshape) { /* for some reason we aborted the reshape. - * Better clean up + * + * disable automatic metadata rollback + * user action is required to recover process */ + if (0) { struct imsm_map *map2 = get_imsm_map(dev, 1); dev->vol.migr_state = 0; - dev->vol.migr_type = 0; + set_migr_type(dev, 0); dev->vol.curr_migr_unit = 0; memcpy(map, map2, sizeof_imsm_map(map2)); super->updates_pending++; + } } if (a->last_checkpoint >= a->info.component_size) { unsigned long long array_blocks; @@ -5834,7 +6245,8 @@ static int imsm_set_array_state(struct active_array *a, int consistent) super->updates_pending++; a->last_checkpoint = 0; } - } else if (!is_resyncing(dev) && !failed) { + } else if ((!is_resyncing(dev) && !failed) && + (imsm_reshape_blocks_arrays_changes(super) == 0)) { /* mark the start of the init process if nothing is failed */ dprintf("imsm: mark resync start\n"); if (map->map_state == IMSM_T_STATE_UNINITIALIZED) @@ -5845,6 +6257,12 @@ static int imsm_set_array_state(struct active_array *a, int consistent) } mark_checkpoint: + /* skip checkpointing for general migration, + * it is controlled in mdadm + */ + if (is_gen_migration(dev)) + goto skip_mark_checkpoint; + /* check if we can update curr_migr_unit from resync_start, recovery_start */ blocks_per_unit = blocks_per_migr_unit(super, dev); if (blocks_per_unit) { @@ -5866,6 +6284,7 @@ mark_checkpoint: } } +skip_mark_checkpoint: /* mark dirty / clean */ if (dev->vol.dirty != !consistent) { dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty"); @@ -6214,10 +6633,8 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n", inst, failed, a->info.array.raid_disks, a->info.array.level); - if (dev->vol.migr_state && - dev->vol.migr_type == MIGR_GEN_MIGR) - /* No repair during migration */ - return NULL; + if (imsm_reshape_blocks_arrays_changes(super)) + return NULL; if (a->info.array.level == 4) /* No repair for takeovered array @@ -6234,9 +6651,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, * are removed from container. */ if (failed) { - dprintf("found failed disks in %s, check if there another" + dprintf("found failed disks in %.*s, check if there another" "failed sub-array.\n", - dev->volume); + MAX_RAID_SERIAL_LEN, dev->volume); /* check if states of the other volumes allow for rebuild */ for (i = 0; i < super->anchor->num_raid_devs; i++) { if (i != inst) { @@ -6266,9 +6683,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, */ dl = imsm_readd(super, i, a); if (!dl) - dl = imsm_add_spare(super, i, a, 0, NULL); + dl = imsm_add_spare(super, i, a, 0, rv); if (!dl) - dl = imsm_add_spare(super, i, a, 1, NULL); + dl = imsm_add_spare(super, i, a, 1, rv); if (!dl) continue; @@ -6305,8 +6722,6 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, num_spares++; dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor, i, di->data_offset); - - break; } if (!rv) @@ -6576,6 +6991,120 @@ error_disk_add: return ret_val; } +static int apply_update_activate_spare(struct imsm_update_activate_spare *u, + struct intel_super *super, + struct active_array *active_array) +{ + struct imsm_super *mpb = super->anchor; + struct imsm_dev *dev = get_imsm_dev(super, u->array); + struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *migr_map; + struct active_array *a; + struct imsm_disk *disk; + __u8 to_state; + struct dl *dl; + unsigned int found; + int failed; + int victim; + int i; + int second_map_created = 0; + + for (; u; u = u->next) { + victim = get_imsm_disk_idx(dev, u->slot, -1); + + if (victim < 0) + return 0; + + for (dl = super->disks; dl; dl = dl->next) + if (dl == u->dl) + break; + + if (!dl) { + fprintf(stderr, "error: imsm_activate_spare passed " + "an unknown disk (index: %d)\n", + u->dl->index); + return 0; + } + + /* count failures (excluding rebuilds and the victim) + * to determine map[0] state + */ + failed = 0; + for (i = 0; i < map->num_members; i++) { + if (i == u->slot) + continue; + disk = get_imsm_disk(super, + get_imsm_disk_idx(dev, i, -1)); + if (!disk || is_failed(disk)) + failed++; + } + + /* adding a pristine spare, assign a new index */ + if (dl->index < 0) { + dl->index = super->anchor->num_disks; + super->anchor->num_disks++; + } + disk = &dl->disk; + disk->status |= CONFIGURED_DISK; + disk->status &= ~SPARE_DISK; + + /* mark rebuild */ + to_state = imsm_check_degraded(super, dev, failed); + if (!second_map_created) { + second_map_created = 1; + map->map_state = IMSM_T_STATE_DEGRADED; + migrate(dev, super, to_state, MIGR_REBUILD); + } else + map->map_state = to_state; + migr_map = get_imsm_map(dev, 1); + set_imsm_ord_tbl_ent(map, u->slot, dl->index); + set_imsm_ord_tbl_ent(migr_map, u->slot, + dl->index | IMSM_ORD_REBUILD); + + /* update the family_num to mark a new container + * generation, being careful to record the existing + * family_num in orig_family_num to clean up after + * earlier mdadm versions that neglected to set it. + */ + if (mpb->orig_family_num == 0) + mpb->orig_family_num = mpb->family_num; + mpb->family_num += super->random; + + /* count arrays using the victim in the metadata */ + found = 0; + for (a = active_array; a ; a = a->next) { + dev = get_imsm_dev(super, a->info.container_member); + map = get_imsm_map(dev, 0); + + if (get_imsm_disk_slot(map, victim) >= 0) + found++; + } + + /* delete the victim if it is no longer being + * utilized anywhere + */ + if (!found) { + struct dl **dlp; + + /* We know that 'manager' isn't touching anything, + * so it is safe to delete + */ + for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next) + if ((*dlp)->index == victim) + break; + + /* victim may be on the missing list */ + if (!*dlp) + for (dlp = &super->missing; *dlp; + dlp = &(*dlp)->next) + if ((*dlp)->index == victim) + break; + imsm_delete(super, dlp, victim); + } + } + + return 1; +} static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, struct intel_super *super, @@ -6649,7 +7178,7 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, devices_to_reshape--; newdev->vol.migr_state = 1; newdev->vol.curr_migr_unit = 0; - newdev->vol.migr_type = MIGR_GEN_MIGR; + set_migr_type(newdev, MIGR_GEN_MIGR); newmap->num_members = u->new_raid_disks; for (i = 0; i < delta_disks; i++) { set_imsm_ord_tbl_ent(newmap, @@ -6719,8 +7248,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u, if (du->index > idx) du->index--; /* mark as spare disk */ - dm->disk.status = SPARE_DISK; - dm->index = -1; + mark_spare(dm); } } /* update map */ @@ -6780,7 +7308,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u, for (du = super->missing; du; du = du->next) if (du->index >= 0) { set_imsm_ord_tbl_ent(map, du->index, du->index); - mark_missing(dev_new, &du->disk, du->index); + mark_missing(dv->dev, &du->disk, du->index); } return 1; @@ -6828,6 +7356,24 @@ static void imsm_process_update(struct supertype *st, mpb = super->anchor; switch (type) { + case update_general_migration_checkpoint: { + struct intel_dev *id; + struct imsm_update_general_migration_checkpoint *u = + (void *)update->buf; + + dprintf("imsm: process_update() " + "for update_general_migration_checkpoint called\n"); + + /* find device under general migration */ + for (id = super->devlist ; id; id = id->next) { + if (is_gen_migration(id->dev)) { + id->dev->vol.curr_migr_unit = + __cpu_to_le32(u->curr_migr_unit); + super->updates_pending++; + } + } + break; + } case update_takeover: { struct imsm_update_takeover *u = (void *)update->buf; if (apply_takeover_update(u, super, &update->space_list)) { @@ -6853,99 +7399,8 @@ static void imsm_process_update(struct supertype *st, } case update_activate_spare: { struct imsm_update_activate_spare *u = (void *) update->buf; - struct imsm_dev *dev = get_imsm_dev(super, u->array); - struct imsm_map *map = get_imsm_map(dev, 0); - struct imsm_map *migr_map; - struct active_array *a; - struct imsm_disk *disk; - __u8 to_state; - struct dl *dl; - unsigned int found; - int failed; - int victim = get_imsm_disk_idx(dev, u->slot, -1); - int i; - - for (dl = super->disks; dl; dl = dl->next) - if (dl == u->dl) - break; - - if (!dl) { - fprintf(stderr, "error: imsm_activate_spare passed " - "an unknown disk (index: %d)\n", - u->dl->index); - return; - } - - super->updates_pending++; - /* count failures (excluding rebuilds and the victim) - * to determine map[0] state - */ - failed = 0; - for (i = 0; i < map->num_members; i++) { - if (i == u->slot) - continue; - disk = get_imsm_disk(super, - get_imsm_disk_idx(dev, i, -1)); - if (!disk || is_failed(disk)) - failed++; - } - - /* adding a pristine spare, assign a new index */ - if (dl->index < 0) { - dl->index = super->anchor->num_disks; - super->anchor->num_disks++; - } - disk = &dl->disk; - disk->status |= CONFIGURED_DISK; - disk->status &= ~SPARE_DISK; - - /* mark rebuild */ - to_state = imsm_check_degraded(super, dev, failed); - map->map_state = IMSM_T_STATE_DEGRADED; - migrate(dev, super, to_state, MIGR_REBUILD); - migr_map = get_imsm_map(dev, 1); - set_imsm_ord_tbl_ent(map, u->slot, dl->index); - set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD); - - /* update the family_num to mark a new container - * generation, being careful to record the existing - * family_num in orig_family_num to clean up after - * earlier mdadm versions that neglected to set it. - */ - if (mpb->orig_family_num == 0) - mpb->orig_family_num = mpb->family_num; - mpb->family_num += super->random; - - /* count arrays using the victim in the metadata */ - found = 0; - for (a = st->arrays; a ; a = a->next) { - dev = get_imsm_dev(super, a->info.container_member); - map = get_imsm_map(dev, 0); - - if (get_imsm_disk_slot(map, victim) >= 0) - found++; - } - - /* delete the victim if it is no longer being - * utilized anywhere - */ - if (!found) { - struct dl **dlp; - - /* We know that 'manager' isn't touching anything, - * so it is safe to delete - */ - for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next) - if ((*dlp)->index == victim) - break; - - /* victim may be on the missing list */ - if (!*dlp) - for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next) - if ((*dlp)->index == victim) - break; - imsm_delete(super, dlp, victim); - } + if (apply_update_activate_spare(u, super, st->arrays)) + super->updates_pending++; break; } case update_create_array: { @@ -7165,6 +7620,10 @@ static void imsm_prepare_update(struct supertype *st, size_t len = 0; switch (type) { + case update_general_migration_checkpoint: + dprintf("imsm: prepare_update() " + "for update_general_migration_checkpoint called\n"); + break; case update_takeover: { struct imsm_update_takeover *u = (void *)update->buf; if (u->direction == R0_TO_R10) { @@ -7449,7 +7908,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind __free_imsm_disk(dl); } } - +#endif /* MDASSEMBLE */ /******************************************************************************* * Function: open_backup_targets * Description: Function opens file descriptors for all devices given in @@ -7489,6 +7948,7 @@ int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds) return 0; } +#ifndef MDASSEMBLE /******************************************************************************* * Function: init_migr_record_imsm * Description: Function inits imsm migration record @@ -7513,10 +7973,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, struct imsm_map *map_dest = get_imsm_map(dev, 0); struct imsm_map *map_src = get_imsm_map(dev, 1); unsigned long long num_migr_units; - - unsigned long long array_blocks = - (((unsigned long long)__le32_to_cpu(dev->size_high)) << 32) + - __le32_to_cpu(dev->size_low); + unsigned long long array_blocks; memset(migr_rec, 0, sizeof(struct migr_record)); migr_rec->family_num = __cpu_to_le32(super->anchor->family_num); @@ -7532,7 +7989,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks); migr_rec->dest_depth_per_unit = __cpu_to_le32(migr_rec->dest_depth_per_unit); - + array_blocks = info->component_size * new_data_disks; num_migr_units = array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit); @@ -7572,9 +8029,9 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, * and to write it to the Copy Area. * Parameters: * st : supertype information + * dev : imsm device that backup is saved for * info : general array info * buf : input buffer - * write_offset : address of data to backup * length : length of data to backup (blocks_per_unit) * Returns: * 0 : success @@ -7584,7 +8041,6 @@ int save_backup_imsm(struct supertype *st, struct imsm_dev *dev, struct mdinfo *info, void *buf, - int new_data, int length) { int rv = -1; @@ -7594,34 +8050,48 @@ int save_backup_imsm(struct supertype *st, int i; struct imsm_map *map_dest = get_imsm_map(dev, 0); int new_disks = map_dest->num_members; + int dest_layout = 0; + int dest_chunk; + unsigned long long start; + int data_disks = imsm_num_data_members(dev, 0); targets = malloc(new_disks * sizeof(int)); if (!targets) goto abort; + for (i = 0; i < new_disks; i++) + targets[i] = -1; + target_offsets = malloc(new_disks * sizeof(unsigned long long)); if (!target_offsets) goto abort; + start = info->reshape_progress * 512; for (i = 0; i < new_disks; i++) { - targets[i] = -1; target_offsets[i] = (unsigned long long) __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512; + /* move back copy area adderss, it will be moved forward + * in restore_stripes() using start input variable + */ + target_offsets[i] -= start/data_disks; } if (open_backup_targets(info, new_disks, targets)) goto abort; + dest_layout = imsm_level_to_layout(map_dest->raid_level); + dest_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512; + if (restore_stripes(targets, /* list of dest devices */ target_offsets, /* migration record offsets */ new_disks, - info->new_chunk, - info->new_level, - info->new_layout, - -1, /* source backup file descriptor */ - 0, /* input buf offset - * always 0 buf is already offset */ - 0, + dest_chunk, + map_dest->raid_level, + dest_layout, + -1, /* source backup file descriptor */ + 0, /* input buf offset + * always 0 buf is already offseted */ + start, length, buf) != 0) { fprintf(stderr, Name ": Error restoring stripes\n"); @@ -7652,23 +8122,40 @@ abort: * Returns: * 0: success * 1: failure + * 2: failure, means no valid migration record + * / no general migration in progress / ******************************************************************************/ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state) { struct intel_super *super = st->sb; - load_imsm_migr_rec(super, info); - if (__le32_to_cpu(super->migr_rec->blocks_per_unit) == 0) { - dprintf("ERROR: blocks_per_unit = 0!!!\n"); + unsigned long long blocks_per_unit; + unsigned long long curr_migr_unit; + + if (load_imsm_migr_rec(super, info) != 0) { + dprintf("imsm: ERROR: Cannot read migration record " + "for checkpoint save.\n"); return 1; } + blocks_per_unit = __le32_to_cpu(super->migr_rec->blocks_per_unit); + if (blocks_per_unit == 0) { + dprintf("imsm: no migration in progress.\n"); + return 2; + } + curr_migr_unit = info->reshape_progress / blocks_per_unit; + /* check if array is alligned to copy area + * if it is not alligned, add one to current migration unit value + * this can happend on array reshape finish only + */ + if (info->reshape_progress % blocks_per_unit) + curr_migr_unit++; + super->migr_rec->curr_migr_unit = - __cpu_to_le32(info->reshape_progress / - __le32_to_cpu(super->migr_rec->blocks_per_unit)); + __cpu_to_le32(curr_migr_unit); super->migr_rec->rec_status = __cpu_to_le32(state); super->migr_rec->dest_1st_member_lba = - __cpu_to_le32((__le32_to_cpu(super->migr_rec->curr_migr_unit)) - * __le32_to_cpu(super->migr_rec->dest_depth_per_unit)); + __cpu_to_le32(curr_migr_unit * + __le32_to_cpu(super->migr_rec->dest_depth_per_unit)); if (write_imsm_migr_rec(st) < 0) { dprintf("imsm: Cannot write migration record " "outside backup area\n"); @@ -7678,6 +8165,133 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state) return 0; } +/******************************************************************************* + * Function: recover_backup_imsm + * Description: Function recovers critical data from the Migration Copy Area + * while assembling an array. + * Parameters: + * super : imsm internal array info + * info : general array info + * Returns: + * 0 : success (or there is no data to recover) + * 1 : fail + ******************************************************************************/ +int recover_backup_imsm(struct supertype *st, struct mdinfo *info) +{ + struct intel_super *super = st->sb; + struct migr_record *migr_rec = super->migr_rec; + struct imsm_map *map_dest = NULL; + struct intel_dev *id = NULL; + unsigned long long read_offset; + unsigned long long write_offset; + unsigned unit_len; + int *targets = NULL; + int new_disks, i, err; + char *buf = NULL; + int retval = 1; + unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit); + unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units); + char buffer[20]; + int skipped_disks = 0; + int max_degradation; + + err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20); + if (err < 1) + return 1; + + /* recover data only during assemblation */ + if (strncmp(buffer, "inactive", 8) != 0) + return 0; + /* no data to recover */ + if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL) + return 0; + if (curr_migr_unit >= num_migr_units) + return 1; + + /* find device during reshape */ + for (id = super->devlist; id; id = id->next) + if (is_gen_migration(id->dev)) + break; + if (id == NULL) + return 1; + + map_dest = get_imsm_map(id->dev, 0); + new_disks = map_dest->num_members; + max_degradation = new_disks - imsm_num_data_members(id->dev, 0); + + read_offset = (unsigned long long) + __le32_to_cpu(migr_rec->ckpt_area_pba) * 512; + + write_offset = ((unsigned long long) + __le32_to_cpu(migr_rec->dest_1st_member_lba) + + __le32_to_cpu(map_dest->pba_of_lba0)) * 512; + + unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; + if (posix_memalign((void **)&buf, 512, unit_len) != 0) + goto abort; + targets = malloc(new_disks * sizeof(int)); + if (!targets) + goto abort; + + open_backup_targets(info, new_disks, targets); + + for (i = 0; i < new_disks; i++) { + if (targets[i] < 0) { + skipped_disks++; + continue; + } + if (lseek64(targets[i], read_offset, SEEK_SET) < 0) { + fprintf(stderr, + Name ": Cannot seek to block: %s\n", + strerror(errno)); + goto abort; + } + if ((unsigned)read(targets[i], buf, unit_len) != unit_len) { + fprintf(stderr, + Name ": Cannot read copy area block: %s\n", + strerror(errno)); + goto abort; + } + if (lseek64(targets[i], write_offset, SEEK_SET) < 0) { + fprintf(stderr, + Name ": Cannot seek to block: %s\n", + strerror(errno)); + goto abort; + } + if ((unsigned)write(targets[i], buf, unit_len) != unit_len) { + fprintf(stderr, + Name ": Cannot restore block: %s\n", + strerror(errno)); + goto abort; + } + } + + if (skipped_disks > max_degradation) { + fprintf(stderr, + Name ": Cannot restore data from backup." + " Too many failed disks\n"); + goto abort; + } + + if (save_checkpoint_imsm(st, info, UNIT_SRC_NORMAL)) { + /* ignore error == 2, this can mean end of reshape here + */ + dprintf("imsm: Cannot write checkpoint to " + "migration record (UNIT_SRC_NORMAL) during restart\n"); + } else + retval = 0; + +abort: + if (targets) { + for (i = 0; i < new_disks; i++) + if (targets[i]) + close(targets[i]); + free(targets); + } + free(buf); + return retval; +} + static char disk_by_path[] = "/dev/disk/by-path/"; static const char *imsm_get_disk_controller_domain(const char *path) @@ -7895,6 +8509,7 @@ static int imsm_create_metadata_update_for_reshape( || delta_disks > spares->array.spare_disks) { fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices " "for %s.\n", geo->dev_name); + i = -1; goto abort; } @@ -8046,7 +8661,6 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, int chunk; getinfo_super_imsm_volume(st, &info, NULL); - if ((geo->level != info.array.level) && (geo->level >= 0) && (geo->level != UnSet)) { @@ -8054,6 +8668,14 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, case 0: if (geo->level == 5) { change = CH_MIGRATION; + if (geo->layout != ALGORITHM_LEFT_ASYMMETRIC) { + fprintf(stderr, + Name " Error. Requested Layout " + "not supported (left-asymmetric layout " + "is supported only)!\n"); + change = -1; + goto analyse_change_exit; + } check_devs = 1; } if (geo->level == 10) { @@ -8176,30 +8798,6 @@ int imsm_takeover(struct supertype *st, struct geo_params *geo) return 0; } -static int warn_user_about_risk(void) -{ - int rv = 0; - - fprintf(stderr, - "\nThis is an experimental feature. Data on the RAID volume(s) " - "can be lost!!!\n\n" - "To continue command execution please make sure that\n" - "the grow process will not be interrupted. Use safe power\n" - "supply to avoid unexpected system reboot. Make sure that\n" - "reshaped container is not assembled automatically during\n" - "system boot.\n" - "If reshape is interrupted, assemble array manually\n" - "using e.g. '-Ac' option and up to date mdadm.conf file.\n" - "Assembly in scan mode is not possible in such case.\n" - "Growing container with boot array is not possible.\n" - "If boot array reshape is interrupted, whole file system\n" - "can be lost.\n\n"); - rv = ask("Do you want to continue? "); - fprintf(stderr, "\n"); - - return rv; -} - static int imsm_reshape_super(struct supertype *st, long long size, int level, int layout, int chunksize, int raid_disks, int delta_disks, char *backup, char *dev, @@ -8233,13 +8831,6 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, dprintf("imsm: info: Container operation\n"); int old_raid_disks = 0; - /* this warning will be removed when imsm checkpointing - * will be implemented, and restoring from check-point - * operation will be transparent for reboot process - */ - if (warn_user_about_risk() == 0) - return ret_val; - if (imsm_reshape_is_allowed_on_container( st, &geo, &old_raid_disks)) { struct imsm_update_reshape *u = NULL; @@ -8278,8 +8869,9 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, dprintf("imsm: info: Volume operation\n"); /* find requested device */ while (dev) { - imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum); - if (devnum == geo.dev_id) + if (imsm_find_array_minor_by_subdev( + dev->index, st->container_dev, &devnum) == 0 + && devnum == geo.dev_id) break; dev = dev->next; } @@ -8330,56 +8922,66 @@ exit_imsm_reshape_super: * reshape process reach new position * Parameters: * sra : general array info - * to_complete : new sync_max position * ndata : number of disks in new array's layout * Returns: * 0 : success, * 1 : there is no reshape in progress, * -1 : fail ******************************************************************************/ -int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete, - int ndata) +int wait_for_reshape_imsm(struct mdinfo *sra, int ndata) { int fd = sysfs_get_fd(sra, NULL, "reshape_position"); unsigned long long completed; + /* to_complete : new sync_max position */ + unsigned long long to_complete = sra->reshape_progress; + unsigned long long position_to_set = to_complete / ndata; - struct timeval timeout; - - if (fd < 0) + if (fd < 0) { + dprintf("imsm: wait_for_reshape_imsm() " + "cannot open reshape_position\n"); return 1; + } - sysfs_fd_get_ll(fd, &completed); + if (sysfs_fd_get_ll(fd, &completed) < 0) { + dprintf("imsm: wait_for_reshape_imsm() " + "cannot read reshape_position (no reshape in progres)\n"); + close(fd); + return 0; + } - if (to_complete == 0) {/* reshape till the end of array */ - sysfs_set_str(sra, NULL, "sync_max", "max"); - to_complete = MaxSector; - } else { - if (completed > to_complete) - return -1; - if (sysfs_set_num(sra, NULL, "sync_max", - to_complete / ndata) != 0) { - close(fd); - return -1; - } + if (completed > to_complete) { + dprintf("imsm: wait_for_reshape_imsm() " + "wrong next position to set %llu (%llu)\n", + to_complete, completed); + close(fd); + return -1; + } + dprintf("Position set: %llu\n", position_to_set); + if (sysfs_set_num(sra, NULL, "sync_max", + position_to_set) != 0) { + dprintf("imsm: wait_for_reshape_imsm() " + "cannot set reshape position to %llu\n", + position_to_set); + close(fd); + return -1; } - /* FIXME should not need a timeout at all */ - timeout.tv_sec = 30; - timeout.tv_usec = 0; do { char action[20]; fd_set rfds; FD_ZERO(&rfds); FD_SET(fd, &rfds); - select(fd+1, NULL, NULL, &rfds, &timeout); + select(fd+1, &rfds, NULL, NULL, NULL); + if (sysfs_get_str(sra, NULL, "sync_action", + action, 20) > 0 && + strncmp(action, "reshape", 7) != 0) + break; if (sysfs_fd_get_ll(fd, &completed) < 0) { + dprintf("imsm: wait_for_reshape_imsm() " + "cannot read reshape_position (in loop)\n"); close(fd); return 1; } - if (sysfs_get_str(sra, NULL, "sync_action", - action, 20) > 0 && - strncmp(action, "reshape", 7) != 0) - break; } while (completed < to_complete); close(fd); return 0; @@ -8463,7 +9065,7 @@ static int imsm_manage_reshape( struct intel_super *super = st->sb; struct intel_dev *dv = NULL; struct imsm_dev *dev = NULL; - struct imsm_map *map_src, *map_dest; + struct imsm_map *map_src; int migr_vol_qan = 0; int ndata, odata; /* [bytes] */ int chunk; /* [bytes] */ @@ -8473,13 +9075,13 @@ static int imsm_manage_reshape( unsigned long long max_position; /* array size [bytes] */ unsigned long long next_step; /* [blocks]/[bytes] */ unsigned long long old_data_stripe_length; - unsigned long long new_data_stripe_length; unsigned long long start_src; /* [bytes] */ unsigned long long start; /* [bytes] */ unsigned long long start_buf_shift; /* [bytes] */ int degraded = 0; + int source_layout = 0; - if (!fds || !offsets || !destfd || !destoffsets || !sra) + if (!fds || !offsets || !sra) goto abort; /* Find volume during the reshape */ @@ -8501,24 +9103,25 @@ static int imsm_manage_reshape( map_src = get_imsm_map(dev, 1); if (map_src == NULL) goto abort; - map_dest = get_imsm_map(dev, 0); ndata = imsm_num_data_members(dev, 0); odata = imsm_num_data_members(dev, 1); - chunk = map_src->blocks_per_strip * 512; + chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512; old_data_stripe_length = odata * chunk; migr_rec = super->migr_rec; - /* [bytes] */ - sra->new_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512; - sra->new_level = map_dest->raid_level; - new_data_stripe_length = sra->new_chunk * ndata; - /* initialize migration record for start condition */ if (sra->reshape_progress == 0) init_migr_record_imsm(st, dev, sra); + else { + if (__le32_to_cpu(migr_rec->rec_status) != UNIT_SRC_NORMAL) { + dprintf("imsm: cannot restart migration when data " + "are present in copy area.\n"); + goto abort; + } + } /* size for data */ buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512; @@ -8531,10 +9134,8 @@ static int imsm_manage_reshape( goto abort; } - max_position = - __le32_to_cpu(migr_rec->post_migr_vol_cap) + - ((unsigned long long)__le32_to_cpu( - migr_rec->post_migr_vol_cap_hi) << 32); + max_position = sra->component_size * ndata; + source_layout = imsm_level_to_layout(map_src->raid_level); while (__le32_to_cpu(migr_rec->curr_migr_unit) < __le32_to_cpu(migr_rec->num_migr_units)) { @@ -8558,8 +9159,7 @@ static int imsm_manage_reshape( if ((current_position + next_step) > max_position) next_step = max_position - current_position; - start = (map_src->pba_of_lba0 + dev->reserved_blocks + - current_position) * 512; + start = current_position * 512; /* allign reading start to old geometry */ start_buf_shift = start % old_data_stripe_length; @@ -8592,8 +9192,8 @@ static int imsm_manage_reshape( start_buf_shift, next_step_filler); if (save_stripes(fds, offsets, map_src->num_members, - chunk, sra->array.level, - sra->array.layout, 0, NULL, start_src, + chunk, map_src->raid_level, + source_layout, 0, NULL, start_src, copy_length + next_step_filler + start_buf_shift, buf)) { @@ -8605,8 +9205,7 @@ static int imsm_manage_reshape( * in backup general migration area */ if (save_backup_imsm(st, dev, sra, - buf + start_buf_shift, - ndata, copy_length)) { + buf + start_buf_shift, copy_length)) { dprintf("imsm: Cannot save stripes to " "target devices\n"); goto abort; @@ -8617,28 +9216,32 @@ static int imsm_manage_reshape( "migration record (UNIT_SRC_IN_CP_AREA)\n"); goto abort; } - /* decrease backup_blocks */ - if (backup_blocks > (unsigned long)next_step) - backup_blocks -= next_step; - else - backup_blocks = 0; + } else { + /* set next step to use whole border area */ + border /= next_step; + if (border > 1) + next_step *= border; } /* When data backed up, checkpoint stored, * kick the kernel to reshape unit of data */ next_step = next_step + sra->reshape_progress; + /* limit next step to array max position */ + if (next_step > max_position) + next_step = max_position; sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress); sysfs_set_num(sra, NULL, "suspend_hi", next_step); + sra->reshape_progress = next_step; /* wait until reshape finish */ - if (wait_for_reshape_imsm(sra, next_step, ndata) < 0) { + if (wait_for_reshape_imsm(sra, ndata) < 0) { dprintf("wait_for_reshape_imsm returned error!\n"); goto abort; } - sra->reshape_progress = next_step; - - if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL)) { + if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) { + /* ignore error == 2, this can mean end of reshape here + */ dprintf("imsm: Cannot write checkpoint to " "migration record (UNIT_SRC_NORMAL)\n"); goto abort; @@ -8676,6 +9279,7 @@ struct superswitch super_imsm = { .get_disk_controller_domain = imsm_get_disk_controller_domain, .reshape_super = imsm_reshape_super, .manage_reshape = imsm_manage_reshape, + .recover_backup = recover_backup_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, @@ -8695,6 +9299,7 @@ struct superswitch super_imsm = { .match_metadata_desc = match_metadata_desc_imsm, .container_content = container_content_imsm, + .external = 1, .name = "imsm",