X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=a78d7238a755aefaa88353db44719736e8bed163;hb=0f82fe603a42f37f1e2a6f826b4164811bf2d188;hp=5f5f0d99263e7eb864c4450b1fb465781a89f934;hpb=51d9a2ce33a6a60a75751a46bb93bf052a2dfc7a;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index 5f5f0d99..a78d7238 100644 --- a/super-intel.c +++ b/super-intel.c @@ -41,17 +41,52 @@ #define MAX_SIGNATURE_LENGTH 32 #define MAX_RAID_SERIAL_LEN 16 -#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000) -#define MPB_ATTRIB_PM __cpu_to_le32(0x40000000) -#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000) -#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001) -#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002) -#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004) -#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008) -#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010) -#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020) - -#define MPB_SECTOR_CNT 418 +/* supports RAID0 */ +#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001) +/* supports RAID1 */ +#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002) +/* supports RAID10 */ +#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004) +/* supports RAID1E */ +#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008) +/* supports RAID5 */ +#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010) +/* supports RAID CNG */ +#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020) +/* supports expanded stripe sizes of 256K, 512K and 1MB */ +#define MPB_ATTRIB_EXP_STRIPE_SIZE __cpu_to_le32(0x00000040) + +/* The OROM Support RST Caching of Volumes */ +#define MPB_ATTRIB_NVM __cpu_to_le32(0x02000000) +/* The OROM supports creating disks greater than 2TB */ +#define MPB_ATTRIB_2TB_DISK __cpu_to_le32(0x04000000) +/* The OROM supports Bad Block Management */ +#define MPB_ATTRIB_BBM __cpu_to_le32(0x08000000) + +/* THe OROM Supports NVM Caching of Volumes */ +#define MPB_ATTRIB_NEVER_USE2 __cpu_to_le32(0x10000000) +/* The OROM supports creating volumes greater than 2TB */ +#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000) +/* originally for PMP, now it's wasted b/c. Never use this bit! 
*/ +#define MPB_ATTRIB_NEVER_USE __cpu_to_le32(0x40000000) +/* Verify MPB contents against checksum after reading MPB */ +#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000) + +/* Define all supported attributes that have to be accepted by mdadm + */ +#define MPB_ATTRIB_SUPPORTED (MPB_ATTRIB_CHECKSUM_VERIFY | \ + MPB_ATTRIB_2TB | \ + MPB_ATTRIB_2TB_DISK | \ + MPB_ATTRIB_RAID0 | \ + MPB_ATTRIB_RAID1 | \ + MPB_ATTRIB_RAID10 | \ + MPB_ATTRIB_RAID5 | \ + MPB_ATTRIB_EXP_STRIPE_SIZE) + +/* Define attributes that are unused but not harmful */ +#define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE) + +#define MPB_SECTOR_CNT 2210 #define IMSM_RESERVED_SECTORS 4096 #define SECT_PER_MB_SHIFT 11 @@ -194,6 +229,41 @@ struct bbm_log { static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" }; #endif +#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209 + +#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */ + +#define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must + * be recovered using srcMap */ +#define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has + * already been migrated and must + * be recovered from checkpoint area */ +struct migr_record { + __u32 rec_status; /* Status used to determine how to restart + * migration in case it aborts + * in some fashion */ + __u32 curr_migr_unit; /* 0..numMigrUnits-1 */ + __u32 family_num; /* Family number of MPB + * containing the RaidDev + * that is migrating */ + __u32 ascending_migr; /* True if migrating in increasing + * order of lbas */ + __u32 blocks_per_unit; /* Num disk blocks per unit of operation */ + __u32 dest_depth_per_unit; /* Num member blocks each destMap + * member disk + * advances per unit-of-operation */ + __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */ + __u32 dest_1st_member_lba; /* First member lba on first + * stripe of destination */ + __u32 num_migr_units; /* Total num migration units-of-op */ + __u32 post_migr_vol_cap; /* Size of volume after + * migration completes */ + __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */ + __u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the + * migration ckpt record was read from + * (for recovered migrations) */ +} __attribute__ ((__packed__)); + static __u8 migr_type(struct imsm_dev *dev) { if (dev->vol.migr_type == MIGR_VERIFY && @@ -250,6 +320,10 @@ struct intel_super { void *buf; /* O_DIRECT buffer for reading/writing metadata */ struct imsm_super *anchor; /* immovable parameters */ }; + union { + void *migr_rec_buf; /* buffer for I/O operations */ + struct migr_record *migr_rec; /* migration record */ + }; size_t len; /* size of the 'buf' allocation */ void *next_buf; /* for realloc'ing buf from the manager */ size_t next_len; @@ -270,7 +344,7 @@ struct intel_super { struct extent *e; /* for determining freespace @ create */ int raiddisk; /* slot to fill in autolayout */ enum action action; - } *disks; + } *disks, *current_disk; struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon active */ struct dl *missing; /* disks removed while we weren't looking */ @@ -305,7 +379,9 @@ enum imsm_update_type { update_rename_array, update_add_remove_disk, update_reshape_container_disks, - update_takeover + update_reshape_migration, + update_takeover, + update_general_migration_checkpoint, }; struct imsm_update_activate_spare { @@ -340,9 +416,29 @@ struct imsm_update_reshape { enum imsm_update_type type; int old_raid_disks; int new_raid_disks; + + int new_disks[1]; /* 
new_raid_disks - old_raid_disks makedev number */ +}; + +struct imsm_update_reshape_migration { + enum imsm_update_type type; + int old_raid_disks; + int new_raid_disks; + /* fields for array migration changes + */ + int subdev; + int new_level; + int new_layout; + int new_chunksize; + int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */ }; +struct imsm_update_general_migration_checkpoint { + enum imsm_update_type type; + __u32 curr_migr_unit; +}; + struct disk_info { __u8 serial[MAX_RAID_SERIAL_LEN]; }; @@ -383,7 +479,6 @@ const char *get_sys_dev_type(enum sys_dev_type type) return _sys_dev_type[type]; } -#ifndef MDASSEMBLE static struct intel_hba * alloc_intel_hba(struct sys_dev *device) { struct intel_hba *result = malloc(sizeof(*result)); @@ -407,7 +502,6 @@ static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev * return result; } - static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device) { struct intel_hba *hba; @@ -473,7 +567,6 @@ static struct sys_dev* find_disk_attached_hba(int fd, const char *devname) return NULL; } -#endif /* MDASSEMBLE */ static int find_intel_hba_capability(int fd, struct intel_super *super, @@ -505,7 +598,7 @@ static __u8 *get_imsm_version(struct imsm_super *mpb) { return &mpb->sig[MPB_SIG_LEN]; } -#endif +#endif /* retrieve a disk directly from the anchor when the anchor is known to be * up-to-date, currently only at load time @@ -740,7 +833,16 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) struct extent *rv, *e; int i; int memberships = count_memberships(dl, super); - __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + __u32 reservation; + + /* trim the reserved area for spares, so they can join any array + * regardless of whether the OROM has assigned sectors from the + * IMSM_RESERVED_SECTORS region + */ + if (dl->index == -1) + reservation = MPB_SECTOR_CNT; + else + reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; rv = malloc(sizeof(struct extent) * (memberships + 1)); if (!rv) @@ -863,9 +965,13 @@ static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st) } #ifndef MDASSEMBLE -static __u64 blocks_per_migr_unit(struct imsm_dev *dev); +static __u64 blocks_per_migr_unit(struct intel_super *super, + struct imsm_dev *dev); -static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) +static void print_imsm_dev(struct intel_super *super, + struct imsm_dev *dev, + char *uuid, + int disk_idx) { __u64 sz; int slot, i; @@ -956,24 +1062,26 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) printf(" <-- %s", map_state_str[map->map_state]); printf("\n Checkpoint : %u (%llu)", __le32_to_cpu(dev->vol.curr_migr_unit), - (unsigned long long)blocks_per_migr_unit(dev)); + (unsigned long long)blocks_per_migr_unit(super, dev)); } printf("\n"); printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean"); } -static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved) +static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved) { - struct imsm_disk *disk = __get_imsm_disk(mpb, index); char str[MAX_RAID_SERIAL_LEN + 1]; __u64 sz; - if (index < 0 || !disk) + if (index < -1 || !disk) return; printf("\n"); snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); - printf(" Disk%02d Serial : %s\n", index, str); + if (index >= 0) + printf(" Disk%02d Serial : %s\n", index, str); + else + printf(" Disk Serial : %s\n", str); printf(" State :%s%s%s\n", is_spare(disk) ? 
" spare" : "", is_configured(disk) ? " active" : "", is_failed(disk) ? " failed" : ""); @@ -983,6 +1091,145 @@ static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved) human_size(sz * 512)); } +static int is_gen_migration(struct imsm_dev *dev); + +void examine_migr_rec_imsm(struct intel_super *super) +{ + struct migr_record *migr_rec = super->migr_rec; + struct imsm_super *mpb = super->anchor; + int i; + + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = __get_imsm_dev(mpb, i); + if (is_gen_migration(dev) == 0) + continue; + + printf("\nMigration Record Information:"); + if (super->disks->index > 1) { + printf(" Empty\n "); + printf("Examine one of first two disks in array\n"); + break; + } + printf("\n Status : "); + if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL) + printf("Normal\n"); + else + printf("Contains Data\n"); + printf(" Current Unit : %u\n", + __le32_to_cpu(migr_rec->curr_migr_unit)); + printf(" Family : %u\n", + __le32_to_cpu(migr_rec->family_num)); + printf(" Ascending : %u\n", + __le32_to_cpu(migr_rec->ascending_migr)); + printf(" Blocks Per Unit : %u\n", + __le32_to_cpu(migr_rec->blocks_per_unit)); + printf(" Dest. Depth Per Unit : %u\n", + __le32_to_cpu(migr_rec->dest_depth_per_unit)); + printf(" Checkpoint Area pba : %u\n", + __le32_to_cpu(migr_rec->ckpt_area_pba)); + printf(" First member lba : %u\n", + __le32_to_cpu(migr_rec->dest_1st_member_lba)); + printf(" Total Number of Units : %u\n", + __le32_to_cpu(migr_rec->num_migr_units)); + printf(" Size of volume : %u\n", + __le32_to_cpu(migr_rec->post_migr_vol_cap)); + printf(" Expansion space for LBA64 : %u\n", + __le32_to_cpu(migr_rec->post_migr_vol_cap_hi)); + printf(" Record was read from : %u\n", + __le32_to_cpu(migr_rec->ckpt_read_disk_num)); + + break; + } +} +#endif /* MDASSEMBLE */ +/******************************************************************************* + * function: imsm_check_attributes + * Description: Function checks if features represented by attributes flags + * are supported by mdadm. 
+ * Parameters: + * attributes - Attributes read from metadata + * Returns: + * 0 - passed attributes contains unsupported features flags + * 1 - all features are supported + ******************************************************************************/ +static int imsm_check_attributes(__u32 attributes) +{ + int ret_val = 1; + __u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff; + + not_supported &= ~MPB_ATTRIB_IGNORED; + + not_supported &= attributes; + if (not_supported) { + fprintf(stderr, Name "(IMSM): Unsupported attributes : %x\n", + (unsigned)__le32_to_cpu(not_supported)); + if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) { + dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n"); + not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY; + } + if (not_supported & MPB_ATTRIB_2TB) { + dprintf("\t\tMPB_ATTRIB_2TB\n"); + not_supported ^= MPB_ATTRIB_2TB; + } + if (not_supported & MPB_ATTRIB_RAID0) { + dprintf("\t\tMPB_ATTRIB_RAID0\n"); + not_supported ^= MPB_ATTRIB_RAID0; + } + if (not_supported & MPB_ATTRIB_RAID1) { + dprintf("\t\tMPB_ATTRIB_RAID1\n"); + not_supported ^= MPB_ATTRIB_RAID1; + } + if (not_supported & MPB_ATTRIB_RAID10) { + dprintf("\t\tMPB_ATTRIB_RAID10\n"); + not_supported ^= MPB_ATTRIB_RAID10; + } + if (not_supported & MPB_ATTRIB_RAID1E) { + dprintf("\t\tMPB_ATTRIB_RAID1E\n"); + not_supported ^= MPB_ATTRIB_RAID1E; + } + if (not_supported & MPB_ATTRIB_RAID5) { + dprintf("\t\tMPB_ATTRIB_RAID5\n"); + not_supported ^= MPB_ATTRIB_RAID5; + } + if (not_supported & MPB_ATTRIB_RAIDCNG) { + dprintf("\t\tMPB_ATTRIB_RAIDCNG\n"); + not_supported ^= MPB_ATTRIB_RAIDCNG; + } + if (not_supported & MPB_ATTRIB_BBM) { + dprintf("\t\tMPB_ATTRIB_BBM\n"); + not_supported ^= MPB_ATTRIB_BBM; + } + if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) { + dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY (== MPB_ATTRIB_LEGACY)\n"); + not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY; + } + if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) { + dprintf("\t\tMPB_ATTRIB_EXP_STRIP_SIZE\n"); + not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE; + } + if (not_supported & MPB_ATTRIB_2TB_DISK) { + dprintf("\t\tMPB_ATTRIB_2TB_DISK\n"); + not_supported ^= MPB_ATTRIB_2TB_DISK; + } + if (not_supported & MPB_ATTRIB_NEVER_USE2) { + dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n"); + not_supported ^= MPB_ATTRIB_NEVER_USE2; + } + if (not_supported & MPB_ATTRIB_NEVER_USE) { + dprintf("\t\tMPB_ATTRIB_NEVER_USE\n"); + not_supported ^= MPB_ATTRIB_NEVER_USE; + } + + if (not_supported) + dprintf(Name "(IMSM): Unknown attributes : %x\n", not_supported); + + ret_val = 0; + } + + return ret_val; +} + +#ifndef MDASSEMBLE static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map); static void examine_super_imsm(struct supertype *st, char *homehost) @@ -1004,6 +1251,11 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num)); printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num)); printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num)); + printf(" Attributes : "); + if (imsm_check_attributes(mpb->attributes)) + printf("All supported\n"); + else + printf("not supported\n"); getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf(" UUID : %s\n", nbuf + 5); @@ -1013,7 +1265,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" MPB Sectors : %d\n", mpb_sectors(mpb)); printf(" Disks : %d\n", mpb->num_disks); printf(" RAID Devices : %d\n", mpb->num_raid_devs); - print_imsm_disk(mpb, super->disks->index, 
reserved); + print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved); if (super->bbm_log) { struct bbm_log *log = super->bbm_log; @@ -1033,33 +1285,19 @@ static void examine_super_imsm(struct supertype *st, char *homehost) super->current_vol = i; getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); - print_imsm_dev(dev, nbuf + 5, super->disks->index); + print_imsm_dev(super, dev, nbuf + 5, super->disks->index); } for (i = 0; i < mpb->num_disks; i++) { if (i == super->disks->index) continue; - print_imsm_disk(mpb, i, reserved); + print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved); } - for (dl = super->disks ; dl; dl = dl->next) { - struct imsm_disk *disk; - char str[MAX_RAID_SERIAL_LEN + 1]; - __u64 sz; - if (dl->index >= 0) - continue; + for (dl = super->disks; dl; dl = dl->next) + if (dl->index == -1) + print_imsm_disk(&dl->disk, -1, reserved); - disk = &dl->disk; - printf("\n"); - snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); - printf(" Disk Serial : %s\n", str); - printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", - is_configured(disk) ? " active" : "", - is_failed(disk) ? " failed" : ""); - printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); - sz = __le32_to_cpu(disk->total_blocks) - reserved; - printf(" Usable Size : %llu%s\n", (unsigned long long)sz, - human_size(sz * 512)); - } + examine_migr_rec_imsm(super); } static void brief_examine_super_imsm(struct supertype *st, int verbose) @@ -1281,9 +1519,9 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b fd2devname(fd, buf); printf(" Port%d : %s", port, buf); if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0) - printf(" (%s)\n", buf); + printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf); else - printf("()\n"); + printf(" ()\n"); } close(fd); free(path); @@ -1304,8 +1542,6 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b return err; } - - static void print_found_intel_controllers(struct sys_dev *elem) { for (; elem; elem = elem->next) { @@ -1671,7 +1907,8 @@ static __u32 map_migr_block(struct imsm_dev *dev, __u32 block) } } -static __u64 blocks_per_migr_unit(struct imsm_dev *dev) +static __u64 blocks_per_migr_unit(struct intel_super *super, + struct imsm_dev *dev) { /* calculate the conversion factor between per member 'blocks' * (md/{resync,rebuild}_start) and imsm migration units, return @@ -1681,7 +1918,10 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) return 0; switch (migr_type(dev)) { - case MIGR_GEN_MIGR: + case MIGR_GEN_MIGR: { + struct migr_record *migr_rec = super->migr_rec; + return __le32_to_cpu(migr_rec->blocks_per_unit); + } case MIGR_VERIFY: case MIGR_REPAIR: case MIGR_INIT: { @@ -1703,7 +1943,7 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) migr_chunk = migr_strip_blocks_resync(dev); disks = imsm_num_data_members(dev, 0); blocks_per_unit = stripes_per_unit * migr_chunk * disks; - stripe = __le32_to_cpu(map->blocks_per_strip) * disks; + stripe = __le16_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; block_rel = blocks_per_unit - segment * stripe; parity_depth = parity_segment_depth(dev); @@ -1739,25 +1979,233 @@ static int imsm_level_to_layout(int level) return UnSet; } +/******************************************************************************* + * Function: read_imsm_migr_rec + * Description: Function reads imsm migration record from last sector of disk + * Parameters: + * fd : disk descriptor + * super : metadata info + 
* Returns: + * 0 : success, + * -1 : fail + ******************************************************************************/ +static int read_imsm_migr_rec(int fd, struct intel_super *super) +{ + int ret_val = -1; + unsigned long long dsize; + + get_dev_size(fd, NULL, &dsize); + if (lseek64(fd, dsize - 512, SEEK_SET) < 0) { + fprintf(stderr, + Name ": Cannot seek to anchor block: %s\n", + strerror(errno)); + goto out; + } + if (read(fd, super->migr_rec_buf, 512) != 512) { + fprintf(stderr, + Name ": Cannot read migr record block: %s\n", + strerror(errno)); + goto out; + } + ret_val = 0; + +out: + return ret_val; +} + +/******************************************************************************* + * Function: load_imsm_migr_rec + * Description: Function reads imsm migration record (it is stored at the last + * sector of disk) + * Parameters: + * super : imsm internal array info + * info : general array info + * Returns: + * 0 : success + * -1 : fail + ******************************************************************************/ +static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info) +{ + struct mdinfo *sd; + struct dl *dl = NULL; + char nm[30]; + int retval = -1; + int fd = -1; + + if (info) { + for (sd = info->devs ; sd ; sd = sd->next) { + /* read only from one of the first two slots */ + if ((sd->disk.raid_disk > 1) || + (sd->disk.raid_disk < 0)) + continue; + sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); + fd = dev_open(nm, O_RDONLY); + if (fd >= 0) + break; + } + } + if (fd < 0) { + for (dl = super->disks; dl; dl = dl->next) { + /* read only from one of the first two slots */ + if (dl->index > 1) + continue; + sprintf(nm, "%d:%d", dl->major, dl->minor); + fd = dev_open(nm, O_RDONLY); + if (fd >= 0) + break; + } + } + if (fd < 0) + goto out; + retval = read_imsm_migr_rec(fd, super); + +out: + if (fd >= 0) + close(fd); + return retval; +} + +#ifndef MDASSEMBLE +/******************************************************************************* + * function: imsm_create_metadata_checkpoint_update + * Description: It creates update for checkpoint change. + * Parameters: + * super : imsm internal array info + * u : pointer to prepared update + * Returns: + * Uptate length. 
+ * If length is equal to 0, input pointer u contains no update + ******************************************************************************/ +static int imsm_create_metadata_checkpoint_update( + struct intel_super *super, + struct imsm_update_general_migration_checkpoint **u) +{ + + int update_memory_size = 0; + + dprintf("imsm_create_metadata_checkpoint_update(enter)\n"); + + if (u == NULL) + return 0; + *u = NULL; + + /* size of all update data without anchor */ + update_memory_size = + sizeof(struct imsm_update_general_migration_checkpoint); + + *u = calloc(1, update_memory_size); + if (*u == NULL) { + dprintf("error: cannot get memory for " + "imsm_create_metadata_checkpoint_update update\n"); + return 0; + } + (*u)->type = update_general_migration_checkpoint; + (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit); + dprintf("imsm_create_metadata_checkpoint_update: prepared for %u\n", + (*u)->curr_migr_unit); + + return update_memory_size; +} + + +static void imsm_update_metadata_locally(struct supertype *st, + void *buf, int len); + +/******************************************************************************* + * Function: write_imsm_migr_rec + * Description: Function writes imsm migration record + * (at the last sector of disk) + * Parameters: + * super : imsm internal array info + * Returns: + * 0 : success + * -1 : if fail + ******************************************************************************/ +static int write_imsm_migr_rec(struct supertype *st) +{ + struct intel_super *super = st->sb; + unsigned long long dsize; + char nm[30]; + int fd = -1; + int retval = -1; + struct dl *sd; + int len; + struct imsm_update_general_migration_checkpoint *u; + + for (sd = super->disks ; sd ; sd = sd->next) { + /* write to 2 first slots only */ + if ((sd->index < 0) || (sd->index > 1)) + continue; + sprintf(nm, "%d:%d", sd->major, sd->minor); + fd = dev_open(nm, O_RDWR); + if (fd < 0) + continue; + get_dev_size(fd, NULL, &dsize); + if (lseek64(fd, dsize - 512, SEEK_SET) < 0) { + fprintf(stderr, + Name ": Cannot seek to anchor block: %s\n", + strerror(errno)); + goto out; + } + if (write(fd, super->migr_rec_buf, 512) != 512) { + fprintf(stderr, + Name ": Cannot write migr record block: %s\n", + strerror(errno)); + goto out; + } + close(fd); + fd = -1; + } + /* update checkpoint information in metadata */ + len = imsm_create_metadata_checkpoint_update(super, &u); + + if (len <= 0) { + dprintf("imsm: Cannot prepare update\n"); + goto out; + } + /* update metadata locally */ + imsm_update_metadata_locally(st, u, len); + /* and possibly remotely */ + if (st->update_tail) { + append_metadata_update(st, u, len); + /* during reshape we do all work inside metadata handler + * manage_reshape(), so metadata update has to be triggered + * insida it + */ + flush_metadata_updates(st); + st->update_tail = &st->updates; + } else + free(u); + + retval = 0; + out: + if (fd >= 0) + close(fd); + return retval; +} +#endif /* MDASSEMBLE */ + static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap) { struct intel_super *super = st->sb; + struct migr_record *migr_rec = super->migr_rec; struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); struct imsm_map *map = get_imsm_map(dev, 0); struct imsm_map *prev_map = get_imsm_map(dev, 1); struct imsm_map *map_to_analyse = map; struct dl *dl; char *devname; + unsigned int component_size_alligment; int map_disks = info->array.raid_disks; + memset(info, 0, sizeof(*info)); if (prev_map) map_to_analyse 
= prev_map; - for (dl = super->disks; dl; dl = dl->next) - if (dl->raiddisk == info->disk.raid_disk) - break; + dl = super->current_disk; + info->container_member = super->current_vol; - info->array.raid_disks = map_to_analyse->num_members; + info->array.raid_disks = map->num_members; info->array.level = get_imsm_raid_level(map_to_analyse); info->array.layout = imsm_level_to_layout(info->array.level); info->array.md_minor = -1; @@ -1801,7 +2249,6 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, /* conversion is happening as RAID5 */ info->array.level = 5; info->array.layout = ALGORITHM_PARITY_N; - info->array.raid_disks += 1; info->delta_disks -= 1; break; default: @@ -1817,16 +2264,33 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->new_chunk = info->array.chunk_size; info->delta_disks = 0; } - info->disk.major = 0; - info->disk.minor = 0; + if (dl) { info->disk.major = dl->major; info->disk.minor = dl->minor; + info->disk.number = dl->index; + info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse, + dl->index); } info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0); info->component_size = __le32_to_cpu(map_to_analyse->blocks_per_member); + + /* check component size aligment + */ + component_size_alligment = + info->component_size % (info->array.chunk_size/512); + + if (component_size_alligment && + (info->array.level != 1) && (info->array.level != UnSet)) { + dprintf("imsm: reported component size alligned from %llu ", + info->component_size); + info->component_size -= component_size_alligment; + dprintf("to %llu (%i).\n", + info->component_size, component_size_alligment); + } + memset(info->uuid, 0, sizeof(info->uuid)); info->recovery_start = MaxSector; @@ -1840,28 +2304,34 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, switch (migr_type(dev)) { case MIGR_REPAIR: case MIGR_INIT: { - __u64 blocks_per_unit = blocks_per_migr_unit(dev); + __u64 blocks_per_unit = blocks_per_migr_unit(super, + dev); __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit); info->resync_start = blocks_per_unit * units; break; } case MIGR_GEN_MIGR: { - __u64 blocks_per_unit = blocks_per_migr_unit(dev); - __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit); + __u64 blocks_per_unit = blocks_per_migr_unit(super, + dev); + __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit); unsigned long long array_blocks; int used_disks; + if (__le32_to_cpu(migr_rec->ascending_migr) && + (units < + (__le32_to_cpu(migr_rec->num_migr_units)-1)) && + (super->migr_rec->rec_status == + __cpu_to_le32(UNIT_SRC_IN_CP_AREA))) + units++; + info->reshape_progress = blocks_per_unit * units; - /* checkpoint is written per disks unit - * recalculate it to reshape position - */ - used_disks = imsm_num_data_members(dev, 0); - info->reshape_progress *= used_disks; dprintf("IMSM: General Migration checkpoint : %llu " "(%llu) -> read reshape progress : %llu\n", - units, blocks_per_unit, info->reshape_progress); + (unsigned long long)units, + (unsigned long long)blocks_per_unit, + info->reshape_progress); used_disks = imsm_num_data_members(dev, 1); if (used_disks > 0) { @@ -1943,6 +2413,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * getinfo_super_imsm_volume(st, info, map); return; } + memset(info, 0, sizeof(*info)); /* Set raid_disks to zero so that Assemble will always pull in valid * spares @@ -2404,7 +2875,6 @@ static void serialcpy(__u8 *dest, __u8 *src) strncpy((char *) dest, (char 
*) src, MAX_RAID_SERIAL_LEN); } -#ifndef MDASSEMBLE static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super) { struct dl *dl; @@ -2415,7 +2885,6 @@ static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super) return dl; } -#endif static struct imsm_disk * __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx) @@ -2505,8 +2974,11 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) * map1state=normal) * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal * map1state=degraded) + * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal + * map1state=normal) */ -static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type) +static void migrate(struct imsm_dev *dev, struct intel_super *super, + __u8 to_state, int migr_type) { struct imsm_map *dest; struct imsm_map *src = get_imsm_map(dev, 0); @@ -2529,6 +3001,10 @@ static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type) } } + if (migr_type == MIGR_GEN_MIGR) + /* Clear migration record */ + memset(super->migr_rec, 0, sizeof(struct migr_record)); + src->map_state = to_state; } @@ -2631,6 +3107,44 @@ struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb) return ptr; } +/******************************************************************************* + * Function: check_mpb_migr_compatibility + * Description: Function checks for unsupported migration features: + * - migration optimization area (pba_of_lba0) + * - descending reshape (ascending_migr) + * Parameters: + * super : imsm metadata information + * Returns: + * 0 : migration is compatible + * -1 : migration is not compatible + ******************************************************************************/ +int check_mpb_migr_compatibility(struct intel_super *super) +{ + struct imsm_map *map0, *map1; + struct migr_record *migr_rec = super->migr_rec; + int i; + + for (i = 0; i < super->anchor->num_raid_devs; i++) { + struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i); + + if (dev_iter && + dev_iter->vol.migr_state == 1 && + dev_iter->vol.migr_type == MIGR_GEN_MIGR) { + /* This device is migrating */ + map0 = get_imsm_map(dev_iter, 0); + map1 = get_imsm_map(dev_iter, 1); + if (map0->pba_of_lba0 != map1->pba_of_lba0) + /* migration optimization area was used */ + return -1; + if (migr_rec->ascending_migr == 0 + && migr_rec->dest_depth_per_unit > 0) + /* descending reshape not supported yet */ + return -1; + } + } + return 0; +} + static void __free_imsm(struct intel_super *super, int free_disks); /* load_imsm_mpb - read matrix metadata @@ -2655,8 +3169,8 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) { if (devname) - fprintf(stderr, - Name ": Cannot seek to anchor block on %s: %s\n", + fprintf(stderr, Name + ": Cannot seek to anchor block on %s: %s\n", devname, strerror(errno)); return 1; } @@ -2703,6 +3217,14 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) sectors = mpb_sectors(anchor) - 1; free(anchor); + + if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + fprintf(stderr, Name + ": %s could not allocate migr_rec buffer\n", __func__); + free(super->buf); + return 2; + } + if (!sectors) { check_sum = __gen_imsm_checksum(super->anchor); if (check_sum != __le32_to_cpu(super->anchor->check_sum)) { @@ -2754,6 +3276,8 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 0; } +static int read_imsm_migr_rec(int fd, struct 
intel_super *super); + static int load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd) { @@ -2815,6 +3339,10 @@ static void __free_imsm(struct intel_super *super, int free_disks) } /* unlink capability description */ super->orom = NULL; + if (super->migr_rec_buf) { + free(super->migr_rec_buf); + super->migr_rec_buf = NULL; + } if (free_disks) free_imsm_disks(super); free_devlist(super); @@ -2914,7 +3442,6 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de return 0; } -#ifndef MDASSEMBLE /* find_missing - helper routine for load_super_imsm_all that identifies * disks that have disappeared from the system. This routine relies on * the mpb being uptodate, which it is at load time. @@ -2950,6 +3477,7 @@ static int find_missing(struct intel_super *super) return 0; } +#ifndef MDASSEMBLE static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list) { struct intel_disk *idisk = disk_list; @@ -3339,6 +3867,26 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, err = 2; goto error; } + + /* load migration record */ + err = load_imsm_migr_rec(super, NULL); + if (err) { + err = 4; + goto error; + } + + /* Check migration compatibility */ + if (check_mpb_migr_compatibility(super) != 0) { + fprintf(stderr, Name ": Unsupported migration detected"); + if (devname) + fprintf(stderr, " on %s\n", devname); + else + fprintf(stderr, " (IMSM).\n"); + + err = 5; + goto error; + } + err = 0; error: @@ -3417,6 +3965,21 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) st->minor_version = 0; st->max_devs = IMSM_MAX_DEVICES; } + + /* load migration record */ + if (load_imsm_migr_rec(super, NULL) == 0) { + /* Check for unsupported migration features */ + if (check_mpb_migr_compatibility(super) != 0) { + fprintf(stderr, + Name ": Unsupported migration detected"); + if (devname) + fprintf(stderr, " on %s\n", devname); + else + fprintf(stderr, " (IMSM).\n"); + return 3; + } + } + return 0; } @@ -3552,6 +4115,14 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, fprintf(stderr, Name": could not allocate new mpb\n"); return 0; } + if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + fprintf(stderr, Name + ": %s could not allocate migr_rec buffer\n", + __func__); + free(super->buf); + free(super); + return 0; + } memcpy(mpb_new, mpb, size_old); free(mpb); mpb = mpb_new; @@ -3560,12 +4131,40 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, memset(mpb_new + size_old, 0, size_round - size_old); } super->current_vol = idx; - /* when creating the first raid device in this container set num_disks - * to zero, i.e. delete this spare and add raid member devices in - * add_to_super_imsm_volume() + + /* handle 'failed_disks' by either: + * a) create dummy disk entries in the table if this the first + * volume in the array. We add them here as this is the only + * opportunity to add them. add_to_super_imsm_volume() + * handles the non-failed disks and continues incrementing + * mpb->num_disks. 
+ * b) validate that 'failed_disks' matches the current number + * of missing disks if the container is populated */ - if (super->current_vol == 0) + if (super->current_vol == 0) { mpb->num_disks = 0; + for (i = 0; i < info->failed_disks; i++) { + struct imsm_disk *disk; + + mpb->num_disks++; + disk = __get_imsm_disk(mpb, i); + disk->status = CONFIGURED_DISK | FAILED_DISK; + disk->scsi_id = __cpu_to_le32(~(__u32)0); + snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN, + "missing:%d", i); + } + find_missing(super); + } else { + int missing = 0; + struct dl *d; + + for (d = super->missing; d; d = d->next) + missing++; + if (info->failed_disks > missing) { + fprintf(stderr, Name": unable to add 'missing' disk to container\n"); + return 0; + } + } if (!check_name(super, name, 0)) return 0; @@ -3597,15 +4196,14 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, vol = &dev->vol; vol->migr_state = 0; set_migr_type(dev, MIGR_INIT); - vol->dirty = 0; + vol->dirty = !info->state; vol->curr_migr_unit = 0; map = get_imsm_map(dev, 0); map->pba_of_lba0 = __cpu_to_le32(super->create_offset); map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info)); map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); map->failed_disk_num = ~0; - map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED : - IMSM_T_STATE_NORMAL; + map->map_state = info->failed_disks ? IMSM_T_STATE_DEGRADED : IMSM_T_STATE_NORMAL; map->ddf = 1; if (info->level == 1 && info->raid_disks > 2) { @@ -3680,6 +4278,13 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, ": %s could not allocate superblock\n", __func__); return 0; } + if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + fprintf(stderr, Name + ": %s could not allocate migr_rec buffer\n", __func__); + free(super->buf); + free(super); + return 0; + } memset(super->buf, 0, mpb_size); mpb = super->buf; mpb->mpb_size = __cpu_to_le32(mpb_size); @@ -3706,9 +4311,10 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, { struct intel_super *super = st->sb; struct imsm_super *mpb = super->anchor; - struct dl *dl; + struct imsm_disk *_disk; struct imsm_dev *dev; struct imsm_map *map; + struct dl *dl, *df; int slot; dev = get_imsm_dev(super, super->current_vol); @@ -3752,18 +4358,43 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, devname); return 1; } - set_imsm_ord_tbl_ent(map, dk->number, dl->index); + set_imsm_ord_tbl_ent(map, dk->raid_disk, dl->index); dl->disk.status = CONFIGURED_DISK; - /* if we are creating the first raid device update the family number */ + /* update size of 'missing' disks to be at least as large as the + * largest acitve member (we only have dummy missing disks when + * creating the first volume) + */ if (super->current_vol == 0) { - __u32 sum; - struct imsm_dev *_dev = __get_imsm_dev(mpb, 0); - struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index); - - if (!_dev || !_disk) { - fprintf(stderr, Name ": BUG mpb setup error\n"); - return 1; + for (df = super->missing; df; df = df->next) { + if (dl->disk.total_blocks > df->disk.total_blocks) + df->disk.total_blocks = dl->disk.total_blocks; + _disk = __get_imsm_disk(mpb, df->index); + *_disk = df->disk; + } + } + + /* refresh unset/failed slots to point to valid 'missing' entries */ + for (df = super->missing; df; df = df->next) + for (slot = 0; slot < mpb->num_disks; slot++) { + __u32 ord = get_imsm_ord_tbl_ent(dev, slot, -1); + + if ((ord & IMSM_ORD_REBUILD) 
== 0) + continue; + set_imsm_ord_tbl_ent(map, slot, df->index | IMSM_ORD_REBUILD); + dprintf("set slot:%d to missing disk:%d\n", slot, df->index); + break; + } + + /* if we are creating the first raid device update the family number */ + if (super->current_vol == 0) { + __u32 sum; + struct imsm_dev *_dev = __get_imsm_dev(mpb, 0); + + _disk = __get_imsm_disk(mpb, dl->index); + if (!_dev || !_disk) { + fprintf(stderr, Name ": BUG mpb setup error\n"); + return 1; } *_dev = *dev; *_disk = dl->disk; @@ -3772,7 +4403,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, mpb->family_num = __cpu_to_le32(sum); mpb->orig_family_num = mpb->family_num; } - + super->current_disk = dl; return 0; } @@ -3949,6 +4580,7 @@ static int write_super_imsm(struct supertype *st, int doclose) int i; __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk); int num_disks = 0; + int clear_migration_record = 1; /* 'generation' is incremented everytime the metadata is written */ generation = __le32_to_cpu(mpb->generation_num); @@ -3983,6 +4615,8 @@ static int write_super_imsm(struct supertype *st, int doclose) imsm_copy_dev(dev, dev2); mpb_size += sizeof_imsm_dev(dev, 0); } + if (is_gen_migration(dev2)) + clear_migration_record = 0; } mpb_size += __le32_to_cpu(mpb->bbm_log_size); mpb->mpb_size = __cpu_to_le32(mpb_size); @@ -3991,13 +4625,25 @@ static int write_super_imsm(struct supertype *st, int doclose) sum = __gen_imsm_checksum(mpb); mpb->check_sum = __cpu_to_le32(sum); + if (clear_migration_record) + memset(super->migr_rec_buf, 0, 512); + /* write the mpb for disks that compose raid devices */ for (d = super->disks; d ; d = d->next) { - if (d->index < 0) + if (d->index < 0 || is_failed(&d->disk)) continue; if (store_imsm_mpb(d->fd, mpb)) fprintf(stderr, "%s: failed for device %d:%d %s\n", __func__, d->major, d->minor, strerror(errno)); + if (clear_migration_record) { + unsigned long long dsize; + + get_dev_size(d->fd, NULL, &dsize); + if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) { + if (write(d->fd, super->migr_rec_buf, 512) != 512) + perror("Write migr_rec failed"); + } + } if (doclose) { close(d->fd); d->fd = -1; @@ -4322,43 +4968,44 @@ static int is_raid_level_supported(const struct imsm_orom *orom, int level, int return 0; } +static int imsm_default_chunk(const struct imsm_orom *orom) +{ + /* up to 512 if the plaform supports it, otherwise the platform max. + * 128 if no platform detected + */ + int fs = max(7, orom ? fls(orom->sss) : 0); + + return min(512, (1 << fs)); +} #define pr_vrb(fmt, arg...) 
(void) (verbose && fprintf(stderr, Name fmt, ##arg)) -/* - * validate volume parameters with OROM/EFI capabilities - */ static int validate_geometry_imsm_orom(struct intel_super *super, int level, int layout, int raiddisks, int *chunk, int verbose) { -#if DEBUG - verbose = 1; -#endif - /* validate container capabilities */ - if (super->orom && raiddisks > super->orom->tds) { - if (verbose) - fprintf(stderr, Name ": %d exceeds maximum number of" - " platform supported disks: %d\n", - raiddisks, super->orom->tds); + /* check/set platform and metadata limits/defaults */ + if (super->orom && raiddisks > super->orom->dpa) { + pr_vrb(": platform supports a maximum of %d disks per array\n", + super->orom->dpa); return 0; } /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */ - if (super->orom && (!is_raid_level_supported(super->orom, level, - raiddisks))) { + if (!is_raid_level_supported(super->orom, level, raiddisks)) { pr_vrb(": platform does not support raid%d with %d disk%s\n", level, raiddisks, raiddisks > 1 ? "s" : ""); return 0; } - if (super->orom && level != 1) { - if (chunk && (*chunk == 0 || *chunk == UnSet)) - *chunk = imsm_orom_default_chunk(super->orom); - else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) { - pr_vrb(": platform does not support a chunk size of: " - "%d\n", *chunk); - return 0; - } + + if (chunk && (*chunk == 0 || *chunk == UnSet)) + *chunk = imsm_default_chunk(super->orom); + + if (super->orom && chunk && !imsm_orom_has_chunk(super->orom, *chunk)) { + pr_vrb(": platform does not support a chunk size of: " + "%d\n", *chunk); + return 0; } + if (layout != imsm_level_to_layout(level)) { if (level == 5) pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n"); @@ -4708,9 +5355,8 @@ static void default_geometry_imsm(struct supertype *st, int *level, int *layout, if (level && layout && *layout == UnSet) *layout = imsm_level_to_layout(*level); - if (chunk && (*chunk == UnSet || *chunk == 0) && - super && super->orom) - *chunk = imsm_orom_default_chunk(super->orom); + if (chunk && (*chunk == UnSet || *chunk == 0)) + *chunk = imsm_default_chunk(super->orom); } static void handle_missing(struct intel_super *super, struct imsm_dev *dev); @@ -4843,6 +5489,9 @@ static int update_subarray_imsm(struct supertype *st, char *subarray, static int is_gen_migration(struct imsm_dev *dev) { + if (dev == NULL) + return 0; + if (!dev->vol.migr_state) return 0; @@ -4871,7 +5520,9 @@ static int is_rebuilding(struct imsm_dev *dev) return 0; } -static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array) +static void update_recovery_start(struct intel_super *super, + struct imsm_dev *dev, + struct mdinfo *array) { struct mdinfo *rebuild = NULL; struct mdinfo *d; @@ -4898,9 +5549,12 @@ static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array) } units = __le32_to_cpu(dev->vol.curr_migr_unit); - rebuild->recovery_start = units * blocks_per_migr_unit(dev); + rebuild->recovery_start = units * blocks_per_migr_unit(super, dev); } +#ifndef MDASSEMBLE +static int recover_backup_imsm(struct supertype *st, struct mdinfo *info); +#endif static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray) { @@ -4921,6 +5575,13 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra struct dl *d; int spare_disks = 0; + /* do not assemble arrays when not all attributes are supported */ + if (imsm_check_attributes(mpb->attributes) == 0) { + fprintf(stderr, Name ": IMSM metadata loading 
not allowed " + "due to attributes incompatibility.\n"); + return NULL; + } + /* check for bad blocks */ if (imsm_bbm_log_size(super->anchor)) bbm_errors = 1; @@ -4962,6 +5623,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra */ chunk = __le16_to_cpu(map->blocks_per_strip) >> 1; +#ifndef MDASSEMBLE if (!validate_geometry_imsm_orom(super, get_imsm_raid_level(map), /* RAID level */ imsm_level_to_layout(get_imsm_raid_level(map)), @@ -4972,17 +5634,17 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra "Cannot proceed with the action(s).\n"); continue; } +#endif /* MDASSEMBLE */ this = malloc(sizeof(*this)); if (!this) { fprintf(stderr, Name ": failed to allocate %zu bytes\n", sizeof(*this)); break; } - memset(this, 0, sizeof(*this)); - this->next = rest; super->current_vol = i; getinfo_super_imsm_volume(st, this, NULL); + this->next = rest; for (slot = 0 ; slot < map->num_members; slot++) { unsigned long long recovery_start; struct mdinfo *info_d; @@ -5060,8 +5722,14 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra info_d->component_size = __le32_to_cpu(map->blocks_per_member); } /* now that the disk list is up-to-date fixup recovery_start */ - update_recovery_start(dev, this); + update_recovery_start(super, dev, this); this->array.spare_disks += spare_disks; + +#ifndef MDASSEMBLE + /* check for reshape */ + if (this->reshape_active == 1) + recover_backup_imsm(st, this); +#endif rest = this; } @@ -5218,6 +5886,8 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) __u32 ord; int slot; struct imsm_map *map; + char buf[MAX_RAID_SERIAL_LEN+3]; + unsigned int len, shift = 0; /* new failures are always set in map[0] */ map = get_imsm_map(dev, 0); @@ -5230,6 +5900,11 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) if (is_failed(disk) && (ord & IMSM_ORD_REBUILD)) return 0; + sprintf(buf, "%s:0", disk->serial); + if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN) + shift = len - MAX_RAID_SERIAL_LEN + 1; + strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN); + disk->status |= FAILED_DISK; set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD); if (map->failed_disk_num == 0xff) @@ -5377,14 +6052,18 @@ static int imsm_set_array_state(struct active_array *a, int consistent) } else { if (a->last_checkpoint == 0 && a->prev_action == reshape) { /* for some reason we aborted the reshape. 
- * Better clean up + * + * disable automatic metadata rollback + * user action is required to recover process */ + if (0) { struct imsm_map *map2 = get_imsm_map(dev, 1); dev->vol.migr_state = 0; dev->vol.migr_type = 0; dev->vol.curr_migr_unit = 0; memcpy(map, map2, sizeof_imsm_map(map2)); super->updates_pending++; + } } if (a->last_checkpoint >= a->info.component_size) { unsigned long long array_blocks; @@ -5444,15 +6123,21 @@ static int imsm_set_array_state(struct active_array *a, int consistent) /* mark the start of the init process if nothing is failed */ dprintf("imsm: mark resync start\n"); if (map->map_state == IMSM_T_STATE_UNINITIALIZED) - migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT); + migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT); else - migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REPAIR); + migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR); super->updates_pending++; } mark_checkpoint: + /* skip checkpointing for general migration, + * it is controlled in mdadm + */ + if (is_gen_migration(dev)) + goto skip_mark_checkpoint; + /* check if we can update curr_migr_unit from resync_start, recovery_start */ - blocks_per_unit = blocks_per_migr_unit(dev); + blocks_per_unit = blocks_per_migr_unit(super, dev); if (blocks_per_unit) { __u32 units32; __u64 units; @@ -5464,6 +6149,7 @@ mark_checkpoint: * curr_migr_unit needs updating */ if (units32 == units && + units32 != 0 && __le32_to_cpu(dev->vol.curr_migr_unit) != units32) { dprintf("imsm: mark checkpoint (%u)\n", units32); dev->vol.curr_migr_unit = __cpu_to_le32(units32); @@ -5471,6 +6157,7 @@ mark_checkpoint: } } +skip_mark_checkpoint: /* mark dirty / clean */ if (dev->vol.dirty != !consistent) { dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty"); @@ -6058,6 +6745,130 @@ static int add_remove_disk_update(struct intel_super *super) return check_degraded; } + +static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u, + struct intel_super *super, + void ***space_list) +{ + struct intel_dev *id; + void **tofree = NULL; + int ret_val = 0; + + dprintf("apply_reshape_migration_update()\n"); + if ((u->subdev < 0) || + (u->subdev > 1)) { + dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev); + return ret_val; + } + if ((space_list == NULL) || (*space_list == NULL)) { + dprintf("imsm: Error: Memory is not allocated\n"); + return ret_val; + } + + for (id = super->devlist ; id; id = id->next) { + if (id->index == (unsigned)u->subdev) { + struct imsm_dev *dev = get_imsm_dev(super, u->subdev); + struct imsm_map *map; + struct imsm_dev *new_dev = + (struct imsm_dev *)*space_list; + struct imsm_map *migr_map = get_imsm_map(dev, 1); + int to_state; + struct dl *new_disk; + + if (new_dev == NULL) + return ret_val; + *space_list = **space_list; + memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0)); + map = get_imsm_map(new_dev, 0); + if (migr_map) { + dprintf("imsm: Error: migration in progress"); + return ret_val; + } + + to_state = map->map_state; + if ((u->new_level == 5) && (map->raid_level == 0)) { + map->num_members++; + /* this should not happen */ + if (u->new_disks[0] < 0) { + map->failed_disk_num = + map->num_members - 1; + to_state = IMSM_T_STATE_DEGRADED; + } else + to_state = IMSM_T_STATE_NORMAL; + } + migrate(new_dev, super, to_state, MIGR_GEN_MIGR); + if (u->new_level > -1) + map->raid_level = u->new_level; + migr_map = get_imsm_map(new_dev, 1); + if ((u->new_level == 5) && + (migr_map->raid_level == 0)) { + int ord = map->num_members - 1; + migr_map->num_members--; + if (u->new_disks[0] < 0) + ord |= 
IMSM_ORD_REBUILD; + set_imsm_ord_tbl_ent(map, + map->num_members - 1, + ord); + } + id->dev = new_dev; + tofree = (void **)dev; + + /* update chunk size + */ + if (u->new_chunksize > 0) + map->blocks_per_strip = + __cpu_to_le16(u->new_chunksize * 2); + + /* add disk + */ + if ((u->new_level != 5) || + (migr_map->raid_level != 0) || + (migr_map->raid_level == map->raid_level)) + goto skip_disk_add; + + if (u->new_disks[0] >= 0) { + /* use passes spare + */ + new_disk = get_disk_super(super, + major(u->new_disks[0]), + minor(u->new_disks[0])); + dprintf("imsm: new disk for reshape is: %i:%i " + "(%p, index = %i)\n", + major(u->new_disks[0]), + minor(u->new_disks[0]), + new_disk, new_disk->index); + if (new_disk == NULL) + goto error_disk_add; + + new_disk->index = map->num_members - 1; + /* slot to fill in autolayout + */ + new_disk->raiddisk = new_disk->index; + new_disk->disk.status |= CONFIGURED_DISK; + new_disk->disk.status &= ~SPARE_DISK; + } else + goto error_disk_add; + +skip_disk_add: + *tofree = *space_list; + /* calculate new size + */ + imsm_set_array_size(new_dev); + + ret_val = 1; + } + } + + if (tofree) + *space_list = tofree; + return ret_val; + +error_disk_add: + dprintf("Error: imsm: Cannot find disk.\n"); + return ret_val; +} + + static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, struct intel_super *super, void ***space_list) @@ -6149,6 +6960,9 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, id->dev = newdev; *sp = tofree; tofree = sp; + + /* Clear migration record */ + memset(super->migr_rec, 0, sizeof(struct migr_record)); } if (tofree) *space_list = tofree; @@ -6258,7 +7072,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u, for (du = super->missing; du; du = du->next) if (du->index >= 0) { set_imsm_ord_tbl_ent(map, du->index, du->index); - mark_missing(dev_new, &du->disk, du->index); + mark_missing(dv->dev, &du->disk, du->index); } return 1; @@ -6306,6 +7120,24 @@ static void imsm_process_update(struct supertype *st, mpb = super->anchor; switch (type) { + case update_general_migration_checkpoint: { + struct intel_dev *id; + struct imsm_update_general_migration_checkpoint *u = + (void *)update->buf; + + dprintf("imsm: process_update() " + "for update_general_migration_checkpoint called\n"); + + /* find device under general migration */ + for (id = super->devlist ; id; id = id->next) { + if (is_gen_migration(id->dev)) { + id->dev->vol.curr_migr_unit = + __cpu_to_le32(u->curr_migr_unit); + super->updates_pending++; + } + } + break; + } case update_takeover: { struct imsm_update_takeover *u = (void *)update->buf; if (apply_takeover_update(u, super, &update->space_list)) { @@ -6322,6 +7154,13 @@ static void imsm_process_update(struct supertype *st, super->updates_pending++; break; } + case update_reshape_migration: { + struct imsm_update_reshape_migration *u = (void *)update->buf; + if (apply_reshape_migration_update( + u, super, &update->space_list)) + super->updates_pending++; + break; + } case update_activate_spare: { struct imsm_update_activate_spare *u = (void *) update->buf; struct imsm_dev *dev = get_imsm_dev(super, u->array); @@ -6348,7 +7187,6 @@ static void imsm_process_update(struct supertype *st, } super->updates_pending++; - /* count failures (excluding rebuilds and the victim) * to determine map[0] state */ @@ -6374,7 +7212,7 @@ static void imsm_process_update(struct supertype *st, /* mark rebuild */ to_state = imsm_check_degraded(super, dev, failed); map->map_state = 
IMSM_T_STATE_DEGRADED; - migrate(dev, to_state, MIGR_REBUILD); + migrate(dev, super, to_state, MIGR_REBUILD); migr_map = get_imsm_map(dev, 1); set_imsm_ord_tbl_ent(map, u->slot, dl->index); set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD); @@ -6618,6 +7456,8 @@ static void imsm_process_update(struct supertype *st, } } +static struct mdinfo *get_spares_for_grow(struct supertype *st); + static void imsm_prepare_update(struct supertype *st, struct metadata_update *update) { @@ -6635,6 +7475,10 @@ static void imsm_prepare_update(struct supertype *st, size_t len = 0; switch (type) { + case update_general_migration_checkpoint: + dprintf("imsm: prepare_update() " + "for update_general_migration_checkpoint called\n"); + break; case update_takeover: { struct imsm_update_takeover *u = (void *)update->buf; if (u->direction == R0_TO_R10) { @@ -6715,6 +7559,93 @@ static void imsm_prepare_update(struct supertype *st, dprintf("New anchor length is %llu\n", (unsigned long long)len); break; } + case update_reshape_migration: { + /* for migration level 0->5 we need to add disks + * so the same as for container operation we will copy + * device to the bigger location. + * in memory prepared device and new disk area are prepared + * for usage in process update + */ + struct imsm_update_reshape_migration *u = (void *)update->buf; + struct intel_dev *id; + void **space_tail = (void **)&update->space_list; + int size; + void *s; + int current_level = -1; + + dprintf("imsm: imsm_prepare_update() for update_reshape\n"); + + /* add space for bigger array in update + */ + for (id = super->devlist; id; id = id->next) { + if (id->index == (unsigned)u->subdev) { + size = sizeof_imsm_dev(id->dev, 1); + if (u->new_raid_disks > u->old_raid_disks) + size += sizeof(__u32)*2* + (u->new_raid_disks - u->old_raid_disks); + s = malloc(size); + if (!s) + break; + *space_tail = s; + space_tail = s; + *space_tail = NULL; + break; + } + } + if (update->space_list == NULL) + break; + + /* add space for disk in update + */ + size = sizeof(struct dl); + s = malloc(size); + if (!s) { + free(update->space_list); + update->space_list = NULL; + break; + } + *space_tail = s; + space_tail = s; + *space_tail = NULL; + + /* add spare device to update + */ + for (id = super->devlist ; id; id = id->next) + if (id->index == (unsigned)u->subdev) { + struct imsm_dev *dev; + struct imsm_map *map; + + dev = get_imsm_dev(super, u->subdev); + map = get_imsm_map(dev, 0); + current_level = map->raid_level; + break; + } + if ((u->new_level == 5) && (u->new_level != current_level)) { + struct mdinfo *spares; + + spares = get_spares_for_grow(st); + if (spares) { + struct dl *dl; + struct mdinfo *dev; + + dev = spares->devs; + if (dev) { + u->new_disks[0] = + makedev(dev->disk.major, + dev->disk.minor); + dl = get_disk_super(super, + dev->disk.major, + dev->disk.minor); + dl->index = u->old_raid_disks; + dev = dev->next; + } + sysfs_free(spares); + } + } + len = disks_to_mpb_size(u->new_raid_disks); + dprintf("New anchor length is %llu\n", (unsigned long long)len); + break; + } case update_create_array: { struct imsm_update_create_array *u = (void *) update->buf; struct intel_dev *dv; @@ -6832,6 +7763,389 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind __free_imsm_disk(dl); } } +#endif /* MDASSEMBLE */ +/******************************************************************************* + * Function: open_backup_targets + * Description: Function opens file descriptors for all devices given in + * 
info->devs + * Parameters: + * info : general array info + * raid_disks : number of disks + * raid_fds : table of device's file descriptors + * Returns: + * 0 : success + * -1 : fail + ******************************************************************************/ +int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds) +{ + struct mdinfo *sd; + + for (sd = info->devs ; sd ; sd = sd->next) { + char *dn; + + if (sd->disk.state & (1<disk.raid_disk >= raid_disks) || + (sd->disk.raid_disk < 0)) + continue; + + dn = map_dev(sd->disk.major, + sd->disk.minor, 1); + raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR); + if (raid_fds[sd->disk.raid_disk] < 0) { + fprintf(stderr, "cannot open component\n"); + return -1; + } + } + return 0; +} + +#ifndef MDASSEMBLE +/******************************************************************************* + * Function: init_migr_record_imsm + * Description: Function inits imsm migration record + * Parameters: + * super : imsm internal array info + * dev : device under migration + * info : general array info to find the smallest device + * Returns: + * none + ******************************************************************************/ +void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, + struct mdinfo *info) +{ + struct intel_super *super = st->sb; + struct migr_record *migr_rec = super->migr_rec; + int new_data_disks; + unsigned long long dsize, dev_sectors; + long long unsigned min_dev_sectors = -1LLU; + struct mdinfo *sd; + char nm[30]; + int fd; + struct imsm_map *map_dest = get_imsm_map(dev, 0); + struct imsm_map *map_src = get_imsm_map(dev, 1); + unsigned long long num_migr_units; + unsigned long long array_blocks; + + memset(migr_rec, 0, sizeof(struct migr_record)); + migr_rec->family_num = __cpu_to_le32(super->anchor->family_num); + + /* only ascending reshape supported now */ + migr_rec->ascending_migr = __cpu_to_le32(1); + + migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE / + max(map_dest->blocks_per_strip, map_src->blocks_per_strip); + migr_rec->dest_depth_per_unit *= map_dest->blocks_per_strip; + new_data_disks = imsm_num_data_members(dev, 0); + migr_rec->blocks_per_unit = + __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks); + migr_rec->dest_depth_per_unit = + __cpu_to_le32(migr_rec->dest_depth_per_unit); + array_blocks = info->component_size * new_data_disks; + num_migr_units = + array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit); + + if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit)) + num_migr_units++; + migr_rec->num_migr_units = __cpu_to_le32(num_migr_units); + + migr_rec->post_migr_vol_cap = dev->size_low; + migr_rec->post_migr_vol_cap_hi = dev->size_high; + + + /* Find the smallest dev */ + for (sd = info->devs ; sd ; sd = sd->next) { + sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); + fd = dev_open(nm, O_RDONLY); + if (fd < 0) + continue; + get_dev_size(fd, NULL, &dsize); + dev_sectors = dsize / 512; + if (dev_sectors < min_dev_sectors) + min_dev_sectors = dev_sectors; + close(fd); + } + migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors - + RAID_DISK_RESERVED_BLOCKS_IMSM_HI); + + write_imsm_migr_rec(st); + + return; +} + +/******************************************************************************* + * Function: save_backup_imsm + * Description: Function saves critical data stripes to Migration Copy Area + * and updates the current migration unit status. + * Use restore_stripes() to form a destination stripe, + * and to write it to the Copy Area. 
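+ * The destination offsets are derived below as ckpt_area_pba * 512 minus
+ * start / data_disks, the per-disk share of the current reshape position;
+ * restore_stripes() advances by that share again through its start
+ * argument, so every backed up unit lands at the beginning of the Copy
+ * Area.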
+ * Parameters: + * st : supertype information + * dev : imsm device that backup is saved for + * info : general array info + * buf : input buffer + * length : length of data to backup (blocks_per_unit) + * Returns: + * 0 : success + *, -1 : fail + ******************************************************************************/ +int save_backup_imsm(struct supertype *st, + struct imsm_dev *dev, + struct mdinfo *info, + void *buf, + int length) +{ + int rv = -1; + struct intel_super *super = st->sb; + unsigned long long *target_offsets = NULL; + int *targets = NULL; + int i; + struct imsm_map *map_dest = get_imsm_map(dev, 0); + int new_disks = map_dest->num_members; + int dest_layout = 0; + int dest_chunk; + unsigned long long start; + int data_disks = imsm_num_data_members(dev, 0); + + targets = malloc(new_disks * sizeof(int)); + if (!targets) + goto abort; + + for (i = 0; i < new_disks; i++) + targets[i] = -1; + + target_offsets = malloc(new_disks * sizeof(unsigned long long)); + if (!target_offsets) + goto abort; + + start = info->reshape_progress * 512; + for (i = 0; i < new_disks; i++) { + target_offsets[i] = (unsigned long long) + __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512; + /* move back copy area adderss, it will be moved forward + * in restore_stripes() using start input variable + */ + target_offsets[i] -= start/data_disks; + } + + if (open_backup_targets(info, new_disks, targets)) + goto abort; + + dest_layout = imsm_level_to_layout(map_dest->raid_level); + dest_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512; + + if (restore_stripes(targets, /* list of dest devices */ + target_offsets, /* migration record offsets */ + new_disks, + dest_chunk, + map_dest->raid_level, + dest_layout, + -1, /* source backup file descriptor */ + 0, /* input buf offset + * always 0 buf is already offseted */ + start, + length, + buf) != 0) { + fprintf(stderr, Name ": Error restoring stripes\n"); + goto abort; + } + + rv = 0; + +abort: + if (targets) { + for (i = 0; i < new_disks; i++) + if (targets[i] >= 0) + close(targets[i]); + free(targets); + } + free(target_offsets); + + return rv; +} + +/******************************************************************************* + * Function: save_checkpoint_imsm + * Description: Function called for current unit status update + * in the migration record. It writes it to disk. 
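+ * Worked example (hypothetical numbers): with blocks_per_unit = 2048 and
+ * reshape_progress = 10240 the stored curr_migr_unit is 5; a non-zero
+ * remainder, which can only occur when the reshape finishes, rounds the
+ * unit up by one.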
+ * Parameters: + * super : imsm internal array info + * info : general array info + * Returns: + * 0: success + * 1: failure + * 2: failure, means no valid migration record + * / no general migration in progress / + ******************************************************************************/ +int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state) +{ + struct intel_super *super = st->sb; + unsigned long long blocks_per_unit; + unsigned long long curr_migr_unit; + + if (load_imsm_migr_rec(super, info) != 0) { + dprintf("imsm: ERROR: Cannot read migration record " + "for checkpoint save.\n"); + return 1; + } + + blocks_per_unit = __le32_to_cpu(super->migr_rec->blocks_per_unit); + if (blocks_per_unit == 0) { + dprintf("imsm: no migration in progress.\n"); + return 2; + } + curr_migr_unit = info->reshape_progress / blocks_per_unit; + /* check if array is alligned to copy area + * if it is not alligned, add one to current migration unit value + * this can happend on array reshape finish only + */ + if (info->reshape_progress % blocks_per_unit) + curr_migr_unit++; + + super->migr_rec->curr_migr_unit = + __cpu_to_le32(curr_migr_unit); + super->migr_rec->rec_status = __cpu_to_le32(state); + super->migr_rec->dest_1st_member_lba = + __cpu_to_le32(curr_migr_unit * + __le32_to_cpu(super->migr_rec->dest_depth_per_unit)); + if (write_imsm_migr_rec(st) < 0) { + dprintf("imsm: Cannot write migration record " + "outside backup area\n"); + return 1; + } + + return 0; +} + +/******************************************************************************* + * Function: recover_backup_imsm + * Description: Function recovers critical data from the Migration Copy Area + * while assembling an array. + * Parameters: + * super : imsm internal array info + * info : general array info + * Returns: + * 0 : success (or there is no data to recover) + * 1 : fail + ******************************************************************************/ +int recover_backup_imsm(struct supertype *st, struct mdinfo *info) +{ + struct intel_super *super = st->sb; + struct migr_record *migr_rec = super->migr_rec; + struct imsm_map *map_dest = NULL; + struct intel_dev *id = NULL; + unsigned long long read_offset; + unsigned long long write_offset; + unsigned unit_len; + int *targets = NULL; + int new_disks, i, err; + char *buf = NULL; + int retval = 1; + unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit); + unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units); + char buffer[20]; + int skipped_disks = 0; + int max_degradation; + + err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20); + if (err < 1) + return 1; + + /* recover data only during assemblation */ + if (strncmp(buffer, "inactive", 8) != 0) + return 0; + /* no data to recover */ + if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL) + return 0; + if (curr_migr_unit >= num_migr_units) + return 1; + + /* find device during reshape */ + for (id = super->devlist; id; id = id->next) + if (is_gen_migration(id->dev)) + break; + if (id == NULL) + return 1; + + map_dest = get_imsm_map(id->dev, 0); + new_disks = map_dest->num_members; + max_degradation = new_disks - imsm_num_data_members(id->dev, 0); + + read_offset = (unsigned long long) + __le32_to_cpu(migr_rec->ckpt_area_pba) * 512; + + write_offset = ((unsigned long long) + __le32_to_cpu(migr_rec->dest_1st_member_lba) + + __le32_to_cpu(map_dest->pba_of_lba0)) * 512; + + unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; + if 
(posix_memalign((void **)&buf, 512, unit_len) != 0) + goto abort; + targets = malloc(new_disks * sizeof(int)); + if (!targets) + goto abort; + + open_backup_targets(info, new_disks, targets); + + for (i = 0; i < new_disks; i++) { + if (targets[i] < 0) { + skipped_disks++; + continue; + } + if (lseek64(targets[i], read_offset, SEEK_SET) < 0) { + fprintf(stderr, + Name ": Cannot seek to block: %s\n", + strerror(errno)); + goto abort; + } + if ((unsigned)read(targets[i], buf, unit_len) != unit_len) { + fprintf(stderr, + Name ": Cannot read copy area block: %s\n", + strerror(errno)); + goto abort; + } + if (lseek64(targets[i], write_offset, SEEK_SET) < 0) { + fprintf(stderr, + Name ": Cannot seek to block: %s\n", + strerror(errno)); + goto abort; + } + if ((unsigned)write(targets[i], buf, unit_len) != unit_len) { + fprintf(stderr, + Name ": Cannot restore block: %s\n", + strerror(errno)); + goto abort; + } + } + + if (skipped_disks > max_degradation) { + fprintf(stderr, + Name ": Cannot restore data from backup." + " Too many failed disks\n"); + goto abort; + } + + if (save_checkpoint_imsm(st, info, UNIT_SRC_NORMAL)) { + /* ignore error == 2, this can mean end of reshape here + */ + dprintf("imsm: Cannot write checkpoint to " + "migration record (UNIT_SRC_NORMAL) during restart\n"); + } else + retval = 0; + +abort: + if (targets) { + for (i = 0; i < new_disks; i++) + if (targets[i]) + close(targets[i]); + free(targets); + } + free(buf); + return retval; +} static char disk_by_path[] = "/dev/disk/by-path/"; @@ -6947,6 +8261,14 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, geo->raid_disks > 1 ? "s" : ""); break; } + /* check if component size is aligned to chunk size + */ + if (info->component_size % + (info->array.chunk_size/512)) { + dprintf("Component size is not aligned to " + "chunk size\n"); + break; + } } if (*old_raid_disks && @@ -7042,6 +8364,7 @@ static int imsm_create_metadata_update_for_reshape( || delta_disks > spares->array.spare_disks) { fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices " "for %s.\n", geo->dev_name); + i = -1; goto abort; } @@ -7082,6 +8405,81 @@ abort: return 0; } +/****************************************************************************** + * function: imsm_create_metadata_update_for_migration() + * Creates update for IMSM array. 
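+ * For a RAID0 -> RAID5 migration the update carries the target level,
+ * layout and chunk size plus one extra raid disk; the spare backing that
+ * extra slot is picked later, in imsm_prepare_update(), and passed
+ * through new_disks[0].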
+ * + ******************************************************************************/ +static int imsm_create_metadata_update_for_migration( + struct supertype *st, + struct geo_params *geo, + struct imsm_update_reshape_migration **updatep) +{ + struct intel_super *super = st->sb; + int update_memory_size = 0; + struct imsm_update_reshape_migration *u = NULL; + struct imsm_dev *dev; + int previous_level = -1; + + dprintf("imsm_create_metadata_update_for_migration(enter)" + " New Level = %i\n", geo->level); + + /* size of all update data without anchor */ + update_memory_size = sizeof(struct imsm_update_reshape_migration); + + u = calloc(1, update_memory_size); + if (u == NULL) { + dprintf("error: cannot get memory for " + "imsm_create_metadata_update_for_migration\n"); + return 0; + } + u->type = update_reshape_migration; + u->subdev = super->current_vol; + u->new_level = geo->level; + u->new_layout = geo->layout; + u->new_raid_disks = u->old_raid_disks = geo->raid_disks; + u->new_disks[0] = -1; + u->new_chunksize = -1; + + dev = get_imsm_dev(super, u->subdev); + if (dev) { + struct imsm_map *map; + + map = get_imsm_map(dev, 0); + if (map) { + int current_chunk_size = + __le16_to_cpu(map->blocks_per_strip) / 2; + + if (geo->chunksize != current_chunk_size) { + u->new_chunksize = geo->chunksize / 1024; + dprintf("imsm: " + "chunk size change from %i to %i\n", + current_chunk_size, u->new_chunksize); + } + previous_level = map->raid_level; + } + } + if ((geo->level == 5) && (previous_level == 0)) { + struct mdinfo *spares = NULL; + + u->new_raid_disks++; + spares = get_spares_for_grow(st); + if ((spares == NULL) || (spares->array.spare_disks < 1)) { + free(u); + sysfs_free(spares); + update_memory_size = 0; + dprintf("error: cannot get spare device " + "for requested migration"); + return 0; + } + sysfs_free(spares); + } + dprintf("imsm: reshape update preparation : OK\n"); + *updatep = u; + + return update_memory_size; +} + static void imsm_update_metadata_locally(struct supertype *st, void *buf, int len) { @@ -7118,7 +8516,6 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, int chunk; getinfo_super_imsm_volume(st, &info, NULL); - if ((geo->level != info.array.level) && (geo->level >= 0) && (geo->level != UnSet)) { @@ -7126,6 +8523,14 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, case 0: if (geo->level == 5) { change = CH_MIGRATION; + if (geo->layout != ALGORITHM_LEFT_ASYMMETRIC) { + fprintf(stderr, + Name " Error. Requested Layout " + "not supported (left-asymmetric layout " + "is supported only)!\n"); + change = -1; + goto analyse_change_exit; + } check_devs = 1; } if (geo->level == 10) { @@ -7139,10 +8544,6 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, check_devs = 1; } break; - case 5: - if (geo->level == 0) - change = CH_MIGRATION; - break; case 10: if (geo->level == 0) { change = CH_TAKEOVER; @@ -7284,6 +8685,7 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, /* On container level we can only increase number of devices. 
*/ dprintf("imsm: info: Container operation\n"); int old_raid_disks = 0; + if (imsm_reshape_is_allowed_on_container( st, &geo, &old_raid_disks)) { struct imsm_update_reshape *u = NULL; @@ -7322,8 +8724,9 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, dprintf("imsm: info: Volume operation\n"); /* find requested device */ while (dev) { - imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum); - if (devnum == geo.dev_id) + if (imsm_find_array_minor_by_subdev( + dev->index, st->container_dev, &devnum) == 0 + && devnum == geo.dev_id) break; dev = dev->next; } @@ -7338,9 +8741,26 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, case CH_TAKEOVER: ret_val = imsm_takeover(st, &geo); break; - case CH_MIGRATION: + case CH_MIGRATION: { + struct imsm_update_reshape_migration *u = NULL; + int len = + imsm_create_metadata_update_for_migration( + st, &geo, &u); + if (len < 1) { + dprintf("imsm: " + "Cannot prepare update\n"); + break; + } ret_val = 0; - break; + /* update metadata locally */ + imsm_update_metadata_locally(st, u, len); + /* and possibly remotely */ + if (st->update_tail) + append_metadata_update(st, u, len); + else + free(u); + } + break; default: ret_val = 1; } @@ -7351,16 +8771,346 @@ exit_imsm_reshape_super: return ret_val; } +/******************************************************************************* + * Function: wait_for_reshape_imsm + * Description: Function writes new sync_max value and waits until + * reshape process reach new position + * Parameters: + * sra : general array info + * ndata : number of disks in new array's layout + * Returns: + * 0 : success, + * 1 : there is no reshape in progress, + * -1 : fail + ******************************************************************************/ +int wait_for_reshape_imsm(struct mdinfo *sra, int ndata) +{ + int fd = sysfs_get_fd(sra, NULL, "reshape_position"); + unsigned long long completed; + /* to_complete : new sync_max position */ + unsigned long long to_complete = sra->reshape_progress; + unsigned long long position_to_set = to_complete / ndata; + + if (fd < 0) { + dprintf("imsm: wait_for_reshape_imsm() " + "cannot open reshape_position\n"); + return 1; + } + + if (sysfs_fd_get_ll(fd, &completed) < 0) { + dprintf("imsm: wait_for_reshape_imsm() " + "cannot read reshape_position (no reshape in progres)\n"); + close(fd); + return 0; + } + + if (completed > to_complete) { + dprintf("imsm: wait_for_reshape_imsm() " + "wrong next position to set %llu (%llu)\n", + to_complete, completed); + close(fd); + return -1; + } + dprintf("Position set: %llu\n", position_to_set); + if (sysfs_set_num(sra, NULL, "sync_max", + position_to_set) != 0) { + dprintf("imsm: wait_for_reshape_imsm() " + "cannot set reshape position to %llu\n", + position_to_set); + close(fd); + return -1; + } + + do { + char action[20]; + fd_set rfds; + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + select(fd+1, &rfds, NULL, NULL, NULL); + if (sysfs_get_str(sra, NULL, "sync_action", + action, 20) > 0 && + strncmp(action, "reshape", 7) != 0) + break; + if (sysfs_fd_get_ll(fd, &completed) < 0) { + dprintf("imsm: wait_for_reshape_imsm() " + "cannot read reshape_position (in loop)\n"); + close(fd); + return 1; + } + } while (completed < to_complete); + close(fd); + return 0; + +} + +/******************************************************************************* + * Function: check_degradation_change + * Description: Check that array hasn't become failed. 
+ * Parameters: + * info : for sysfs access + * sources : source disks descriptors + * degraded: previous degradation level + * Returns: + * degradation level + ******************************************************************************/ +int check_degradation_change(struct mdinfo *info, + int *sources, + int degraded) +{ + unsigned long long new_degraded; + sysfs_get_ll(info, NULL, "degraded", &new_degraded); + if (new_degraded != (unsigned long long)degraded) { + /* check each device to ensure it is still working */ + struct mdinfo *sd; + new_degraded = 0; + for (sd = info->devs ; sd ; sd = sd->next) { + if (sd->disk.state & (1<disk.state & (1<disk.state = (1<disk.raid_disk >= 0 && + sources[sd->disk.raid_disk] >= 0) { + close(sources[ + sd->disk.raid_disk]); + sources[sd->disk.raid_disk] = + -1; + } + new_degraded++; + } + } + } + } + + return new_degraded; +} + +/******************************************************************************* + * Function: imsm_manage_reshape + * Description: Function finds array under reshape and it manages reshape + * process. It creates stripes backups (if required) and sets + * checheckpoits. + * Parameters: + * afd : Backup handle (nattive) - not used + * sra : general array info + * reshape : reshape parameters - not used + * st : supertype structure + * blocks : size of critical section [blocks] + * fds : table of source device descriptor + * offsets : start of array (offest per devices) + * dests : not used + * destfd : table of destination device descriptor + * destoffsets : table of destination offsets (per device) + * Returns: + * 1 : success, reshape is done + * 0 : fail + ******************************************************************************/ static int imsm_manage_reshape( int afd, struct mdinfo *sra, struct reshape *reshape, - struct supertype *st, unsigned long stripes, + struct supertype *st, unsigned long backup_blocks, int *fds, unsigned long long *offsets, int dests, int *destfd, unsigned long long *destoffsets) { - /* Just use child_monitor for now */ - return child_monitor( - afd, sra, reshape, st, stripes, - fds, offsets, dests, destfd, destoffsets); + int ret_val = 0; + struct intel_super *super = st->sb; + struct intel_dev *dv = NULL; + struct imsm_dev *dev = NULL; + struct imsm_map *map_src; + int migr_vol_qan = 0; + int ndata, odata; /* [bytes] */ + int chunk; /* [bytes] */ + struct migr_record *migr_rec; + char *buf = NULL; + unsigned int buf_size; /* [bytes] */ + unsigned long long max_position; /* array size [bytes] */ + unsigned long long next_step; /* [blocks]/[bytes] */ + unsigned long long old_data_stripe_length; + unsigned long long start_src; /* [bytes] */ + unsigned long long start; /* [bytes] */ + unsigned long long start_buf_shift; /* [bytes] */ + int degraded = 0; + int source_layout = 0; + + if (!fds || !offsets || !sra) + goto abort; + + /* Find volume during the reshape */ + for (dv = super->devlist; dv; dv = dv->next) { + if (dv->dev->vol.migr_type == MIGR_GEN_MIGR + && dv->dev->vol.migr_state == 1) { + dev = dv->dev; + migr_vol_qan++; + } + } + /* Only one volume can migrate at the same time */ + if (migr_vol_qan != 1) { + fprintf(stderr, Name " : %s", migr_vol_qan ? 
+ "Number of migrating volumes greater than 1\n" : + "There is no volume during migrationg\n"); + goto abort; + } + + map_src = get_imsm_map(dev, 1); + if (map_src == NULL) + goto abort; + + ndata = imsm_num_data_members(dev, 0); + odata = imsm_num_data_members(dev, 1); + + chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512; + old_data_stripe_length = odata * chunk; + + migr_rec = super->migr_rec; + + /* initialize migration record for start condition */ + if (sra->reshape_progress == 0) + init_migr_record_imsm(st, dev, sra); + else { + if (__le32_to_cpu(migr_rec->rec_status) != UNIT_SRC_NORMAL) { + dprintf("imsm: cannot restart migration when data " + "are present in copy area.\n"); + goto abort; + } + } + + /* size for data */ + buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512; + /* extend buffer size for parity disk */ + buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; + /* add space for stripe aligment */ + buf_size += old_data_stripe_length; + if (posix_memalign((void **)&buf, 4096, buf_size)) { + dprintf("imsm: Cannot allocate checpoint buffer\n"); + goto abort; + } + + max_position = sra->component_size * ndata; + source_layout = imsm_level_to_layout(map_src->raid_level); + + while (__le32_to_cpu(migr_rec->curr_migr_unit) < + __le32_to_cpu(migr_rec->num_migr_units)) { + /* current reshape position [blocks] */ + unsigned long long current_position = + __le32_to_cpu(migr_rec->blocks_per_unit) + * __le32_to_cpu(migr_rec->curr_migr_unit); + unsigned long long border; + + /* Check that array hasn't become failed. + */ + degraded = check_degradation_change(sra, fds, degraded); + if (degraded > 1) { + dprintf("imsm: Abort reshape due to degradation" + " level (%i)\n", degraded); + goto abort; + } + + next_step = __le32_to_cpu(migr_rec->blocks_per_unit); + + if ((current_position + next_step) > max_position) + next_step = max_position - current_position; + + start = current_position * 512; + + /* allign reading start to old geometry */ + start_buf_shift = start % old_data_stripe_length; + start_src = start - start_buf_shift; + + border = (start_src / odata) - (start / ndata); + border /= 512; + if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) { + /* save critical stripes to buf + * start - start address of current unit + * to backup [bytes] + * start_src - start address of current unit + * to backup alligned to source array + * [bytes] + */ + unsigned long long next_step_filler = 0; + unsigned long long copy_length = next_step * 512; + + /* allign copy area length to stripe in old geometry */ + next_step_filler = ((copy_length + start_buf_shift) + % old_data_stripe_length); + if (next_step_filler) + next_step_filler = (old_data_stripe_length + - next_step_filler); + dprintf("save_stripes() parameters: start = %llu," + "\tstart_src = %llu,\tnext_step*512 = %llu," + "\tstart_in_buf_shift = %llu," + "\tnext_step_filler = %llu\n", + start, start_src, copy_length, + start_buf_shift, next_step_filler); + + if (save_stripes(fds, offsets, map_src->num_members, + chunk, map_src->raid_level, + source_layout, 0, NULL, start_src, + copy_length + + next_step_filler + start_buf_shift, + buf)) { + dprintf("imsm: Cannot save stripes" + " to buffer\n"); + goto abort; + } + /* Convert data to destination format and store it + * in backup general migration area + */ + if (save_backup_imsm(st, dev, sra, + buf + start_buf_shift, copy_length)) { + dprintf("imsm: Cannot save stripes to " + "target devices\n"); + goto abort; + } + if (save_checkpoint_imsm(st, sra, + 
UNIT_SRC_IN_CP_AREA)) { + dprintf("imsm: Cannot write checkpoint to " + "migration record (UNIT_SRC_IN_CP_AREA)\n"); + goto abort; + } + } else { + /* set next step to use whole border area */ + border /= next_step; + if (border > 1) + next_step *= border; + } + /* When data backed up, checkpoint stored, + * kick the kernel to reshape unit of data + */ + next_step = next_step + sra->reshape_progress; + /* limit next step to array max position */ + if (next_step > max_position) + next_step = max_position; + sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress); + sysfs_set_num(sra, NULL, "suspend_hi", next_step); + sra->reshape_progress = next_step; + + /* wait until reshape finish */ + if (wait_for_reshape_imsm(sra, ndata) < 0) { + dprintf("wait_for_reshape_imsm returned error!\n"); + goto abort; + } + + if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) { + /* ignore error == 2, this can mean end of reshape here + */ + dprintf("imsm: Cannot write checkpoint to " + "migration record (UNIT_SRC_NORMAL)\n"); + goto abort; + } + + } + + /* return '1' if done */ + ret_val = 1; +abort: + free(buf); + abort_reshape(sra); + + return ret_val; } #endif /* MDASSEMBLE */ @@ -7384,6 +9134,7 @@ struct superswitch super_imsm = { .get_disk_controller_domain = imsm_get_disk_controller_domain, .reshape_super = imsm_reshape_super, .manage_reshape = imsm_manage_reshape, + .recover_backup = recover_backup_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, @@ -7403,6 +9154,7 @@ struct superswitch super_imsm = { .match_metadata_desc = match_metadata_desc_imsm, .container_content = container_content_imsm, + .external = 1, .name = "imsm",
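The checkpoint bookkeeping introduced above reduces to a handful of conversions between reshape progress, migration units and byte offsets. The following standalone sketch is illustrative only: every number is a made-up sample value, and it mirrors just the arithmetic of save_checkpoint_imsm() and recover_backup_imsm(), not the metadata I/O around it.

#include <stdio.h>
#include <stdint.h>

/* Illustrative sketch only: mirrors the unit/offset arithmetic used by
 * save_checkpoint_imsm() and recover_backup_imsm(); every value below is
 * a made-up sample, not read from real IMSM metadata.
 */
int main(void)
{
	uint64_t reshape_progress = 10300;	/* blocks, as read from sysfs */
	uint32_t blocks_per_unit = 2048;	/* migr_rec->blocks_per_unit */
	uint32_t dest_depth_per_unit = 1024;	/* migr_rec->dest_depth_per_unit */
	uint64_t ckpt_area_pba = 976771072;	/* sectors, near the disk end */
	uint64_t pba_of_lba0 = 0;		/* map_dest->pba_of_lba0 */
	uint64_t curr_migr_unit, dest_1st_member_lba;
	uint64_t read_offset, write_offset;

	/* unit number stored by save_checkpoint_imsm() */
	curr_migr_unit = reshape_progress / blocks_per_unit;
	if (reshape_progress % blocks_per_unit)
		curr_migr_unit++;	/* only when the reshape finishes */

	/* first destination lba recorded for that unit */
	dest_1st_member_lba = curr_migr_unit * dest_depth_per_unit;

	/* byte offsets used by recover_backup_imsm() to copy the unit back */
	read_offset = ckpt_area_pba * 512;
	write_offset = (dest_1st_member_lba + pba_of_lba0) * 512;

	printf("unit %llu: read Copy Area @%llu, write member @%llu\n",
	       (unsigned long long)curr_migr_unit,
	       (unsigned long long)read_offset,
	       (unsigned long long)write_offset);
	return 0;
}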