X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=96857263b318ddc6f1c05a53296eb8698697aec5;hb=c41e00b2e68aed0ab9d41f70a3e119d86a92cf29;hp=a921cbc7b10e5fb6639efd7935d19e1a8a60960f;hpb=660260d027bed08fad2393998e1ca8cb67e1b573;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index a921cbc7..96857263 100644 --- a/super-intel.c +++ b/super-intel.c @@ -88,28 +88,36 @@ #define MPB_SECTOR_CNT 2210 #define IMSM_RESERVED_SECTORS 4096 +#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056 #define SECT_PER_MB_SHIFT 11 /* Disk configuration info. */ #define IMSM_MAX_DEVICES 255 struct imsm_disk { __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */ - __u32 total_blocks; /* 0xE8 - 0xEB total blocks */ + __u32 total_blocks_lo; /* 0xE8 - 0xEB total blocks lo */ __u32 scsi_id; /* 0xEC - 0xEF scsi ID */ #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */ #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */ #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */ __u32 status; /* 0xF0 - 0xF3 */ __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */ -#define IMSM_DISK_FILLERS 4 - __u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */ + __u32 total_blocks_hi; /* 0xF4 - 0xF5 total blocks hi */ +#define IMSM_DISK_FILLERS 3 + __u32 filler[IMSM_DISK_FILLERS]; /* 0xF5 - 0x107 MPB_DISK_FILLERS for future expansion */ }; +/* map selector for map managment + */ +#define MAP_0 0 +#define MAP_1 1 +#define MAP_X -1 + /* RAID map configuration infos. */ struct imsm_map { - __u32 pba_of_lba0; /* start address of partition */ - __u32 blocks_per_member;/* blocks per member */ - __u32 num_data_stripes; /* number of data stripes */ + __u32 pba_of_lba0_lo; /* start address of partition */ + __u32 blocks_per_member_lo;/* blocks per member */ + __u32 num_data_stripes_lo; /* number of data stripes */ __u16 blocks_per_strip; __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */ #define IMSM_T_STATE_NORMAL 0 @@ -124,7 +132,10 @@ struct imsm_map { __u8 num_domains; /* number of parity domains */ __u8 failed_disk_num; /* valid only when state is degraded */ __u8 ddf; - __u32 filler[7]; /* expansion area */ + __u32 pba_of_lba0_hi; + __u32 blocks_per_member_hi; + __u32 num_data_stripes_hi; + __u32 filler[4]; /* expansion area */ #define IMSM_ORD_REBUILD (1 << 24) __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members], * top byte contains some flags @@ -233,6 +244,12 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */ +#define MIGR_REC_BUF_SIZE 512 /* size of migr_record i/o buffer */ +#define MIGR_REC_POSITION 512 /* migr_record position offset on disk, + * MIGR_REC_BUF_SIZE <= MIGR_REC_POSITION + */ + + #define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must * be recovered using srcMap */ #define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has @@ -264,6 +281,22 @@ struct migr_record { * (for recovered migrations) */ } __attribute__ ((__packed__)); +struct md_list { + /* usage marker: + * 1: load metadata + * 2: metadata does not match + * 4: already checked + */ + int used; + char *devname; + int found; + int container; + dev_t st_rdev; + struct md_list *next; +}; + +#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg)) + static __u8 migr_type(struct imsm_dev *dev) { if (dev->vol.migr_type == MIGR_VERIFY && @@ -289,7 +322,7 @@ static void set_migr_type(struct imsm_dev *dev, __u8 migr_type) static unsigned int sector_count(__u32 bytes) { - return ((bytes + (512-1)) & (~(512-1))) / 512; + return ROUND_UP(bytes, 512) / 512; } static unsigned int mpb_sectors(struct imsm_super *mpb) @@ -324,12 +357,15 @@ struct intel_super { void *migr_rec_buf; /* buffer for I/O operations */ struct migr_record *migr_rec; /* migration record */ }; + int clean_migration_record_by_mdmon; /* when reshape is switched to next + array, it indicates that mdmon is allowed to clean migration + record */ size_t len; /* size of the 'buf' allocation */ void *next_buf; /* for realloc'ing buf from the manager */ size_t next_len; int updates_pending; /* count of pending updates for mdmon */ int current_vol; /* index of raid device undergoing creation */ - __u32 create_offset; /* common start for 'current_vol' */ + unsigned long long create_offset; /* common start for 'current_vol' */ __u32 random; /* random data for seeding new family numbers */ struct intel_dev *devlist; struct dl { @@ -369,6 +405,7 @@ struct extent { enum imsm_reshape_type { CH_TAKEOVER, CH_MIGRATION, + CH_ARRAY_SIZE, }; /* definition of messages passed to imsm_process_update */ @@ -382,6 +419,7 @@ enum imsm_update_type { update_reshape_migration, update_takeover, update_general_migration_checkpoint, + update_size_change, }; struct imsm_update_activate_spare { @@ -434,6 +472,12 @@ struct imsm_update_reshape_migration { int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */ }; +struct imsm_update_size_change { + enum imsm_update_type type; + int subdev; + long long new_size; +}; + struct imsm_update_general_migration_checkpoint { enum imsm_update_type type; __u32 curr_migr_unit; @@ -661,21 +705,30 @@ struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map) { /* A device can have 2 maps if it is in the middle of a migration. * If second_map is: - * 0 - we return the first map - * 1 - we return the second map if it exists, else NULL - * -1 - we return the second map if it exists, else the first + * MAP_0 - we return the first map + * MAP_1 - we return the second map if it exists, else NULL + * MAP_X - we return the second map if it exists, else the first */ struct imsm_map *map = &dev->vol.map[0]; + struct imsm_map *map2 = NULL; - if (second_map == 1 && !dev->vol.migr_state) - return NULL; - else if (second_map == 1 || - (second_map < 0 && dev->vol.migr_state)) { - void *ptr = map; + if (dev->vol.migr_state) + map2 = (void *)map + sizeof_imsm_map(map); - return ptr + sizeof_imsm_map(map); - } else - return map; + switch (second_map) { + case MAP_0: + break; + case MAP_1: + map = map2; + break; + case MAP_X: + if (map2) + map = map2; + break; + default: + map = NULL; + } + return map; } @@ -685,13 +738,13 @@ struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map) static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state) { size_t size = sizeof(*dev) - sizeof(struct imsm_map) + - sizeof_imsm_map(get_imsm_map(dev, 0)); + sizeof_imsm_map(get_imsm_map(dev, MAP_0)); /* migrating means an additional map */ if (dev->vol.migr_state) - size += sizeof_imsm_map(get_imsm_map(dev, 1)); + size += sizeof_imsm_map(get_imsm_map(dev, MAP_1)); else if (migr_state) - size += sizeof_imsm_map(get_imsm_map(dev, 0)); + size += sizeof_imsm_map(get_imsm_map(dev, MAP_0)); return size; } @@ -745,9 +798,9 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index) /* * for second_map: - * == 0 get first map - * == 1 get second map - * == -1 than get map according to the current migr_state + * == MAP_0 get first map + * == MAP_1 get second map + * == MAP_X than get map according to the current migr_state */ static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot, @@ -818,7 +871,7 @@ static int count_memberships(struct dl *dl, struct intel_super *super) for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); if (get_imsm_disk_slot(map, dl->index) >= 0) memberships++; @@ -827,13 +880,87 @@ static int count_memberships(struct dl *dl, struct intel_super *super) return memberships; } +static __u32 imsm_min_reserved_sectors(struct intel_super *super); + +static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi) +{ + if (lo == 0 || hi == 0) + return 1; + *lo = __le32_to_cpu((unsigned)n); + *hi = __le32_to_cpu((unsigned)(n >> 32)); + return 0; +} + +static unsigned long long join_u32(__u32 lo, __u32 hi) +{ + return (unsigned long long)__le32_to_cpu(lo) | + (((unsigned long long)__le32_to_cpu(hi)) << 32); +} + +static unsigned long long total_blocks(struct imsm_disk *disk) +{ + if (disk == NULL) + return 0; + return join_u32(disk->total_blocks_lo, disk->total_blocks_hi); +} + +static unsigned long long pba_of_lba0(struct imsm_map *map) +{ + if (map == NULL) + return 0; + return join_u32(map->pba_of_lba0_lo, map->pba_of_lba0_hi); +} + +static unsigned long long blocks_per_member(struct imsm_map *map) +{ + if (map == NULL) + return 0; + return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi); +} + +static unsigned long long num_data_stripes(struct imsm_map *map) +{ + if (map == NULL) + return 0; + return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi); +} + +static void set_total_blocks(struct imsm_disk *disk, unsigned long long n) +{ + split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi); +} + +static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n) +{ + split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi); +} + +static void set_blocks_per_member(struct imsm_map *map, unsigned long long n) +{ + split_ull(n, &map->blocks_per_member_lo, &map->blocks_per_member_hi); +} + +static void set_num_data_stripes(struct imsm_map *map, unsigned long long n) +{ + split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi); +} + static struct extent *get_extents(struct intel_super *super, struct dl *dl) { /* find a list of used extents on the given physical device */ struct extent *rv, *e; int i; int memberships = count_memberships(dl, super); - __u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + __u32 reservation; + + /* trim the reserved area for spares, so they can join any array + * regardless of whether the OROM has assigned sectors from the + * IMSM_RESERVED_SECTORS region + */ + if (dl->index == -1) + reservation = imsm_min_reserved_sectors(super); + else + reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; rv = malloc(sizeof(struct extent) * (memberships + 1)); if (!rv) @@ -842,11 +969,11 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); if (get_imsm_disk_slot(map, dl->index) >= 0) { - e->start = __le32_to_cpu(map->pba_of_lba0); - e->size = __le32_to_cpu(map->blocks_per_member); + e->start = pba_of_lba0(map); + e->size = blocks_per_member(map); e++; } } @@ -859,10 +986,9 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) */ if (memberships) { struct extent *last = &rv[memberships - 1]; - __u32 remainder; + unsigned long long remainder; - remainder = __le32_to_cpu(dl->disk.total_blocks) - - (last->start + last->size); + remainder = total_blocks(&dl->disk) - (last->start + last->size); /* round down to 1k block to satisfy precision of the kernel * 'size' interface */ @@ -873,7 +999,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) if (reservation > remainder) reservation = remainder; } - e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation; + e->start = total_blocks(&dl->disk) - reservation; e->size = 0; return rv; } @@ -902,7 +1028,7 @@ static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl) for (i = 0; e[i].size; i++) continue; - rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start; + rv = total_blocks(&dl->disk) - e[i].start; free(e); @@ -924,6 +1050,53 @@ static int is_failed(struct imsm_disk *disk) return (disk->status & FAILED_DISK) == FAILED_DISK; } +/* try to determine how much space is reserved for metadata from + * the last get_extents() entry on the smallest active disk, + * otherwise fallback to the default + */ +static __u32 imsm_min_reserved_sectors(struct intel_super *super) +{ + struct extent *e; + int i; + unsigned long long min_active; + __u32 remainder; + __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + struct dl *dl, *dl_min = NULL; + + if (!super) + return rv; + + min_active = 0; + for (dl = super->disks; dl; dl = dl->next) { + if (dl->index < 0) + continue; + unsigned long long blocks = total_blocks(&dl->disk); + if (blocks < min_active || min_active == 0) { + dl_min = dl; + min_active = blocks; + } + } + if (!dl_min) + return rv; + + /* find last lba used by subarrays on the smallest active disk */ + e = get_extents(super, dl_min); + if (!e) + return rv; + for (i = 0; e[i].size; i++) + continue; + + remainder = min_active - e[i].start; + free(e); + + /* to give priority to recovery we should not require full + IMSM_RESERVED_SECTORS from the spare */ + rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION; + + /* if real reservation is smaller use that value */ + return (remainder < rv) ? remainder : rv; +} + /* Return minimum size of a spare that can be used in this array*/ static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st) { @@ -950,11 +1123,15 @@ static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st) if (i > 0) rv = e[i-1].start + e[i-1].size; free(e); + /* add the amount of space needed for metadata */ - rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; + rv = rv + imsm_min_reserved_sectors(super); + return rv * 512; } +static int is_gen_migration(struct imsm_dev *dev); + #ifndef MDASSEMBLE static __u64 blocks_per_migr_unit(struct intel_super *super, struct imsm_dev *dev); @@ -966,8 +1143,8 @@ static void print_imsm_dev(struct intel_super *super, { __u64 sz; int slot, i; - struct imsm_map *map = get_imsm_map(dev, 0); - struct imsm_map *map2 = get_imsm_map(dev, 1); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + struct imsm_map *map2 = get_imsm_map(dev, MAP_1); __u32 ord; printf("\n"); @@ -983,14 +1160,14 @@ static void print_imsm_dev(struct intel_super *super, printf("\n"); printf(" Slots : ["); for (i = 0; i < map->num_members; i++) { - ord = get_imsm_ord_tbl_ent(dev, i, 0); + ord = get_imsm_ord_tbl_ent(dev, i, MAP_0); printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); } printf("]"); if (map2) { printf(" <-- ["); for (i = 0; i < map2->num_members; i++) { - ord = get_imsm_ord_tbl_ent(dev, i, 1); + ord = get_imsm_ord_tbl_ent(dev, i, MAP_1); printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); } printf("]"); @@ -1004,7 +1181,7 @@ static void print_imsm_dev(struct intel_super *super, printf("\n"); slot = get_imsm_disk_slot(map, disk_idx); if (slot >= 0) { - ord = get_imsm_ord_tbl_ent(dev, slot, -1); + ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X); printf(" This Slot : %d%s\n", slot, ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : ""); } else @@ -1014,13 +1191,13 @@ static void print_imsm_dev(struct intel_super *super, sz += __le32_to_cpu(dev->size_low); printf(" Array Size : %llu%s\n", (unsigned long long)sz, human_size(sz * 512)); - sz = __le32_to_cpu(map->blocks_per_member); + sz = blocks_per_member(map); printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz, human_size(sz * 512)); - printf(" Sector Offset : %u\n", - __le32_to_cpu(map->pba_of_lba0)); - printf(" Num Stripes : %u\n", - __le32_to_cpu(map->num_data_stripes)); + printf(" Sector Offset : %llu\n", + pba_of_lba0(map)); + printf(" Num Stripes : %llu\n", + num_data_stripes(map)); printf(" Chunk Size : %u KiB", __le16_to_cpu(map->blocks_per_strip) / 2); if (map2) @@ -1048,40 +1225,44 @@ static void print_imsm_dev(struct intel_super *super, printf("idle\n"); printf(" Map State : %s", map_state_str[map->map_state]); if (dev->vol.migr_state) { - struct imsm_map *map = get_imsm_map(dev, 1); + struct imsm_map *map = get_imsm_map(dev, MAP_1); printf(" <-- %s", map_state_str[map->map_state]); - printf("\n Checkpoint : %u (%llu)", - __le32_to_cpu(dev->vol.curr_migr_unit), - (unsigned long long)blocks_per_migr_unit(super, dev)); + printf("\n Checkpoint : %u ", + __le32_to_cpu(dev->vol.curr_migr_unit)); + if ((is_gen_migration(dev)) && ((slot > 1) || (slot < 0))) + printf("(N/A)"); + else + printf("(%llu)", (unsigned long long) + blocks_per_migr_unit(super, dev)); } printf("\n"); printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean"); } -static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved) +static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved) { - struct imsm_disk *disk = __get_imsm_disk(mpb, index); char str[MAX_RAID_SERIAL_LEN + 1]; __u64 sz; - if (index < 0 || !disk) + if (index < -1 || !disk) return; printf("\n"); snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); - printf(" Disk%02d Serial : %s\n", index, str); + if (index >= 0) + printf(" Disk%02d Serial : %s\n", index, str); + else + printf(" Disk Serial : %s\n", str); printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", is_configured(disk) ? " active" : "", is_failed(disk) ? " failed" : ""); printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); - sz = __le32_to_cpu(disk->total_blocks) - reserved; + sz = total_blocks(disk) - reserved; printf(" Usable Size : %llu%s\n", (unsigned long long)sz, human_size(sz * 512)); } -static int is_gen_migration(struct imsm_dev *dev); - void examine_migr_rec_imsm(struct intel_super *super) { struct migr_record *migr_rec = super->migr_rec; @@ -1090,11 +1271,19 @@ void examine_migr_rec_imsm(struct intel_super *super) for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = __get_imsm_dev(mpb, i); + struct imsm_map *map; + int slot = -1; + if (is_gen_migration(dev) == 0) continue; printf("\nMigration Record Information:"); - if (super->disks->index > 1) { + + /* first map under migration */ + map = get_imsm_map(dev, MAP_0); + if (map) + slot = get_imsm_disk_slot(map, super->disks->index); + if ((map == NULL) || (slot > 1) || (slot < 0)) { printf(" Empty\n "); printf("Examine one of first two disks in array\n"); break; @@ -1254,7 +1443,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" MPB Sectors : %d\n", mpb_sectors(mpb)); printf(" Disks : %d\n", mpb->num_disks); printf(" RAID Devices : %d\n", mpb->num_raid_devs); - print_imsm_disk(mpb, super->disks->index, reserved); + print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved); if (super->bbm_log) { struct bbm_log *log = super->bbm_log; @@ -1279,28 +1468,12 @@ static void examine_super_imsm(struct supertype *st, char *homehost) for (i = 0; i < mpb->num_disks; i++) { if (i == super->disks->index) continue; - print_imsm_disk(mpb, i, reserved); + print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved); } - for (dl = super->disks ; dl; dl = dl->next) { - struct imsm_disk *disk; - char str[MAX_RAID_SERIAL_LEN + 1]; - __u64 sz; - - if (dl->index >= 0) - continue; - disk = &dl->disk; - printf("\n"); - snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial); - printf(" Disk Serial : %s\n", str); - printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", - is_configured(disk) ? " active" : "", - is_failed(disk) ? " failed" : ""); - printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); - sz = __le32_to_cpu(disk->total_blocks) - reserved; - printf(" Usable Size : %llu%s\n", (unsigned long long)sz, - human_size(sz * 512)); - } + for (dl = super->disks; dl; dl = dl->next) + if (dl->index == -1) + print_imsm_disk(&dl->disk, -1, reserved); examine_migr_rec_imsm(super); } @@ -1527,8 +1700,8 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf); else printf(" ()\n"); + close(fd); } - close(fd); free(path); path = NULL; } @@ -1618,8 +1791,13 @@ static void print_imsm_capability(const struct imsm_orom *orom) imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "", imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "", imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : ""); + printf(" 2TB volumes :%s supported\n", + (orom->attr & IMSM_OROM_ATTR_2TB)?"":" not"); + printf(" 2TB disks :%s supported\n", + (orom->attr & IMSM_OROM_ATTR_2TB_DISK)?"":" not"); printf(" Max Disks : %d\n", orom->tds); - printf(" Max Volumes : %d\n", orom->vpa); + printf(" Max Volumes : %d per array, %d per controller\n", + orom->vpa, orom->vphba); return; } @@ -1795,7 +1973,7 @@ get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p) static __u32 migr_strip_blocks_resync(struct imsm_dev *dev) { /* migr_strip_size when repairing or initializing parity */ - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); __u32 chunk = __le32_to_cpu(map->blocks_per_strip); switch (get_imsm_raid_level(map)) { @@ -1813,7 +1991,7 @@ static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev) * this is different than migr_strip_size_resync(), but it's good * to be compatible */ - struct imsm_map *map = get_imsm_map(dev, 1); + struct imsm_map *map = get_imsm_map(dev, MAP_1); __u32 chunk = __le32_to_cpu(map->blocks_per_strip); switch (get_imsm_raid_level(map)) { @@ -1832,8 +2010,8 @@ static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev) static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev) { - struct imsm_map *lo = get_imsm_map(dev, 0); - struct imsm_map *hi = get_imsm_map(dev, 1); + struct imsm_map *lo = get_imsm_map(dev, MAP_0); + struct imsm_map *hi = get_imsm_map(dev, MAP_1); __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip); __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip); @@ -1842,11 +2020,11 @@ static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev) static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev) { - struct imsm_map *lo = get_imsm_map(dev, 0); + struct imsm_map *lo = get_imsm_map(dev, MAP_0); int level = get_imsm_raid_level(lo); if (level == 1 || level == 10) { - struct imsm_map *hi = get_imsm_map(dev, 1); + struct imsm_map *hi = get_imsm_map(dev, MAP_1); return hi->num_domains; } else @@ -1862,9 +2040,11 @@ static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) switch (get_imsm_raid_level(map)) { case 0: + return map->num_members; + break; case 1: case 10: - return map->num_members; + return map->num_members/2; case 5: return map->num_members - 1; default: @@ -1875,7 +2055,7 @@ static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) static __u32 parity_segment_depth(struct imsm_dev *dev) { - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); __u32 chunk = __le32_to_cpu(map->blocks_per_strip); switch(get_imsm_raid_level(map)) { @@ -1891,7 +2071,7 @@ static __u32 parity_segment_depth(struct imsm_dev *dev) static __u32 map_migr_block(struct imsm_dev *dev, __u32 block) { - struct imsm_map *map = get_imsm_map(dev, 1); + struct imsm_map *map = get_imsm_map(dev, MAP_1); __u32 chunk = __le32_to_cpu(map->blocks_per_strip); __u32 strip = block / chunk; @@ -1930,7 +2110,7 @@ static __u64 blocks_per_migr_unit(struct intel_super *super, case MIGR_VERIFY: case MIGR_REPAIR: case MIGR_INIT: { - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); __u32 stripes_per_unit; __u32 blocks_per_unit; __u32 parity_depth; @@ -1946,7 +2126,7 @@ static __u64 blocks_per_migr_unit(struct intel_super *super, */ stripes_per_unit = num_stripes_per_unit_resync(dev); migr_chunk = migr_strip_blocks_resync(dev); - disks = imsm_num_data_members(dev, 0); + disks = imsm_num_data_members(dev, MAP_0); blocks_per_unit = stripes_per_unit * migr_chunk * disks; stripe = __le16_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; @@ -2000,13 +2180,14 @@ static int read_imsm_migr_rec(int fd, struct intel_super *super) unsigned long long dsize; get_dev_size(fd, NULL, &dsize); - if (lseek64(fd, dsize - 512, SEEK_SET) < 0) { + if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) { fprintf(stderr, Name ": Cannot seek to anchor block: %s\n", strerror(errno)); goto out; } - if (read(fd, super->migr_rec_buf, 512) != 512) { + if (read(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) != + MIGR_REC_BUF_SIZE) { fprintf(stderr, Name ": Cannot read migr record block: %s\n", strerror(errno)); @@ -2018,6 +2199,19 @@ out: return ret_val; } +static struct imsm_dev *imsm_get_device_during_migration( + struct intel_super *super) +{ + + struct intel_dev *dv; + + for (dv = super->devlist; dv; dv = dv->next) { + if (is_gen_migration(dv->dev)) + return dv->dev; + } + return NULL; +} + /******************************************************************************* * Function: load_imsm_migr_rec * Description: Function reads imsm migration record (it is stored at the last @@ -2028,6 +2222,7 @@ out: * Returns: * 0 : success * -1 : fail + * -2 : no migration in progress ******************************************************************************/ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info) { @@ -2036,13 +2231,31 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info) char nm[30]; int retval = -1; int fd = -1; + struct imsm_dev *dev; + struct imsm_map *map = NULL; + int slot = -1; + + /* find map under migration */ + dev = imsm_get_device_during_migration(super); + /* nothing to load,no migration in progress? + */ + if (dev == NULL) + return -2; + map = get_imsm_map(dev, MAP_0); if (info) { for (sd = info->devs ; sd ; sd = sd->next) { + /* skip spare and failed disks + */ + if (sd->disk.raid_disk < 0) + continue; /* read only from one of the first two slots */ - if ((sd->disk.raid_disk > 1) || - (sd->disk.raid_disk < 0)) + if (map) + slot = get_imsm_disk_slot(map, + sd->disk.raid_disk); + if ((map == NULL) || (slot > 1) || (slot < 0)) continue; + sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); fd = dev_open(nm, O_RDONLY); if (fd >= 0) @@ -2051,8 +2264,14 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info) } if (fd < 0) { for (dl = super->disks; dl; dl = dl->next) { + /* skip spare and failed disks + */ + if (dl->index < 0) + continue; /* read only from one of the first two slots */ - if (dl->index > 1) + if (map) + slot = get_imsm_disk_slot(map, dl->index); + if ((map == NULL) || (slot > 1) || (slot < 0)) continue; sprintf(nm, "%d:%d", dl->major, dl->minor); fd = dev_open(nm, O_RDONLY); @@ -2136,23 +2355,45 @@ static int write_imsm_migr_rec(struct supertype *st) struct dl *sd; int len; struct imsm_update_general_migration_checkpoint *u; + struct imsm_dev *dev; + struct imsm_map *map = NULL; + + /* find map under migration */ + dev = imsm_get_device_during_migration(super); + /* if no migration, write buffer anyway to clear migr_record + * on disk based on first available device + */ + if (dev == NULL) + dev = get_imsm_dev(super, super->current_vol < 0 ? 0 : + super->current_vol); + + map = get_imsm_map(dev, MAP_0); for (sd = super->disks ; sd ; sd = sd->next) { + int slot = -1; + + /* skip failed and spare devices */ + if (sd->index < 0) + continue; /* write to 2 first slots only */ - if ((sd->index < 0) || (sd->index > 1)) + if (map) + slot = get_imsm_disk_slot(map, sd->index); + if ((map == NULL) || (slot > 1) || (slot < 0)) continue; + sprintf(nm, "%d:%d", sd->major, sd->minor); fd = dev_open(nm, O_RDWR); if (fd < 0) continue; get_dev_size(fd, NULL, &dsize); - if (lseek64(fd, dsize - 512, SEEK_SET) < 0) { + if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) { fprintf(stderr, Name ": Cannot seek to anchor block: %s\n", strerror(errno)); goto out; } - if (write(fd, super->migr_rec_buf, 512) != 512) { + if (write(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) != + MIGR_REC_BUF_SIZE) { fprintf(stderr, Name ": Cannot write migr record block: %s\n", strerror(errno)); @@ -2190,17 +2431,66 @@ static int write_imsm_migr_rec(struct supertype *st) } #endif /* MDASSEMBLE */ +/* spare/missing disks activations are not allowe when + * array/container performs reshape operation, because + * all arrays in container works on the same disks set + */ +int imsm_reshape_blocks_arrays_changes(struct intel_super *super) +{ + int rv = 0; + struct intel_dev *i_dev; + struct imsm_dev *dev; + + /* check whole container + */ + for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) { + dev = i_dev->dev; + if (is_gen_migration(dev)) { + /* No repair during any migration in container + */ + rv = 1; + break; + } + } + return rv; +} +static unsigned long long imsm_component_size_aligment_check(int level, + int chunk_size, + unsigned long long component_size) +{ + unsigned int component_size_alligment; + + /* check component size aligment + */ + component_size_alligment = component_size % (chunk_size/512); + + dprintf("imsm_component_size_aligment_check(Level: %i, " + "chunk_size = %i, component_size = %llu), " + "component_size_alligment = %u\n", + level, chunk_size, component_size, + component_size_alligment); + + if (component_size_alligment && (level != 1) && (level != UnSet)) { + dprintf("imsm: reported component size alligned from %llu ", + component_size); + component_size -= component_size_alligment; + dprintf("to %llu (%i).\n", + component_size, component_size_alligment); + } + + return component_size; +} + static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap) { struct intel_super *super = st->sb; struct migr_record *migr_rec = super->migr_rec; struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); - struct imsm_map *map = get_imsm_map(dev, 0); - struct imsm_map *prev_map = get_imsm_map(dev, 1); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + struct imsm_map *prev_map = get_imsm_map(dev, MAP_1); struct imsm_map *map_to_analyse = map; struct dl *dl; char *devname; - unsigned int component_size_alligment; int map_disks = info->array.raid_disks; memset(info, 0, sizeof(*info)); @@ -2222,7 +2512,9 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->custom_array_size = __le32_to_cpu(dev->size_high); info->custom_array_size <<= 32; info->custom_array_size |= __le32_to_cpu(dev->size_low); - if (prev_map && map->map_state == prev_map->map_state) { + info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); + + if (is_gen_migration(dev)) { info->reshape_active = 1; info->new_level = get_imsm_raid_level(map); info->new_layout = imsm_level_to_layout(info->new_level); @@ -2232,7 +2524,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, /* this needs to be applied to every array * in the container. */ - info->reshape_active = 2; + info->reshape_active = CONTAINER_RESHAPE; } /* We shape information that we give to md might have to be * modify to cope with md's requirement for reshaping arrays. @@ -2278,31 +2570,22 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, dl->index); } - info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0); - info->component_size = - __le32_to_cpu(map_to_analyse->blocks_per_member); + info->data_offset = pba_of_lba0(map_to_analyse); + info->component_size = blocks_per_member(map_to_analyse); - /* check component size aligment - */ - component_size_alligment = - info->component_size % (info->array.chunk_size/512); - - if (component_size_alligment && - (info->array.level != 1) && (info->array.level != UnSet)) { - dprintf("imsm: reported component size alligned from %llu ", - info->component_size); - info->component_size -= component_size_alligment; - dprintf("to %llu (%i).\n", - info->component_size, component_size_alligment); - } + info->component_size = imsm_component_size_aligment_check( + info->array.level, + info->array.chunk_size, + info->component_size); memset(info->uuid, 0, sizeof(info->uuid)); info->recovery_start = MaxSector; info->reshape_progress = 0; info->resync_start = MaxSector; - if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED || - dev->vol.dirty) { + if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED || + dev->vol.dirty) && + imsm_reshape_blocks_arrays_changes(super) == 0) { info->resync_start = 0; } if (dev->vol.migr_state) { @@ -2338,9 +2621,9 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, (unsigned long long)blocks_per_unit, info->reshape_progress); - used_disks = imsm_num_data_members(dev, 1); + used_disks = imsm_num_data_members(dev, MAP_1); if (used_disks > 0) { - array_blocks = map->blocks_per_member * + array_blocks = blocks_per_member(map) * used_disks; /* round array size down to closest MB */ @@ -2383,7 +2666,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, dmap[i] = 0; if (i < info->array.raid_disks) { struct imsm_disk *dsk; - j = get_imsm_disk_idx(dev, i, -1); + j = get_imsm_disk_idx(dev, i, MAP_X); dsk = get_imsm_disk(super, j); if (dsk && (dsk->status & CONFIGURED_DISK)) dmap[i] = 1; @@ -2392,8 +2675,30 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, } } -static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed); -static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev); +static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, + int failed, int look_in_map); + +static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, + int look_in_map); + + +#ifndef MDASSEMBLE +static void manage_second_map(struct intel_super *super, struct imsm_dev *dev) +{ + if (is_gen_migration(dev)) { + int failed; + __u8 map_state; + struct imsm_map *map2 = get_imsm_map(dev, MAP_1); + + failed = imsm_count_failed(super, dev, MAP_1); + map_state = imsm_check_degraded(super, dev, failed, MAP_1); + if (map2->map_state != map_state) { + map2->map_state = map_state; + super->updates_pending++; + } + } +} +#endif static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index) { @@ -2443,6 +2748,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * info->disk.state = 0; info->name[0] = 0; info->recovery_start = MaxSector; + info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); /* do we have the all the insync disks that we expect? */ mpb = super->anchor; @@ -2453,15 +2759,15 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * struct imsm_map *map; __u8 state; - failed = imsm_count_failed(super, dev); - state = imsm_check_degraded(super, dev, failed); - map = get_imsm_map(dev, dev->vol.migr_state); + failed = imsm_count_failed(super, dev, MAP_0); + state = imsm_check_degraded(super, dev, failed, MAP_0); + map = get_imsm_map(dev, MAP_0); /* any newly missing disks? * (catches single-degraded vs double-degraded) */ for (j = 0; j < map->num_members; j++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); + __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0); __u32 idx = ord_to_idx(ord); if (!(ord & IMSM_ORD_REBUILD) && @@ -2478,7 +2784,17 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * enough = 0; else /* we're normal, or already degraded */ enough = 1; - + if (is_gen_migration(dev) && missing) { + /* during general migration we need all disks + * that process is running on. + * No new missing disk is allowed. + */ + max_enough = -1; + enough = -1; + /* no more checks necessary + */ + break; + } /* in the missing/failed disk case check to see * if at least one array is runnable */ @@ -2491,7 +2807,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * __u32 reserved = imsm_reserved_sectors(super, super->disks); disk = &super->disks->disk; - info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved; + info->data_offset = total_blocks(&super->disks->disk) - reserved; info->component_size = reserved; info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0; /* we don't change info->disk.raid_disk here because @@ -2602,25 +2918,30 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info, mpb = super->anchor; - if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private) - rv = -1; - else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) { - mpb->orig_family_num = *((__u32 *) info->update_private); - rv = 0; - } else if (strcmp(update, "uuid") == 0) { - __u32 *new_family = malloc(sizeof(*new_family)); - - /* update orig_family_number with the incoming random - * data, report the new effective uuid, and store the - * new orig_family_num for future updates. + if (strcmp(update, "uuid") == 0) { + /* We take this to mean that the family_num should be updated. + * However that is much smaller than the uuid so we cannot really + * allow an explicit uuid to be given. And it is hard to reliably + * know if one was. + * So if !uuid_set we know the current uuid is random and just used + * the first 'int' and copy it to the other 3 positions. + * Otherwise we require the 4 'int's to be the same as would be the + * case if we are using a random uuid. So an explicit uuid will be + * accepted as long as all for ints are the same... which shouldn't hurt */ - if (new_family) { - memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32)); - uuid_from_super_imsm(st, info->uuid); - *new_family = mpb->orig_family_num; - info->update_private = new_family; + if (!uuid_set) { + info->uuid[1] = info->uuid[2] = info->uuid[3] = info->uuid[0]; rv = 0; + } else { + if (info->uuid[0] != info->uuid[1] || + info->uuid[1] != info->uuid[2] || + info->uuid[2] != info->uuid[3]) + rv = -1; + else + rv = 0; } + if (rv == 0) + mpb->orig_family_num = info->uuid[0]; } else if (strcmp(update, "assemble") == 0) rv = 0; else @@ -2788,14 +3109,16 @@ static void fd2devname(int fd, char *name) sprintf(path, "/sys/dev/block/%d:%d", major(st.st_rdev), minor(st.st_rdev)); - rv = readlink(path, dname, sizeof(dname)); + rv = readlink(path, dname, sizeof(dname)-1); if (rv <= 0) return; - + dname[rv] = '\0'; nm = strrchr(dname, '/'); - nm++; - snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm); + if (nm) { + nm++; + snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm); + } } extern int scsi_get_serial(int fd, void *buf, size_t buf_len); @@ -2880,7 +3203,6 @@ static void serialcpy(__u8 *dest, __u8 *src) strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN); } -#ifndef MDASSEMBLE static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super) { struct dl *dl; @@ -2891,7 +3213,6 @@ static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super) return dl; } -#endif static struct imsm_disk * __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx) @@ -2988,12 +3309,12 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super, __u8 to_state, int migr_type) { struct imsm_map *dest; - struct imsm_map *src = get_imsm_map(dev, 0); + struct imsm_map *src = get_imsm_map(dev, MAP_0); dev->vol.migr_state = 1; set_migr_type(dev, migr_type); dev->vol.curr_migr_unit = 0; - dest = get_imsm_map(dev, 1); + dest = get_imsm_map(dev, MAP_1); /* duplicate and then set the target end state in map[0] */ memcpy(dest, src, sizeof_imsm_map(src)); @@ -3015,10 +3336,12 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super, src->map_state = to_state; } -static void end_migration(struct imsm_dev *dev, __u8 map_state) +static void end_migration(struct imsm_dev *dev, struct intel_super *super, + __u8 map_state) { - struct imsm_map *map = get_imsm_map(dev, 0); - struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state == 0 ? + MAP_0 : MAP_1); int i, j; /* merge any IMSM_ORD_REBUILD bits that were not successfully @@ -3026,19 +3349,31 @@ static void end_migration(struct imsm_dev *dev, __u8 map_state) * * FIXME add support for raid-level-migration */ - for (i = 0; i < prev->num_members; i++) - for (j = 0; j < map->num_members; j++) - /* during online capacity expansion - * disks position can be changed if takeover is used - */ - if (ord_to_idx(map->disk_ord_tbl[j]) == - ord_to_idx(prev->disk_ord_tbl[i])) { - map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i]; - break; - } + if ((map_state != map->map_state) && (is_gen_migration(dev) == 0) && + (prev->map_state != IMSM_T_STATE_UNINITIALIZED)) { + /* when final map state is other than expected + * merge maps (not for migration) + */ + int failed; + + for (i = 0; i < prev->num_members; i++) + for (j = 0; j < map->num_members; j++) + /* during online capacity expansion + * disks position can be changed + * if takeover is used + */ + if (ord_to_idx(map->disk_ord_tbl[j]) == + ord_to_idx(prev->disk_ord_tbl[i])) { + map->disk_ord_tbl[j] |= + prev->disk_ord_tbl[i]; + break; + } + failed = imsm_count_failed(super, dev, MAP_0); + map_state = imsm_check_degraded(super, dev, failed, MAP_0); + } dev->vol.migr_state = 0; - dev->vol.migr_type = 0; + set_migr_type(dev, 0); dev->vol.curr_migr_unit = 0; map->map_state = map_state; } @@ -3061,7 +3396,7 @@ static int parse_raid_devices(struct intel_super *super) len_migr = sizeof_imsm_dev(dev_iter, 1); if (len_migr > len) space_needed += len_migr - len; - + dv = malloc(sizeof(*dv)); if (!dv) return 1; @@ -3097,7 +3432,7 @@ static int parse_raid_devices(struct intel_super *super) super->buf = buf; super->len = len; } - + return 0; } @@ -3138,9 +3473,9 @@ int check_mpb_migr_compatibility(struct intel_super *super) dev_iter->vol.migr_state == 1 && dev_iter->vol.migr_type == MIGR_GEN_MIGR) { /* This device is migrating */ - map0 = get_imsm_map(dev_iter, 0); - map1 = get_imsm_map(dev_iter, 1); - if (map0->pba_of_lba0 != map1->pba_of_lba0) + map0 = get_imsm_map(dev_iter, MAP_0); + map1 = get_imsm_map(dev_iter, MAP_1); + if (pba_of_lba0(map0) != pba_of_lba0(map1)) /* migration optimization area was used */ return -1; if (migr_rec->ascending_migr == 0 @@ -3225,12 +3560,13 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) sectors = mpb_sectors(anchor) - 1; free(anchor); - if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) { fprintf(stderr, Name ": %s could not allocate migr_rec buffer\n", __func__); free(super->buf); return 2; } + super->clean_migration_record_by_mdmon = 0; if (!sectors) { check_sum = __gen_imsm_checksum(super->anchor); @@ -3285,6 +3621,32 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) static int read_imsm_migr_rec(int fd, struct intel_super *super); +/* clears hi bits in metadata if MPB_ATTRIB_2TB_DISK not set */ +static void clear_hi(struct intel_super *super) +{ + struct imsm_super *mpb = super->anchor; + int i, n; + if (mpb->attributes & MPB_ATTRIB_2TB_DISK) + return; + for (i = 0; i < mpb->num_disks; ++i) { + struct imsm_disk *disk = &mpb->disk[i]; + disk->total_blocks_hi = 0; + } + for (i = 0; i < mpb->num_raid_devs; ++i) { + struct imsm_dev *dev = get_imsm_dev(super, i); + if (!dev) + return; + for (n = 0; n < 2; ++n) { + struct imsm_map *map = get_imsm_map(dev, n); + if (!map) + continue; + map->pba_of_lba0_hi = 0; + map->blocks_per_member_hi = 0; + map->num_data_stripes_hi = 0; + } + } +} + static int load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd) { @@ -3297,7 +3659,7 @@ load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd if (err) return err; err = parse_raid_devices(super); - + clear_hi(super); return err; } @@ -3388,7 +3750,7 @@ static struct intel_super *alloc_super(void) if (super) { memset(super, 0, sizeof(*super)); super->current_vol = -1; - super->create_offset = ~((__u32 ) 0); + super->create_offset = ~((unsigned long long) 0); } return super; } @@ -3449,7 +3811,6 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de return 0; } -#ifndef MDASSEMBLE /* find_missing - helper routine for load_super_imsm_all that identifies * disks that have disappeared from the system. This routine relies on * the mpb being uptodate, which it is at load time. @@ -3485,6 +3846,7 @@ static int find_missing(struct intel_super *super) return 0; } +#ifndef MDASSEMBLE static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list) { struct intel_disk *idisk = disk_list; @@ -3801,70 +4163,36 @@ imsm_thunderdome(struct intel_super **super_list, int len) return champion; } + +static int +get_sra_super_block(int fd, struct intel_super **super_list, char *devname, int *max, int keep_fd); +static int get_super_block(struct intel_super **super_list, int devnum, char *devname, + int major, int minor, int keep_fd); +static int +get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list, + int *max, int keep_fd); + + static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, - char *devname) + char *devname, struct md_list *devlist, + int keep_fd) { - struct mdinfo *sra; struct intel_super *super_list = NULL; struct intel_super *super = NULL; - int devnum = fd2devnum(fd); - struct mdinfo *sd; - int retry; int err = 0; - int i; - - /* check if 'fd' an opened container */ - sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); - if (!sra) - return 1; + int i = 0; - if (sra->array.major_version != -1 || - sra->array.minor_version != -2 || - strcmp(sra->text_version, "imsm") != 0) { - err = 1; - goto error; - } - /* load all mpbs */ - for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) { - struct intel_super *s = alloc_super(); - char nm[32]; - int dfd; - int rv; - - err = 1; - if (!s) - goto error; - s->next = super_list; - super_list = s; - - err = 2; - sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); - dfd = dev_open(nm, O_RDWR); - if (dfd < 0) - goto error; - - rv = find_intel_hba_capability(dfd, s, devname); - /* no orom/efi or non-intel hba of the disk */ - if (rv != 0) - goto error; - - err = load_and_parse_mpb(dfd, s, NULL, 1); - - /* retry the load if we might have raced against mdmon */ - if (err == 3 && mdmon_running(devnum)) - for (retry = 0; retry < 3; retry++) { - usleep(3000); - err = load_and_parse_mpb(dfd, s, NULL, 1); - if (err != 3) - break; - } - if (err) - goto error; - } - - /* all mpbs enter, maybe one leaves */ - super = imsm_thunderdome(&super_list, i); - if (!super) { + if (fd >= 0) + /* 'fd' is an opened container */ + err = get_sra_super_block(fd, &super_list, devname, &i, keep_fd); + else + /* get super block from devlist devices */ + err = get_devlist_super_block(devlist, &super_list, &i, keep_fd); + if (err) + goto error; + /* all mpbs enter, maybe one leaves */ + super = imsm_thunderdome(&super_list, i); + if (!super) { err = 1; goto error; } @@ -3877,13 +4205,16 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, /* load migration record */ err = load_imsm_migr_rec(super, NULL); - if (err) { + if (err == -1) { + /* migration is in progress, + * but migr_rec cannot be loaded, + */ err = 4; goto error; } /* Check migration compatibility */ - if (check_mpb_migr_compatibility(super) != 0) { + if ((err == 0) && (check_mpb_migr_compatibility(super) != 0)) { fprintf(stderr, Name ": Unsupported migration detected"); if (devname) fprintf(stderr, " on %s\n", devname); @@ -3903,13 +4234,16 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, super_list = super_list->next; free_imsm(s); } - sysfs_free(sra); + if (err) return err; *sbp = super; - st->container_dev = devnum; + if (fd >= 0) + st->container_dev = fd2devnum(fd); + else + st->container_dev = NoMdDev; if (err == 0 && st->ss == NULL) { st->ss = &super_imsm; st->minor_version = 0; @@ -3918,9 +4252,148 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, return 0; } + +static int +get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list, + int *max, int keep_fd) +{ + struct md_list *tmpdev; + int err = 0; + int i = 0; + + for (i = 0, tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { + if (tmpdev->used != 1) + continue; + if (tmpdev->container == 1) { + int lmax = 0; + int fd = dev_open(tmpdev->devname, O_RDONLY|O_EXCL); + if (fd < 0) { + fprintf(stderr, Name ": cannot open device %s: %s\n", + tmpdev->devname, strerror(errno)); + err = 8; + goto error; + } + err = get_sra_super_block(fd, super_list, + tmpdev->devname, &lmax, + keep_fd); + i += lmax; + close(fd); + if (err) { + err = 7; + goto error; + } + } else { + int major = major(tmpdev->st_rdev); + int minor = minor(tmpdev->st_rdev); + err = get_super_block(super_list, + -1, + tmpdev->devname, + major, minor, + keep_fd); + i++; + if (err) { + err = 6; + goto error; + } + } + } + error: + *max = i; + return err; +} + +static int get_super_block(struct intel_super **super_list, int devnum, char *devname, + int major, int minor, int keep_fd) +{ + struct intel_super*s = NULL; + char nm[32]; + int dfd = -1; + int rv; + int err = 0; + int retry; + + s = alloc_super(); + if (!s) { + err = 1; + goto error; + } + + sprintf(nm, "%d:%d", major, minor); + dfd = dev_open(nm, O_RDWR); + if (dfd < 0) { + err = 2; + goto error; + } + + rv = find_intel_hba_capability(dfd, s, devname); + /* no orom/efi or non-intel hba of the disk */ + if (rv != 0) { + err = 4; + goto error; + } + + err = load_and_parse_mpb(dfd, s, NULL, keep_fd); + + /* retry the load if we might have raced against mdmon */ + if (err == 3 && (devnum != -1) && mdmon_running(devnum)) + for (retry = 0; retry < 3; retry++) { + usleep(3000); + err = load_and_parse_mpb(dfd, s, NULL, keep_fd); + if (err != 3) + break; + } + error: + if (!err) { + s->next = *super_list; + *super_list = s; + } else { + if (s) + free(s); + if (dfd) + close(dfd); + } + if ((dfd >= 0) && (!keep_fd)) + close(dfd); + return err; + +} + +static int +get_sra_super_block(int fd, struct intel_super **super_list, char *devname, int *max, int keep_fd) +{ + struct mdinfo *sra; + int devnum; + struct mdinfo *sd; + int err = 0; + int i = 0; + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); + if (!sra) + return 1; + + if (sra->array.major_version != -1 || + sra->array.minor_version != -2 || + strcmp(sra->text_version, "imsm") != 0) { + err = 1; + goto error; + } + /* load all mpbs */ + devnum = fd2devnum(fd); + for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) { + if (get_super_block(super_list, devnum, devname, + sd->disk.major, sd->disk.minor, keep_fd) != 0) { + err = 7; + goto error; + } + } + error: + sysfs_free(sra); + *max = i; + return err; +} + static int load_container_imsm(struct supertype *st, int fd, char *devname) { - return load_super_imsm_all(st, fd, &st->sb, devname); + return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1); } #endif @@ -3997,22 +4470,13 @@ static __u16 info_to_blocks_per_strip(mdu_array_info_t *info) return info->chunk_size >> 9; } -static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains) -{ - __u32 num_stripes; - - num_stripes = (info->size * 2) / info_to_blocks_per_strip(info); - num_stripes /= num_domains; - - return num_stripes; -} - -static __u32 info_to_blocks_per_member(mdu_array_info_t *info) +static unsigned long long info_to_blocks_per_member(mdu_array_info_t *info, + unsigned long long size) { if (info->level == 1) - return info->size * 2; + return size * 2; else - return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1); + return (size * 2) & ~(info_to_blocks_per_strip(info) - 1); } static void imsm_update_version_info(struct intel_super *super) @@ -4026,7 +4490,7 @@ static void imsm_update_version_info(struct intel_super *super) for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); if (__le32_to_cpu(dev->size_high) > 0) mpb->attributes |= MPB_ATTRIB_2TB; @@ -4103,7 +4567,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, int i; unsigned long long array_blocks; size_t size_old, size_new; - __u32 num_data_stripes; + unsigned long long num_data_stripes; if (super->orom && mpb->num_raid_devs >= super->orom->vpa) { fprintf(stderr, Name": This imsm-container already has the " @@ -4122,12 +4586,14 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, fprintf(stderr, Name": could not allocate new mpb\n"); return 0; } - if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + if (posix_memalign(&super->migr_rec_buf, 512, + MIGR_REC_BUF_SIZE) != 0) { fprintf(stderr, Name ": %s could not allocate migr_rec buffer\n", __func__); free(super->buf); free(super); + free(mpb_new); return 0; } memcpy(mpb_new, mpb, size_old); @@ -4138,12 +4604,40 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, memset(mpb_new + size_old, 0, size_round - size_old); } super->current_vol = idx; - /* when creating the first raid device in this container set num_disks - * to zero, i.e. delete this spare and add raid member devices in - * add_to_super_imsm_volume() + + /* handle 'failed_disks' by either: + * a) create dummy disk entries in the table if this the first + * volume in the array. We add them here as this is the only + * opportunity to add them. add_to_super_imsm_volume() + * handles the non-failed disks and continues incrementing + * mpb->num_disks. + * b) validate that 'failed_disks' matches the current number + * of missing disks if the container is populated */ - if (super->current_vol == 0) + if (super->current_vol == 0) { mpb->num_disks = 0; + for (i = 0; i < info->failed_disks; i++) { + struct imsm_disk *disk; + + mpb->num_disks++; + disk = __get_imsm_disk(mpb, i); + disk->status = CONFIGURED_DISK | FAILED_DISK; + disk->scsi_id = __cpu_to_le32(~(__u32)0); + snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN, + "missing:%d", i); + } + find_missing(super); + } else { + int missing = 0; + struct dl *d; + + for (d = super->missing; d; d = d->next) + missing++; + if (info->failed_disks > missing) { + fprintf(stderr, Name": unable to add 'missing' disk to container\n"); + return 0; + } + } if (!check_name(super, name, 0)) return 0; @@ -4160,12 +4654,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, } strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN); - if (info->level == 1) - array_blocks = info_to_blocks_per_member(info); - else - array_blocks = calc_array_size(info->level, info->raid_disks, + array_blocks = calc_array_size(info->level, info->raid_disks, info->layout, info->chunk_size, - info->size*2); + size * 2); /* round array size down to closest MB */ array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; @@ -4175,15 +4666,18 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, vol = &dev->vol; vol->migr_state = 0; set_migr_type(dev, MIGR_INIT); - vol->dirty = 0; + vol->dirty = !info->state; vol->curr_migr_unit = 0; - map = get_imsm_map(dev, 0); - map->pba_of_lba0 = __cpu_to_le32(super->create_offset); - map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info)); + map = get_imsm_map(dev, MAP_0); + set_pba_of_lba0(map, super->create_offset); + set_blocks_per_member(map, info_to_blocks_per_member(info, size)); map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); map->failed_disk_num = ~0; - map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED : - IMSM_T_STATE_NORMAL; + if (info->level > 0) + map->map_state = IMSM_T_STATE_UNINITIALIZED; + else + map->map_state = info->failed_disks ? IMSM_T_STATE_FAILED : + IMSM_T_STATE_NORMAL; map->ddf = 1; if (info->level == 1 && info->raid_disks > 2) { @@ -4203,8 +4697,10 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, else map->num_domains = 1; - num_data_stripes = info_to_num_data_stripes(info, map->num_domains); - map->num_data_stripes = __cpu_to_le32(num_data_stripes); + /* info->size is only int so use the 'size' parameter instead */ + num_data_stripes = (size * 2) / info_to_blocks_per_strip(info); + num_data_stripes /= map->num_domains; + set_num_data_stripes(map, num_data_stripes); map->num_members = info->raid_disks; for (i = 0; i < map->num_members; i++) { @@ -4258,7 +4754,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, ": %s could not allocate superblock\n", __func__); return 0; } - if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) { + if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) { fprintf(stderr, Name ": %s could not allocate migr_rec buffer\n", __func__); free(super->buf); @@ -4291,13 +4787,14 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, { struct intel_super *super = st->sb; struct imsm_super *mpb = super->anchor; - struct dl *dl; + struct imsm_disk *_disk; struct imsm_dev *dev; struct imsm_map *map; + struct dl *dl, *df; int slot; dev = get_imsm_dev(super, super->current_vol); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); if (! (dk->state & (1<index); if (slot >= 0 && - (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) { + (get_imsm_ord_tbl_ent(dev, slot, MAP_X) & IMSM_ORD_REBUILD) == 0) { fprintf(stderr, Name ": %s has been included in this array twice\n", devname); return 1; @@ -4340,12 +4837,54 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, set_imsm_ord_tbl_ent(map, dk->raid_disk, dl->index); dl->disk.status = CONFIGURED_DISK; + /* update size of 'missing' disks to be at least as large as the + * largest acitve member (we only have dummy missing disks when + * creating the first volume) + */ + if (super->current_vol == 0) { + for (df = super->missing; df; df = df->next) { + if (total_blocks(&dl->disk) > total_blocks(&df->disk)) + set_total_blocks(&df->disk, total_blocks(&dl->disk)); + _disk = __get_imsm_disk(mpb, df->index); + *_disk = df->disk; + } + } + + /* refresh unset/failed slots to point to valid 'missing' entries */ + for (df = super->missing; df; df = df->next) + for (slot = 0; slot < mpb->num_disks; slot++) { + __u32 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X); + + if ((ord & IMSM_ORD_REBUILD) == 0) + continue; + set_imsm_ord_tbl_ent(map, slot, df->index | IMSM_ORD_REBUILD); + if (is_gen_migration(dev)) { + struct imsm_map *map2 = get_imsm_map(dev, + MAP_1); + int slot2 = get_imsm_disk_slot(map2, df->index); + if ((slot2 < map2->num_members) && + (slot2 >= 0)) { + __u32 ord2 = get_imsm_ord_tbl_ent(dev, + slot2, + MAP_1); + if ((unsigned)df->index == + ord_to_idx(ord2)) + set_imsm_ord_tbl_ent(map2, + slot2, + df->index | + IMSM_ORD_REBUILD); + } + } + dprintf("set slot:%d to missing disk:%d\n", slot, df->index); + break; + } + /* if we are creating the first raid device update the family number */ if (super->current_vol == 0) { __u32 sum; struct imsm_dev *_dev = __get_imsm_dev(mpb, 0); - struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index); + _disk = __get_imsm_disk(mpb, dl->index); if (!_dev || !_disk) { fprintf(stderr, Name ": BUG mpb setup error\n"); return 1; @@ -4361,6 +4900,37 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, return 0; } +/* mark_spare() + * Function marks disk as spare and restores disk serial + * in case it was previously marked as failed by takeover operation + * reruns: + * -1 : critical error + * 0 : disk is marked as spare but serial is not set + * 1 : success + */ +int mark_spare(struct dl *disk) +{ + __u8 serial[MAX_RAID_SERIAL_LEN]; + int ret_val = -1; + + if (!disk) + return ret_val; + + ret_val = 0; + if (!imsm_read_serial(disk->fd, NULL, serial)) { + /* Restore disk serial number, because takeover marks disk + * as failed and adds to serial ':0' before it becomes + * a spare disk. + */ + serialcpy(disk->serial, serial); + serialcpy(disk->disk.serial, serial); + ret_val = 1; + } + disk->disk.status = SPARE_DISK; + disk->index = -1; + + return ret_val; +} static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) @@ -4398,7 +4968,6 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, memset(dd, 0, sizeof(*dd)); dd->major = major(stb.st_rdev); dd->minor = minor(stb.st_rdev); - dd->index = -1; dd->devname = devname ? strdup(devname) : NULL; dd->fd = fd; dd->e = NULL; @@ -4414,8 +4983,12 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, get_dev_size(fd, NULL, &size); size /= 512; serialcpy(dd->disk.serial, dd->serial); - dd->disk.total_blocks = __cpu_to_le32(size); - dd->disk.status = SPARE_DISK; + set_total_blocks(&dd->disk, size); + if (__le32_to_cpu(dd->disk.total_blocks_hi) > 0) { + struct imsm_super *mpb = super->anchor; + mpb->attributes |= MPB_ATTRIB_2TB_DISK; + } + mark_spare(dd); if (sysfs_disk_to_scsi_id(fd, &id) == 0) dd->disk.scsi_id = __cpu_to_le32(id); else @@ -4458,9 +5031,8 @@ static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk) memset(dd, 0, sizeof(*dd)); dd->major = dk->major; dd->minor = dk->minor; - dd->index = -1; dd->fd = -1; - dd->disk.status = SPARE_DISK; + mark_spare(dd); dd->action = DISK_REMOVE; dd->next = super->disk_mgmt_list; @@ -4503,6 +5075,9 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) continue; spare->disk[0] = d->disk; + if (__le32_to_cpu(d->disk.total_blocks_hi) > 0) + spare->attributes |= MPB_ATTRIB_2TB_DISK; + sum = __gen_imsm_checksum(spare); spare->family_num = __cpu_to_le32(sum); spare->orig_family_num = 0; @@ -4579,25 +5154,35 @@ static int write_super_imsm(struct supertype *st, int doclose) sum = __gen_imsm_checksum(mpb); mpb->check_sum = __cpu_to_le32(sum); + if (super->clean_migration_record_by_mdmon) { + clear_migration_record = 1; + super->clean_migration_record_by_mdmon = 0; + } if (clear_migration_record) - memset(super->migr_rec_buf, 0, 512); + memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE); /* write the mpb for disks that compose raid devices */ for (d = super->disks; d ; d = d->next) { - if (d->index < 0) + if (d->index < 0 || is_failed(&d->disk)) continue; - if (store_imsm_mpb(d->fd, mpb)) - fprintf(stderr, "%s: failed for device %d:%d %s\n", - __func__, d->major, d->minor, strerror(errno)); + if (clear_migration_record) { unsigned long long dsize; get_dev_size(d->fd, NULL, &dsize); if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) { - if (write(d->fd, super->migr_rec_buf, 512) != 512) + if (write(d->fd, super->migr_rec_buf, + MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE) perror("Write migr_rec failed"); } } + + if (store_imsm_mpb(d->fd, mpb)) + fprintf(stderr, + "%s: failed for device %d:%d (fd: %d)%s\n", + __func__, d->major, d->minor, + d->fd, strerror(errno)); + if (doclose) { close(d->fd); d->fd = -1; @@ -4617,7 +5202,7 @@ static int create_array(struct supertype *st, int dev_idx) struct imsm_update_create_array *u; struct intel_super *super = st->sb; struct imsm_dev *dev = get_imsm_dev(super, dev_idx); - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); struct disk_info *inf; struct imsm_disk *disk; int i; @@ -4636,7 +5221,7 @@ static int create_array(struct supertype *st, int dev_idx) imsm_copy_dev(&u->dev, dev); inf = get_disk_info(u); for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(dev, i, -1); + int idx = get_imsm_disk_idx(dev, i, MAP_X); disk = get_imsm_disk(super, idx); serialcpy(inf[i].serial, disk->serial); @@ -4776,14 +5361,22 @@ static int validate_geometry_imsm_container(struct supertype *st, int level, return 0; } close(fd); - if (super->orom && raiddisks > super->orom->tds) { - if (verbose) - fprintf(stderr, Name ": %d exceeds maximum number of" - " platform supported disks: %d\n", - raiddisks, super->orom->tds); - - free_imsm(super); - return 0; + if (super->orom) { + if (raiddisks > super->orom->tds) { + if (verbose) + fprintf(stderr, Name ": %d exceeds maximum number of" + " platform supported disks: %d\n", + raiddisks, super->orom->tds); + free_imsm(super); + return 0; + } + if ((super->orom->attr & IMSM_OROM_ATTR_2TB_DISK) == 0 && + (ldsize >> 9) >> 32 > 0) { + if (verbose) + fprintf(stderr, Name ": %s exceeds maximum platform supported size\n", dev); + free_imsm(super); + return 0; + } } *freesize = avail_size_imsm(st, ldsize >> 9); @@ -4892,7 +5485,7 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten if (maxsize < reserve) return 0; - super->create_offset = ~((__u32) 0); + super->create_offset = ~((unsigned long long) 0); if (start + reserve > super->create_offset) return 0; /* start overflows create_offset */ super->create_offset = start + reserve; @@ -4922,6 +5515,377 @@ static int is_raid_level_supported(const struct imsm_orom *orom, int level, int return 0; } + +static int +active_arrays_by_format(char *name, char* hba, struct md_list **devlist, + int dpa, int verbose) +{ + struct mdstat_ent *mdstat = mdstat_read(0, 0); + struct mdstat_ent *memb = NULL; + int count = 0; + int num = 0; + struct md_list *dv = NULL; + int found; + + for (memb = mdstat ; memb ; memb = memb->next) { + if (memb->metadata_version && + (strncmp(memb->metadata_version, "external:", 9) == 0) && + (strcmp(&memb->metadata_version[9], name) == 0) && + !is_subarray(memb->metadata_version+9) && + memb->members) { + struct dev_member *dev = memb->members; + int fd = -1; + while(dev && (fd < 0)) { + char *path = malloc(strlen(dev->name) + strlen("/dev/") + 1); + if (path) { + num = sprintf(path, "%s%s", "/dev/", dev->name); + if (num > 0) + fd = open(path, O_RDONLY, 0); + if ((num <= 0) || (fd < 0)) { + pr_vrb(": Cannot open %s: %s\n", + dev->name, strerror(errno)); + } + free(path); + } + dev = dev->next; + } + found = 0; + if ((fd >= 0) && disk_attached_to_hba(fd, hba)) { + struct mdstat_ent *vol; + for (vol = mdstat ; vol ; vol = vol->next) { + if ((vol->active > 0) && + vol->metadata_version && + is_container_member(vol, memb->dev)) { + found++; + count++; + } + } + if (*devlist && (found < dpa)) { + dv = calloc(1, sizeof(*dv)); + if (dv == NULL) + fprintf(stderr, Name ": calloc failed\n"); + else { + dv->devname = malloc(strlen(memb->dev) + strlen("/dev/") + 1); + if (dv->devname != NULL) { + sprintf(dv->devname, "%s%s", "/dev/", memb->dev); + dv->found = found; + dv->used = 0; + dv->next = *devlist; + *devlist = dv; + } else + free(dv); + } + } + } + if (fd >= 0) + close(fd); + } + } + free_mdstat(mdstat); + return count; +} + +#ifdef DEBUG_LOOP +static struct md_list* +get_loop_devices(void) +{ + int i; + struct md_list *devlist = NULL; + struct md_list *dv = NULL; + + for(i = 0; i < 12; i++) { + dv = calloc(1, sizeof(*dv)); + if (dv == NULL) { + fprintf(stderr, Name ": calloc failed\n"); + break; + } + dv->devname = malloc(40); + if (dv->devname == NULL) { + fprintf(stderr, Name ": malloc failed\n"); + free(dv); + break; + } + sprintf(dv->devname, "/dev/loop%d", i); + dv->next = devlist; + devlist = dv; + } + return devlist; +} +#endif + +static struct md_list* +get_devices(const char *hba_path) +{ + struct md_list *devlist = NULL; + struct md_list *dv = NULL; + struct dirent *ent; + DIR *dir; + int err = 0; + +#if DEBUG_LOOP + devlist = get_loop_devices(); + return devlist; +#endif + /* scroll through /sys/dev/block looking for devices attached to + * this hba + */ + dir = opendir("/sys/dev/block"); + for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) { + int fd; + char buf[1024]; + int major, minor; + char *path = NULL; + if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2) + continue; + path = devt_to_devpath(makedev(major, minor)); + if (!path) + continue; + if (!path_attached_to_hba(path, hba_path)) { + free(path); + path = NULL; + continue; + } + free(path); + path = NULL; + fd = dev_open(ent->d_name, O_RDONLY); + if (fd >= 0) { + fd2devname(fd, buf); + close(fd); + } else { + fprintf(stderr, Name ": cannot open device: %s\n", + ent->d_name); + continue; + } + + + dv = calloc(1, sizeof(*dv)); + if (dv == NULL) { + fprintf(stderr, Name ": malloc failed\n"); + err = 1; + break; + } + dv->devname = strdup(buf); + if (dv->devname == NULL) { + fprintf(stderr, Name ": malloc failed\n"); + err = 1; + free(dv); + break; + } + dv->next = devlist; + devlist = dv; + } + if (err) { + while(devlist) { + dv = devlist; + devlist = devlist->next; + free(dv->devname); + free(dv); + } + } + return devlist; +} + +static int +count_volumes_list(struct md_list *devlist, char *homehost, + int verbose, int *found) +{ + struct md_list *tmpdev; + int count = 0; + struct supertype *st = NULL; + + /* first walk the list of devices to find a consistent set + * that match the criterea, if that is possible. + * We flag the ones we like with 'used'. + */ + *found = 0; + st = match_metadata_desc_imsm("imsm"); + if (st == NULL) { + pr_vrb(": cannot allocate memory for imsm supertype\n"); + return 0; + } + + for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { + char *devname = tmpdev->devname; + struct stat stb; + struct supertype *tst; + int dfd; + if (tmpdev->used > 1) + continue; + tst = dup_super(st); + if (tst == NULL) { + pr_vrb(": cannot allocate memory for imsm supertype\n"); + goto err_1; + } + tmpdev->container = 0; + dfd = dev_open(devname, O_RDONLY|O_EXCL); + if (dfd < 0) { + dprintf(": cannot open device %s: %s\n", + devname, strerror(errno)); + tmpdev->used = 2; + } else if (fstat(dfd, &stb)< 0) { + /* Impossible! */ + dprintf(": fstat failed for %s: %s\n", + devname, strerror(errno)); + tmpdev->used = 2; + } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { + dprintf(": %s is not a block device.\n", + devname); + tmpdev->used = 2; + } else if (must_be_container(dfd)) { + struct supertype *cst; + cst = super_by_fd(dfd, NULL); + if (cst == NULL) { + dprintf(": cannot recognize container type %s\n", + devname); + tmpdev->used = 2; + } else if (tst->ss != st->ss) { + dprintf(": non-imsm container - ignore it: %s\n", + devname); + tmpdev->used = 2; + } else if (!tst->ss->load_container || + tst->ss->load_container(tst, dfd, NULL)) + tmpdev->used = 2; + else { + tmpdev->container = 1; + } + if (cst) + cst->ss->free_super(cst); + } else { + tmpdev->st_rdev = stb.st_rdev; + if (tst->ss->load_super(tst,dfd, NULL)) { + dprintf(": no RAID superblock on %s\n", + devname); + tmpdev->used = 2; + } else if (tst->ss->compare_super == NULL) { + dprintf(": Cannot assemble %s metadata on %s\n", + tst->ss->name, devname); + tmpdev->used = 2; + } + } + if (dfd >= 0) + close(dfd); + if (tmpdev->used == 2 || tmpdev->used == 4) { + /* Ignore unrecognised devices during auto-assembly */ + goto loop; + } + else { + struct mdinfo info; + tst->ss->getinfo_super(tst, &info, NULL); + + if (st->minor_version == -1) + st->minor_version = tst->minor_version; + + if (memcmp(info.uuid, uuid_zero, + sizeof(int[4])) == 0) { + /* this is a floating spare. It cannot define + * an array unless there are no more arrays of + * this type to be found. It can be included + * in an array of this type though. + */ + tmpdev->used = 3; + goto loop; + } + + if (st->ss != tst->ss || + st->minor_version != tst->minor_version || + st->ss->compare_super(st, tst) != 0) { + /* Some mismatch. If exactly one array matches this host, + * we can resolve on that one. + * Or, if we are auto assembling, we just ignore the second + * for now. + */ + dprintf(": superblock on %s doesn't match others - assembly aborted\n", + devname); + goto loop; + } + tmpdev->used = 1; + *found = 1; + dprintf("found: devname: %s\n", devname); + } + loop: + if (tst) + tst->ss->free_super(tst); + } + if (*found != 0) { + int err; + if ((err = load_super_imsm_all(st, -1, &st->sb, NULL, devlist, 0)) == 0) { + struct mdinfo *iter, *head = st->ss->container_content(st, NULL); + for (iter = head; iter; iter = iter->next) { + dprintf("content->text_version: %s vol\n", + iter->text_version); + if (iter->array.state & (1<text_version); + } else + count++; + } + sysfs_free(head); + + } else { + dprintf(" no valid super block on device list: err: %d %p\n", + err, st->sb); + } + } else { + dprintf(" no more devices to examin\n"); + } + + for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { + if ((tmpdev->used == 1) && (tmpdev->found)) { + if (count) { + if (count < tmpdev->found) + count = 0; + else + count -= tmpdev->found; + } + } + if (tmpdev->used == 1) + tmpdev->used = 4; + } + err_1: + if (st) + st->ss->free_super(st); + return count; +} + + +static int +count_volumes(char *hba, int dpa, int verbose) +{ + struct md_list *devlist = NULL; + int count = 0; + int found = 0;; + + devlist = get_devices(hba); + /* if no intel devices return zero volumes */ + if (devlist == NULL) + return 0; + + count = active_arrays_by_format("imsm", hba, &devlist, dpa, verbose); + dprintf(" path: %s active arrays: %d\n", hba, count); + if (devlist == NULL) + return 0; + do { + found = 0; + count += count_volumes_list(devlist, + NULL, + verbose, + &found); + dprintf("found %d count: %d\n", found, count); + } while (found); + + dprintf("path: %s total number of volumes: %d\n", hba, count); + + while(devlist) { + struct md_list *dv = devlist; + devlist = devlist->next; + free(dv->devname); + free(dv); + } + return count; +} + static int imsm_default_chunk(const struct imsm_orom *orom) { /* up to 512 if the plaform supports it, otherwise the platform max. @@ -4932,10 +5896,9 @@ static int imsm_default_chunk(const struct imsm_orom *orom) return min(512, (1 << fs)); } -#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg)) static int validate_geometry_imsm_orom(struct intel_super *super, int level, int layout, - int raiddisks, int *chunk, int verbose) + int raiddisks, int *chunk, unsigned long long size, int verbose) { /* check/set platform and metadata limits/defaults */ if (super->orom && raiddisks > super->orom->dpa) { @@ -4970,6 +5933,12 @@ validate_geometry_imsm_orom(struct intel_super *super, int level, int layout, layout, level); return 0; } + + if (super->orom && (super->orom->attr & IMSM_OROM_ATTR_2TB) == 0 && chunk && + (calc_array_size(level, raiddisks, layout, *chunk, size) >> 32) > 0) { + pr_vrb(": platform does not support a volume size over 2TB\n"); + return 0; + } return 1; } @@ -4984,7 +5953,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, { struct stat stb; struct intel_super *super = st->sb; - struct imsm_super *mpb = super->anchor; + struct imsm_super *mpb; struct dl *dl; unsigned long long pos = 0; unsigned long long maxsize; @@ -4995,7 +5964,9 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, if (!super) return 0; - if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) { + mpb = super->anchor; + + if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) { fprintf(stderr, Name ": RAID gemetry validation failed. " "Cannot proceed with the action(s).\n"); return 0; @@ -5073,6 +6044,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, fprintf(stderr, Name ": The option-rom requires all member" " disks to be a member of all volumes\n"); return 0; + } else if (super->orom && mpb->num_raid_devs > 0 && + mpb->num_disks != raiddisks) { + fprintf(stderr, Name ": The option-rom requires all member" + " disks to be a member of all volumes\n"); + return 0; } /* retrieve the largest free space block */ @@ -5111,19 +6087,44 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, i += dl->extent_cnt; maxsize = merge_extents(super, i); + + if (!check_env("IMSM_NO_PLATFORM") && + mpb->num_raid_devs > 0 && size && size != maxsize) { + fprintf(stderr, Name ": attempting to create a second " + "volume with size less then remaining space. " + "Aborting...\n"); + return 0; + } + if (maxsize < size || maxsize == 0) { - if (verbose) - fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n", - maxsize, size); + if (verbose) { + if (maxsize == 0) + fprintf(stderr, Name ": no free space" + " left on device. Aborting...\n"); + else + fprintf(stderr, Name ": not enough space" + " to create volume of given size" + " (%llu < %llu). Aborting...\n", + maxsize, size); + } return 0; } *freesize = maxsize; + if (super->orom) { + int count = count_volumes(super->hba->path, + super->orom->dpa, verbose); + if (super->orom->vphba <= count) { + pr_vrb(": platform does not support more than %d raid volumes.\n", + super->orom->vphba); + return 0; + } + } return 1; } -static int reserve_space(struct supertype *st, int raiddisks, +static int imsm_get_free_size(struct supertype *st, int raiddisks, unsigned long long size, int chunk, unsigned long long *freesize) { @@ -5185,16 +6186,46 @@ static int reserve_space(struct supertype *st, int raiddisks, size /= 2 * chunk; size *= 2 * chunk; } + maxsize = size; + } + if (!check_env("IMSM_NO_PLATFORM") && + mpb->num_raid_devs > 0 && size && size != maxsize) { + fprintf(stderr, Name ": attempting to create a second " + "volume with size less then remaining space. " + "Aborting...\n"); + return 0; } - cnt = 0; for (dl = super->disks; dl; dl = dl->next) if (dl->e) dl->raiddisk = cnt++; - *freesize = size; + *freesize = size; + + dprintf("imsm: imsm_get_free_size() returns : %llu\n", size); + + return 1; +} + +static int reserve_space(struct supertype *st, int raiddisks, + unsigned long long size, int chunk, + unsigned long long *freesize) +{ + struct intel_super *super = st->sb; + struct dl *dl; + int cnt; + int rv = 0; + + rv = imsm_get_free_size(st, raiddisks, size, chunk, freesize); + if (rv) { + cnt = 0; + for (dl = super->disks; dl; dl = dl->next) + if (dl->e) + dl->raiddisk = cnt++; + rv = 1; + } - return 1; + return rv; } static int validate_geometry_imsm(struct supertype *st, int level, int layout, @@ -5218,9 +6249,14 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, dev, freesize, verbose); } - + if (!dev) { - if (st->sb && freesize) { + if (st->sb) { + struct intel_super *super = st->sb; + if (!validate_geometry_imsm_orom(st->sb, level, layout, + raiddisks, chunk, size, + verbose)) + return 0; /* we are being asked to automatically layout a * new volume based on the current contents of * the container. If the the parameters can be @@ -5229,12 +6265,22 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, * created. add_to_super and getinfo_super * detect when autolayout is in progress. */ - if (!validate_geometry_imsm_orom(st->sb, level, layout, - raiddisks, chunk, - verbose)) - return 0; - return reserve_space(st, raiddisks, size, - chunk?*chunk:0, freesize); + /* assuming that freesize is always given when array is + created */ + if (super->orom && freesize) { + int count; + count = count_volumes(super->hba->path, + super->orom->dpa, verbose); + if (super->orom->vphba <= count) { + pr_vrb(": platform does not support more" + " than %d raid volumes.\n", + super->orom->vphba); + return 0; + } + } + if (freesize) + return reserve_space(st, raiddisks, size, + chunk?*chunk:0, freesize); } return 1; } @@ -5281,14 +6327,15 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, */ struct intel_super *super; - if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) { + if (load_super_imsm_all(st, cfd, (void **) &super, NULL, NULL, 1) == 0) { st->sb = super; st->container_dev = fd2devnum(cfd); close(cfd); return validate_geometry_imsm_volume(st, level, layout, raiddisks, chunk, size, dev, - freesize, verbose); + freesize, 1) + ? 1 : -1; } } @@ -5378,10 +6425,8 @@ static int kill_subarray_imsm(struct supertype *st) struct dl *d; for (d = super->disks; d; d = d->next) - if (d->index > -2) { - d->index = -1; - d->disk.status = SPARE_DISK; - } + if (d->index > -2) + mark_spare(d); } super->updates_pending++; @@ -5440,6 +6485,7 @@ static int update_subarray_imsm(struct supertype *st, char *subarray, return 0; } +#endif /* MDASSEMBLE */ static int is_gen_migration(struct imsm_dev *dev) { @@ -5454,7 +6500,6 @@ static int is_gen_migration(struct imsm_dev *dev) return 0; } -#endif /* MDASSEMBLE */ static int is_rebuilding(struct imsm_dev *dev) { @@ -5466,7 +6511,7 @@ static int is_rebuilding(struct imsm_dev *dev) if (migr_type(dev) != MIGR_REBUILD) return 0; - migr_map = get_imsm_map(dev, 1); + migr_map = get_imsm_map(dev, MAP_1); if (migr_map->map_state == IMSM_T_STATE_DEGRADED) return 1; @@ -5474,6 +6519,26 @@ static int is_rebuilding(struct imsm_dev *dev) return 0; } +#ifndef MDASSEMBLE +static int is_initializing(struct imsm_dev *dev) +{ + struct imsm_map *migr_map; + + if (!dev->vol.migr_state) + return 0; + + if (migr_type(dev) != MIGR_INIT) + return 0; + + migr_map = get_imsm_map(dev, MAP_1); + + if (migr_map->map_state == IMSM_T_STATE_UNINITIALIZED) + return 1; + + return 0; +} +#endif + static void update_recovery_start(struct intel_super *super, struct imsm_dev *dev, struct mdinfo *array) @@ -5525,20 +6590,24 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra struct imsm_super *mpb = super->anchor; struct mdinfo *rest = NULL; unsigned int i; - int bbm_errors = 0; + int sb_errors = 0; struct dl *d; int spare_disks = 0; /* do not assemble arrays when not all attributes are supported */ if (imsm_check_attributes(mpb->attributes) == 0) { - fprintf(stderr, Name ": IMSM metadata loading not allowed " - "due to attributes incompatibility.\n"); - return NULL; + sb_errors = 1; + fprintf(stderr, Name ": Unsupported attributes in IMSM metadata." + "Arrays activation is blocked.\n"); } /* check for bad blocks */ - if (imsm_bbm_log_size(super->anchor)) - bbm_errors = 1; + if (imsm_bbm_log_size(super->anchor)) { + fprintf(stderr, Name ": BBM log found in IMSM metadata." + "Arrays activation is blocked.\n"); + sb_errors = 1; + } + /* count spare devices, not used in maps */ @@ -5551,7 +6620,10 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra struct imsm_map *map; struct imsm_map *map2; struct mdinfo *this; - int slot, chunk; + int slot; +#ifndef MDASSEMBLE + int chunk; +#endif char *ep; if (subarray && @@ -5559,8 +6631,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra continue; dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); - map2 = get_imsm_map(dev, 1); + map = get_imsm_map(dev, MAP_0); + map2 = get_imsm_map(dev, MAP_1); /* do not publish arrays that are in the middle of an * unsupported migration @@ -5576,19 +6648,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra * OROM/EFI */ - chunk = __le16_to_cpu(map->blocks_per_strip) >> 1; -#ifndef MDASSEMBLE - if (!validate_geometry_imsm_orom(super, - get_imsm_raid_level(map), /* RAID level */ - imsm_level_to_layout(get_imsm_raid_level(map)), - map->num_members, /* raid disks */ - &chunk, - 1 /* verbose */)) { - fprintf(stderr, Name ": RAID gemetry validation failed. " - "Cannot proceed with the action(s).\n"); - continue; - } -#endif /* MDASSEMBLE */ this = malloc(sizeof(*this)); if (!this) { fprintf(stderr, Name ": failed to allocate %zu bytes\n", @@ -5599,6 +6658,30 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra super->current_vol = i; getinfo_super_imsm_volume(st, this, NULL); this->next = rest; +#ifndef MDASSEMBLE + chunk = __le16_to_cpu(map->blocks_per_strip) >> 1; + /* mdadm does not support all metadata features- set the bit in all arrays state */ + if (!validate_geometry_imsm_orom(super, + get_imsm_raid_level(map), /* RAID level */ + imsm_level_to_layout(get_imsm_raid_level(map)), + map->num_members, /* raid disks */ + &chunk, join_u32(dev->size_low, dev->size_high), + 1 /* verbose */)) { + fprintf(stderr, Name ": IMSM RAID geometry validation" + " failed. Array %s activation is blocked.\n", + dev->volume); + this->array.state |= + (1<array.state |= + (1<num_members; slot++) { unsigned long long recovery_start; struct mdinfo *info_d; @@ -5608,8 +6691,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra __u32 ord; skip = 0; - idx = get_imsm_disk_idx(dev, slot, 0); - ord = get_imsm_ord_tbl_ent(dev, slot, -1); + idx = get_imsm_disk_idx(dev, slot, MAP_0); + ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X); for (d = super->disks; d ; d = d->next) if (d->index == idx) break; @@ -5672,8 +6755,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra this->array.working_disks++; info_d->events = __le32_to_cpu(mpb->generation_num); - info_d->data_offset = __le32_to_cpu(map->pba_of_lba0); - info_d->component_size = __le32_to_cpu(map->blocks_per_member); + info_d->data_offset = pba_of_lba0(map); + info_d->component_size = blocks_per_member(map); } /* now that the disk list is up-to-date fixup recovery_start */ update_recovery_start(super, dev, this); @@ -5687,17 +6770,16 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra rest = this; } - /* if array has bad blocks, set suitable bit in array status */ - if (bbm_errors) - rest->array.state |= (1<map_state == IMSM_T_STATE_UNINITIALIZED ? @@ -5725,7 +6807,7 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int insync = insync; for (i = 0; i < map->num_members; i++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); + __u32 ord = get_imsm_ord_tbl_ent(dev, i, MAP_X); int idx = ord_to_idx(ord); struct imsm_disk *disk; @@ -5761,33 +6843,55 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, return map->map_state; } -static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev) +static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, + int look_in_map) { int i; int failed = 0; struct imsm_disk *disk; - struct imsm_map *map = get_imsm_map(dev, 0); - struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + struct imsm_map *prev = get_imsm_map(dev, MAP_1); + struct imsm_map *map_for_loop; __u32 ord; int idx; + int idx_1; /* at the beginning of migration we set IMSM_ORD_REBUILD on * disks that are being rebuilt. New failures are recorded to * map[0]. So we look through all the disks we started with and * see if any failures are still present, or if any new ones * have arrived - * - * FIXME add support for online capacity expansion and - * raid-level-migration */ - for (i = 0; i < prev->num_members; i++) { - ord = __le32_to_cpu(prev->disk_ord_tbl[i]); - ord |= __le32_to_cpu(map->disk_ord_tbl[i]); - idx = ord_to_idx(ord); + map_for_loop = map; + if (prev && (map->num_members < prev->num_members)) + map_for_loop = prev; - disk = get_imsm_disk(super, idx); - if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD) - failed++; + for (i = 0; i < map_for_loop->num_members; i++) { + idx_1 = -255; + /* when MAP_X is passed both maps failures are counted + */ + if (prev && + ((look_in_map == MAP_1) || (look_in_map == MAP_X)) && + (i < prev->num_members)) { + ord = __le32_to_cpu(prev->disk_ord_tbl[i]); + idx_1 = ord_to_idx(ord); + + disk = get_imsm_disk(super, idx_1); + if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD) + failed++; + } + if (((look_in_map == MAP_0) || (look_in_map == MAP_X)) && + (i < map->num_members)) { + ord = __le32_to_cpu(map->disk_ord_tbl[i]); + idx = ord_to_idx(ord); + + if (idx != idx_1) { + disk = get_imsm_disk(super, idx); + if (!disk || is_failed(disk) || + ord & IMSM_ORD_REBUILD) + failed++; + } + } } return failed; @@ -5799,7 +6903,7 @@ static int imsm_open_new(struct supertype *c, struct active_array *a, { struct intel_super *super = c->sb; struct imsm_super *mpb = super->anchor; - + if (atoi(inst) >= mpb->num_raid_devs) { fprintf(stderr, "%s: subarry index %d, out of range\n", __func__, atoi(inst)); @@ -5825,7 +6929,7 @@ static int is_resyncing(struct imsm_dev *dev) if (migr_type(dev) == MIGR_GEN_MIGR) return 0; - migr_map = get_imsm_map(dev, 1); + migr_map = get_imsm_map(dev, MAP_1); if ((migr_map->map_state == IMSM_T_STATE_NORMAL) && (dev->vol.migr_type != MIGR_GEN_MIGR)) @@ -5840,9 +6944,11 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) __u32 ord; int slot; struct imsm_map *map; + char buf[MAX_RAID_SERIAL_LEN+3]; + unsigned int len, shift = 0; /* new failures are always set in map[0] */ - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); slot = get_imsm_disk_slot(map, idx); if (slot < 0) @@ -5852,8 +6958,28 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) if (is_failed(disk) && (ord & IMSM_ORD_REBUILD)) return 0; + memcpy(buf, disk->serial, MAX_RAID_SERIAL_LEN); + buf[MAX_RAID_SERIAL_LEN] = '\000'; + strcat(buf, ":0"); + if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN) + shift = len - MAX_RAID_SERIAL_LEN + 1; + strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN); + disk->status |= FAILED_DISK; set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD); + /* mark failures in second map if second map exists and this disk + * in this slot. + * This is valid for migration, initialization and rebuild + */ + if (dev->vol.migr_state) { + struct imsm_map *map2 = get_imsm_map(dev, MAP_1); + int slot2 = get_imsm_disk_slot(map2, idx); + + if ((slot2 < map2->num_members) && + (slot2 >= 0)) + set_imsm_ord_tbl_ent(map2, slot2, + idx | IMSM_ORD_REBUILD); + } if (map->failed_disk_num == 0xff) map->failed_disk_num = slot; return 1; @@ -5872,25 +6998,32 @@ static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx) static void handle_missing(struct intel_super *super, struct imsm_dev *dev) { - __u8 map_state; struct dl *dl; - int failed; if (!super->missing) return; - failed = imsm_count_failed(super, dev); - map_state = imsm_check_degraded(super, dev, failed); dprintf("imsm: mark missing\n"); - end_migration(dev, map_state); + /* end process for initialization and rebuild only + */ + if (is_gen_migration(dev) == 0) { + __u8 map_state; + int failed; + + failed = imsm_count_failed(super, dev, MAP_0); + map_state = imsm_check_degraded(super, dev, failed, MAP_0); + + end_migration(dev, super, map_state); + } for (dl = super->missing; dl; dl = dl->next) mark_missing(dev, &dl->disk, dl->index); super->updates_pending++; } -static unsigned long long imsm_set_array_size(struct imsm_dev *dev) +static unsigned long long imsm_set_array_size(struct imsm_dev *dev, + long long new_size) { - int used_disks = imsm_num_data_members(dev, 0); + int used_disks = imsm_num_data_members(dev, MAP_0); unsigned long long array_blocks; struct imsm_map *map; @@ -5907,8 +7040,17 @@ static unsigned long long imsm_set_array_size(struct imsm_dev *dev) /* set array size in metadata */ - map = get_imsm_map(dev, 0); - array_blocks = map->blocks_per_member * used_disks; + if (new_size <= 0) { + /* OLCE size change is caused by added disks + */ + map = get_imsm_map(dev, MAP_0); + array_blocks = blocks_per_member(map) * used_disks; + } else { + /* Online Volume Size Change + * Using available free space + */ + array_blocks = new_size; + } /* round array size down to closest MB */ @@ -5935,7 +7077,7 @@ static void imsm_progress_container_reshape(struct intel_super *super) for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); struct imsm_map *map2; int prev_num_members; @@ -5956,16 +7098,17 @@ static void imsm_progress_container_reshape(struct intel_super *super) map->num_members = prev_disks; dev->vol.migr_state = 1; dev->vol.curr_migr_unit = 0; - dev->vol.migr_type = MIGR_GEN_MIGR; + set_migr_type(dev, MIGR_GEN_MIGR); for (i = prev_num_members; i < map->num_members; i++) set_imsm_ord_tbl_ent(map, i, i); - map2 = get_imsm_map(dev, 1); + map2 = get_imsm_map(dev, MAP_1); /* Copy the current map */ memcpy(map2, map, copy_map_size); map2->num_members = prev_num_members; - imsm_set_array_size(dev); + imsm_set_array_size(dev, -1); + super->clean_migration_record_by_mdmon = 1; super->updates_pending++; } } @@ -5980,9 +7123,9 @@ static int imsm_set_array_state(struct active_array *a, int consistent) int inst = a->info.container_member; struct intel_super *super = a->container->sb; struct imsm_dev *dev = get_imsm_dev(super, inst); - struct imsm_map *map = get_imsm_map(dev, 0); - int failed = imsm_count_failed(super, dev); - __u8 map_state = imsm_check_degraded(super, dev, failed); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + int failed = imsm_count_failed(super, dev, MAP_0); + __u8 map_state = imsm_check_degraded(super, dev, failed, MAP_0); __u32 blocks_per_unit; if (dev->vol.migr_state && @@ -6004,12 +7147,14 @@ static int imsm_set_array_state(struct active_array *a, int consistent) * user action is required to recover process */ if (0) { - struct imsm_map *map2 = get_imsm_map(dev, 1); - dev->vol.migr_state = 0; - dev->vol.migr_type = 0; - dev->vol.curr_migr_unit = 0; - memcpy(map, map2, sizeof_imsm_map(map2)); - super->updates_pending++; + struct imsm_map *map2 = + get_imsm_map(dev, MAP_1); + dev->vol.migr_state = 0; + set_migr_type(dev, 0); + dev->vol.curr_migr_unit = 0; + memcpy(map, map2, + sizeof_imsm_map(map2)); + super->updates_pending++; } } if (a->last_checkpoint >= a->info.component_size) { @@ -6017,10 +7162,10 @@ static int imsm_set_array_state(struct active_array *a, int consistent) int used_disks; struct mdinfo *mdi; - used_disks = imsm_num_data_members(dev, 0); + used_disks = imsm_num_data_members(dev, MAP_0); if (used_disks > 0) { array_blocks = - map->blocks_per_member * + blocks_per_member(map) * used_disks; /* round array size down to closest MB */ @@ -6062,11 +7207,12 @@ static int imsm_set_array_state(struct active_array *a, int consistent) */ if (is_resyncing(dev)) { dprintf("imsm: mark resync done\n"); - end_migration(dev, map_state); + end_migration(dev, super, map_state); super->updates_pending++; a->last_checkpoint = 0; } - } else if (!is_resyncing(dev) && !failed) { + } else if ((!is_resyncing(dev) && !failed) && + (imsm_reshape_blocks_arrays_changes(super) == 0)) { /* mark the start of the init process if nothing is failed */ dprintf("imsm: mark resync start\n"); if (map->map_state == IMSM_T_STATE_UNINITIALIZED) @@ -6123,7 +7269,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) int inst = a->info.container_member; struct intel_super *super = a->container->sb; struct imsm_dev *dev = get_imsm_dev(super, inst); - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); struct imsm_disk *disk; int failed; __u32 ord; @@ -6138,7 +7284,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) dprintf("imsm: set_disk %d:%x\n", n, state); - ord = get_imsm_ord_tbl_ent(dev, n, -1); + ord = get_imsm_ord_tbl_ent(dev, n, MAP_0); disk = get_imsm_disk(super, ord_to_idx(ord)); /* check for new failures */ @@ -6149,53 +7295,99 @@ static void imsm_set_disk(struct active_array *a, int n, int state) /* check if in_sync */ if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) { - struct imsm_map *migr_map = get_imsm_map(dev, 1); + struct imsm_map *migr_map = get_imsm_map(dev, MAP_1); set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord)); super->updates_pending++; } - failed = imsm_count_failed(super, dev); - map_state = imsm_check_degraded(super, dev, failed); + failed = imsm_count_failed(super, dev, MAP_0); + map_state = imsm_check_degraded(super, dev, failed, MAP_0); /* check if recovery complete, newly degraded, or failed */ - if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) { - end_migration(dev, map_state); - map = get_imsm_map(dev, 0); - map->failed_disk_num = ~0; - super->updates_pending++; - a->last_checkpoint = 0; - } else if (map_state == IMSM_T_STATE_DEGRADED && - map->map_state != map_state && - !dev->vol.migr_state) { - dprintf("imsm: mark degraded\n"); - map->map_state = map_state; - super->updates_pending++; - a->last_checkpoint = 0; - } else if (map_state == IMSM_T_STATE_FAILED && - map->map_state != map_state) { - dprintf("imsm: mark failed\n"); - end_migration(dev, map_state); - super->updates_pending++; - a->last_checkpoint = 0; - } else if (is_gen_migration(dev)) { - dprintf("imsm: Detected General Migration in state: "); - if (map_state == IMSM_T_STATE_NORMAL) { - end_migration(dev, map_state); - map = get_imsm_map(dev, 0); + dprintf("imsm: Detected transition to state "); + switch (map_state) { + case IMSM_T_STATE_NORMAL: /* transition to normal state */ + dprintf("normal: "); + if (is_rebuilding(dev)) { + dprintf("while rebuilding"); + end_migration(dev, super, map_state); + map = get_imsm_map(dev, MAP_0); map->failed_disk_num = ~0; - dprintf("normal\n"); - } else { - if (map_state == IMSM_T_STATE_DEGRADED) { - printf("degraded\n"); - end_migration(dev, map_state); - } else { - dprintf("failed\n"); + super->updates_pending++; + a->last_checkpoint = 0; + break; + } + if (is_gen_migration(dev)) { + dprintf("while general migration"); + if (a->last_checkpoint >= a->info.component_size) + end_migration(dev, super, map_state); + else + map->map_state = map_state; + map = get_imsm_map(dev, MAP_0); + map->failed_disk_num = ~0; + super->updates_pending++; + break; + } + break; + case IMSM_T_STATE_DEGRADED: /* transition to degraded state */ + dprintf("degraded: "); + if ((map->map_state != map_state) && + !dev->vol.migr_state) { + dprintf("mark degraded"); + map->map_state = map_state; + super->updates_pending++; + a->last_checkpoint = 0; + break; + } + if (is_rebuilding(dev)) { + dprintf("while rebuilding."); + if (map->map_state != map_state) { + dprintf(" Map state change"); + end_migration(dev, super, map_state); + super->updates_pending++; + } + break; + } + if (is_gen_migration(dev)) { + dprintf("while general migration"); + if (a->last_checkpoint >= a->info.component_size) + end_migration(dev, super, map_state); + else { + map->map_state = map_state; + manage_second_map(super, dev); } + super->updates_pending++; + break; + } + if (is_initializing(dev)) { + dprintf("while initialization."); map->map_state = map_state; + super->updates_pending++; + break; } - super->updates_pending++; + break; + case IMSM_T_STATE_FAILED: /* transition to failed state */ + dprintf("failed: "); + if (is_gen_migration(dev)) { + dprintf("while general migration"); + map->map_state = map_state; + super->updates_pending++; + break; + } + if (map->map_state != map_state) { + dprintf("mark failed"); + end_migration(dev, super, map_state); + super->updates_pending++; + a->last_checkpoint = 0; + break; + } + break; + default: + dprintf("state %i\n", map_state); } + dprintf("\n"); + } static int store_imsm_mpb(int fd, struct imsm_super *mpb) @@ -6246,7 +7438,7 @@ static void imsm_sync_metadata(struct supertype *container) static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - int i = get_imsm_disk_idx(dev, idx, -1); + int i = get_imsm_disk_idx(dev, idx, MAP_X); struct dl *dl; for (dl = super->disks; dl; dl = dl->next) @@ -6267,7 +7459,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct mdinfo *additional_test_list) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - int idx = get_imsm_disk_idx(dev, slot, -1); + int idx = get_imsm_disk_idx(dev, slot, MAP_X); struct imsm_super *mpb = super->anchor; struct imsm_map *map; unsigned long long pos; @@ -6329,7 +7521,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, } for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); /* check if this disk is already a member of * this array @@ -6340,9 +7532,9 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, found = 0; j = 0; pos = 0; - array_start = __le32_to_cpu(map->pba_of_lba0); + array_start = pba_of_lba0(map); array_end = array_start + - __le32_to_cpu(map->blocks_per_member) - 1; + blocks_per_member(map) - 1; do { /* check that we can start at pba_of_lba0 with @@ -6385,9 +7577,9 @@ static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed) dev2 = get_imsm_dev(cont->sb, dev_idx); if (dev2) { - state = imsm_check_degraded(cont->sb, dev2, failed); + state = imsm_check_degraded(cont->sb, dev2, failed, MAP_0); if (state == IMSM_T_STATE_FAILED) { - map = get_imsm_map(dev2, 0); + map = get_imsm_map(dev2, MAP_0); if (!map) return 1; for (slot = 0; slot < map->num_members; slot++) { @@ -6395,7 +7587,7 @@ static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed) * Check if failed disks are deleted from intel * disk list or are marked to be deleted */ - idx = get_imsm_disk_idx(dev2, slot, -1); + idx = get_imsm_disk_idx(dev2, slot, MAP_X); idisk = get_imsm_dl_disk(cont->sb, idx); /* * Do not rebuild the array if failed disks @@ -6429,7 +7621,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, struct intel_super *super = a->container->sb; int inst = a->info.container_member; struct imsm_dev *dev = get_imsm_dev(super, inst); - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); int failed = a->info.array.raid_disks; struct mdinfo *rv = NULL; struct mdinfo *d; @@ -6453,10 +7645,16 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n", inst, failed, a->info.array.raid_disks, a->info.array.level); - if (dev->vol.migr_state && - dev->vol.migr_type == MIGR_GEN_MIGR) - /* No repair during migration */ + if (imsm_reshape_blocks_arrays_changes(super)) + return NULL; + + /* Cannot activate another spare if rebuild is in progress already + */ + if (is_rebuilding(dev)) { + dprintf("imsm: No spare activation allowed. " + "Rebuild in progress already.\n"); return NULL; + } if (a->info.array.level == 4) /* No repair for takeovered array @@ -6464,7 +7662,8 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, */ return NULL; - if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED) + if (imsm_check_degraded(super, dev, failed, MAP_0) != + IMSM_T_STATE_DEGRADED) return NULL; /* @@ -6473,9 +7672,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, * are removed from container. */ if (failed) { - dprintf("found failed disks in %s, check if there another" + dprintf("found failed disks in %.*s, check if there another" "failed sub-array.\n", - dev->volume); + MAX_RAID_SERIAL_LEN, dev->volume); /* check if states of the other volumes allow for rebuild */ for (i = 0; i < super->anchor->num_raid_devs; i++) { if (i != inst) { @@ -6505,9 +7704,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, */ dl = imsm_readd(super, i, a); if (!dl) - dl = imsm_add_spare(super, i, a, 0, NULL); + dl = imsm_add_spare(super, i, a, 0, rv); if (!dl) - dl = imsm_add_spare(super, i, a, 1, NULL); + dl = imsm_add_spare(super, i, a, 1, rv); if (!dl) continue; @@ -6535,7 +7734,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, di->disk.minor = dl->minor; di->disk.state = 0; di->recovery_start = 0; - di->data_offset = __le32_to_cpu(map->pba_of_lba0); + di->data_offset = pba_of_lba0(map); di->component_size = a->info.component_size; di->container_member = inst; super->random = random32(); @@ -6544,8 +7743,6 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, num_spares++; dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor, i, di->data_offset); - - break; } if (!rv) @@ -6572,7 +7769,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, } return NULL; } - + mu->space = NULL; mu->space_list = NULL; mu->len = sizeof(struct imsm_update_activate_spare) * num_spares; @@ -6597,15 +7794,15 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u) { struct imsm_dev *dev = get_imsm_dev(super, idx); - struct imsm_map *map = get_imsm_map(dev, 0); - struct imsm_map *new_map = get_imsm_map(&u->dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + struct imsm_map *new_map = get_imsm_map(&u->dev, MAP_0); struct disk_info *inf = get_disk_info(u); struct imsm_disk *disk; int i; int j; for (i = 0; i < map->num_members; i++) { - disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1)); + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, MAP_X)); for (j = 0; j < new_map->num_members; j++) if (serialcmp(disk->serial, inf[j].serial) == 0) return 1; @@ -6718,7 +7915,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * struct imsm_map *map; struct imsm_dev *new_dev = (struct imsm_dev *)*space_list; - struct imsm_map *migr_map = get_imsm_map(dev, 1); + struct imsm_map *migr_map = get_imsm_map(dev, MAP_1); int to_state; struct dl *new_disk; @@ -6726,7 +7923,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * return ret_val; *space_list = **space_list; memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0)); - map = get_imsm_map(new_dev, 0); + map = get_imsm_map(new_dev, MAP_0); if (migr_map) { dprintf("imsm: Error: migration in progress"); return ret_val; @@ -6746,7 +7943,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * migrate(new_dev, super, to_state, MIGR_GEN_MIGR); if (u->new_level > -1) map->raid_level = u->new_level; - migr_map = get_imsm_map(new_dev, 1); + migr_map = get_imsm_map(new_dev, MAP_1); if ((u->new_level == 5) && (migr_map->raid_level == 0)) { int ord = map->num_members - 1; @@ -6796,26 +7993,178 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * } else goto error_disk_add; -skip_disk_add: - *tofree = *space_list; - /* calculate new size +skip_disk_add: + *tofree = *space_list; + /* calculate new size + */ + imsm_set_array_size(new_dev, -1); + + ret_val = 1; + } + } + + if (tofree) + *space_list = tofree; + return ret_val; + +error_disk_add: + dprintf("Error: imsm: Cannot find disk.\n"); + return ret_val; +} + +static int apply_size_change_update(struct imsm_update_size_change *u, + struct intel_super *super) +{ + struct intel_dev *id; + int ret_val = 0; + + dprintf("apply_size_change_update()\n"); + if ((u->subdev < 0) || + (u->subdev > 1)) { + dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev); + return ret_val; + } + + for (id = super->devlist ; id; id = id->next) { + if (id->index == (unsigned)u->subdev) { + struct imsm_dev *dev = get_imsm_dev(super, u->subdev); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + int used_disks = imsm_num_data_members(dev, MAP_0); + unsigned long long blocks_per_member; + + /* calculate new size + */ + blocks_per_member = u->new_size / used_disks; + dprintf("imsm: apply_size_change_update(size: %llu, " + "blocks per member: %llu)\n", + u->new_size, blocks_per_member); + set_blocks_per_member(map, blocks_per_member); + imsm_set_array_size(dev, u->new_size); + + ret_val = 1; + break; + } + } + + return ret_val; +} + + +static int apply_update_activate_spare(struct imsm_update_activate_spare *u, + struct intel_super *super, + struct active_array *active_array) +{ + struct imsm_super *mpb = super->anchor; + struct imsm_dev *dev = get_imsm_dev(super, u->array); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + struct imsm_map *migr_map; + struct active_array *a; + struct imsm_disk *disk; + __u8 to_state; + struct dl *dl; + unsigned int found; + int failed; + int victim; + int i; + int second_map_created = 0; + + for (; u; u = u->next) { + victim = get_imsm_disk_idx(dev, u->slot, MAP_X); + + if (victim < 0) + return 0; + + for (dl = super->disks; dl; dl = dl->next) + if (dl == u->dl) + break; + + if (!dl) { + fprintf(stderr, "error: imsm_activate_spare passed " + "an unknown disk (index: %d)\n", + u->dl->index); + return 0; + } + + /* count failures (excluding rebuilds and the victim) + * to determine map[0] state + */ + failed = 0; + for (i = 0; i < map->num_members; i++) { + if (i == u->slot) + continue; + disk = get_imsm_disk(super, + get_imsm_disk_idx(dev, i, MAP_X)); + if (!disk || is_failed(disk)) + failed++; + } + + /* adding a pristine spare, assign a new index */ + if (dl->index < 0) { + dl->index = super->anchor->num_disks; + super->anchor->num_disks++; + } + disk = &dl->disk; + disk->status |= CONFIGURED_DISK; + disk->status &= ~SPARE_DISK; + + /* mark rebuild */ + to_state = imsm_check_degraded(super, dev, failed, MAP_0); + if (!second_map_created) { + second_map_created = 1; + map->map_state = IMSM_T_STATE_DEGRADED; + migrate(dev, super, to_state, MIGR_REBUILD); + } else + map->map_state = to_state; + migr_map = get_imsm_map(dev, MAP_1); + set_imsm_ord_tbl_ent(map, u->slot, dl->index); + set_imsm_ord_tbl_ent(migr_map, u->slot, + dl->index | IMSM_ORD_REBUILD); + + /* update the family_num to mark a new container + * generation, being careful to record the existing + * family_num in orig_family_num to clean up after + * earlier mdadm versions that neglected to set it. + */ + if (mpb->orig_family_num == 0) + mpb->orig_family_num = mpb->family_num; + mpb->family_num += super->random; + + /* count arrays using the victim in the metadata */ + found = 0; + for (a = active_array; a ; a = a->next) { + dev = get_imsm_dev(super, a->info.container_member); + map = get_imsm_map(dev, MAP_0); + + if (get_imsm_disk_slot(map, victim) >= 0) + found++; + } + + /* delete the victim if it is no longer being + * utilized anywhere + */ + if (!found) { + struct dl **dlp; + + /* We know that 'manager' isn't touching anything, + * so it is safe to delete */ - imsm_set_array_size(new_dev); + for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next) + if ((*dlp)->index == victim) + break; - ret_val = 1; + /* victim may be on the missing list */ + if (!*dlp) + for (dlp = &super->missing; *dlp; + dlp = &(*dlp)->next) + if ((*dlp)->index == victim) + break; + imsm_delete(super, dlp, victim); } } - if (tofree) - *space_list = tofree; - return ret_val; - -error_disk_add: - dprintf("Error: imsm: Cannot find disk.\n"); - return ret_val; + return 1; } - static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, struct intel_super *super, void ***space_list) @@ -6876,8 +8225,8 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, newdev = (void*)sp; /* Copy the dev, but not (all of) the map */ memcpy(newdev, id->dev, sizeof(*newdev)); - oldmap = get_imsm_map(id->dev, 0); - newmap = get_imsm_map(newdev, 0); + oldmap = get_imsm_map(id->dev, MAP_0); + newmap = get_imsm_map(newdev, MAP_0); /* Copy the current map */ memcpy(newmap, oldmap, sizeof_imsm_map(oldmap)); /* update one device only @@ -6888,7 +8237,7 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, devices_to_reshape--; newdev->vol.migr_state = 1; newdev->vol.curr_migr_unit = 0; - newdev->vol.migr_type = MIGR_GEN_MIGR; + set_migr_type(newdev, MIGR_GEN_MIGR); newmap->num_members = u->new_raid_disks; for (i = 0; i < delta_disks; i++) { set_imsm_ord_tbl_ent(newmap, @@ -6897,10 +8246,10 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, } /* New map is correct, now need to save old map */ - newmap = get_imsm_map(newdev, 1); + newmap = get_imsm_map(newdev, MAP_1); memcpy(newmap, oldmap, sizeof_imsm_map(oldmap)); - imsm_set_array_size(newdev); + imsm_set_array_size(newdev, -1); } sp = (void **)id->dev; @@ -6940,11 +8289,12 @@ static int apply_takeover_update(struct imsm_update_takeover *u, if (dev == NULL) return 0; - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); if (u->direction == R10_TO_R0) { /* Number of failed disks must be half of initial disk number */ - if (imsm_count_failed(super, dev) != (map->num_members / 2)) + if (imsm_count_failed(super, dev, MAP_0) != + (map->num_members / 2)) return 0; /* iterate through devices to mark removed disks as spare */ @@ -6958,8 +8308,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u, if (du->index > idx) du->index--; /* mark as spare disk */ - dm->disk.status = SPARE_DISK; - dm->index = -1; + mark_spare(dm); } } /* update map */ @@ -7004,7 +8353,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u, dev_new = (void *)space; memcpy(dev_new, dev, sizeof(*dev)); /* update new map */ - map = get_imsm_map(dev_new, 0); + map = get_imsm_map(dev_new, MAP_0); map->num_members = map->num_members * 2; map->map_state = IMSM_T_STATE_DEGRADED; map->num_domains = 2; @@ -7108,101 +8457,16 @@ static void imsm_process_update(struct supertype *st, super->updates_pending++; break; } + case update_size_change: { + struct imsm_update_size_change *u = (void *)update->buf; + if (apply_size_change_update(u, super)) + super->updates_pending++; + break; + } case update_activate_spare: { struct imsm_update_activate_spare *u = (void *) update->buf; - struct imsm_dev *dev = get_imsm_dev(super, u->array); - struct imsm_map *map = get_imsm_map(dev, 0); - struct imsm_map *migr_map; - struct active_array *a; - struct imsm_disk *disk; - __u8 to_state; - struct dl *dl; - unsigned int found; - int failed; - int victim = get_imsm_disk_idx(dev, u->slot, -1); - int i; - - for (dl = super->disks; dl; dl = dl->next) - if (dl == u->dl) - break; - - if (!dl) { - fprintf(stderr, "error: imsm_activate_spare passed " - "an unknown disk (index: %d)\n", - u->dl->index); - return; - } - - super->updates_pending++; - /* count failures (excluding rebuilds and the victim) - * to determine map[0] state - */ - failed = 0; - for (i = 0; i < map->num_members; i++) { - if (i == u->slot) - continue; - disk = get_imsm_disk(super, - get_imsm_disk_idx(dev, i, -1)); - if (!disk || is_failed(disk)) - failed++; - } - - /* adding a pristine spare, assign a new index */ - if (dl->index < 0) { - dl->index = super->anchor->num_disks; - super->anchor->num_disks++; - } - disk = &dl->disk; - disk->status |= CONFIGURED_DISK; - disk->status &= ~SPARE_DISK; - - /* mark rebuild */ - to_state = imsm_check_degraded(super, dev, failed); - map->map_state = IMSM_T_STATE_DEGRADED; - migrate(dev, super, to_state, MIGR_REBUILD); - migr_map = get_imsm_map(dev, 1); - set_imsm_ord_tbl_ent(map, u->slot, dl->index); - set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD); - - /* update the family_num to mark a new container - * generation, being careful to record the existing - * family_num in orig_family_num to clean up after - * earlier mdadm versions that neglected to set it. - */ - if (mpb->orig_family_num == 0) - mpb->orig_family_num = mpb->family_num; - mpb->family_num += super->random; - - /* count arrays using the victim in the metadata */ - found = 0; - for (a = st->arrays; a ; a = a->next) { - dev = get_imsm_dev(super, a->info.container_member); - map = get_imsm_map(dev, 0); - - if (get_imsm_disk_slot(map, victim) >= 0) - found++; - } - - /* delete the victim if it is no longer being - * utilized anywhere - */ - if (!found) { - struct dl **dlp; - - /* We know that 'manager' isn't touching anything, - * so it is safe to delete - */ - for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next) - if ((*dlp)->index == victim) - break; - - /* victim may be on the missing list */ - if (!*dlp) - for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next) - if ((*dlp)->index == victim) - break; - imsm_delete(super, dlp, victim); - } + if (apply_update_activate_spare(u, super, st->arrays)) + super->updates_pending++; break; } case update_create_array: { @@ -7239,9 +8503,9 @@ static void imsm_process_update(struct supertype *st, goto create_error; } - new_map = get_imsm_map(&u->dev, 0); - new_start = __le32_to_cpu(new_map->pba_of_lba0); - new_end = new_start + __le32_to_cpu(new_map->blocks_per_member); + new_map = get_imsm_map(&u->dev, MAP_0); + new_start = pba_of_lba0(new_map); + new_end = new_start + blocks_per_member(new_map); inf = get_disk_info(u); /* handle activate_spare versus create race: @@ -7250,9 +8514,9 @@ static void imsm_process_update(struct supertype *st, */ for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); - start = __le32_to_cpu(map->pba_of_lba0); - end = start + __le32_to_cpu(map->blocks_per_member); + map = get_imsm_map(dev, MAP_0); + start = pba_of_lba0(map); + end = start + blocks_per_member(map); if ((new_start >= start && new_start <= end) || (start >= new_start && start <= new_end)) /* overlap */; @@ -7431,7 +8695,7 @@ static void imsm_prepare_update(struct supertype *st, if (u->direction == R0_TO_R10) { void **tail = (void **)&update->space_list; struct imsm_dev *dev = get_imsm_dev(super, u->subarray); - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); int num_members = map->num_members; void *space; int size, i; @@ -7563,7 +8827,7 @@ static void imsm_prepare_update(struct supertype *st, struct imsm_map *map; dev = get_imsm_dev(super, u->subdev); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); current_level = map->raid_level; break; } @@ -7593,11 +8857,14 @@ static void imsm_prepare_update(struct supertype *st, dprintf("New anchor length is %llu\n", (unsigned long long)len); break; } + case update_size_change: { + break; + } case update_create_array: { struct imsm_update_create_array *u = (void *) update->buf; struct intel_dev *dv; struct imsm_dev *dev = &u->dev; - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, MAP_0); struct dl *dl; struct disk_info *inf; int i; @@ -7682,20 +8949,20 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); num_members = map->num_members; for (j = 0; j < num_members; j++) { /* update ord entries being careful not to propagate * ord-flags to the first map */ - ord = get_imsm_ord_tbl_ent(dev, j, -1); + ord = get_imsm_ord_tbl_ent(dev, j, MAP_X); if (ord_to_idx(ord) <= index) continue; - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1)); - map = get_imsm_map(dev, 1); + map = get_imsm_map(dev, MAP_1); if (map) set_imsm_ord_tbl_ent(map, j, ord - 1); } @@ -7711,6 +8978,76 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind } } #endif /* MDASSEMBLE */ + +static void close_targets(int *targets, int new_disks) +{ + int i; + + if (!targets) + return; + + for (i = 0; i < new_disks; i++) { + if (targets[i] >= 0) { + close(targets[i]); + targets[i] = -1; + } + } +} + +static int imsm_get_allowed_degradation(int level, int raid_disks, + struct intel_super *super, + struct imsm_dev *dev) +{ + switch (level) { + case 1: + case 10:{ + int ret_val = 0; + struct imsm_map *map; + int i; + + ret_val = raid_disks/2; + /* check map if all disks pairs not failed + * in both maps + */ + map = get_imsm_map(dev, MAP_0); + for (i = 0; i < ret_val; i++) { + int degradation = 0; + if (get_imsm_disk(super, i) == NULL) + degradation++; + if (get_imsm_disk(super, i + 1) == NULL) + degradation++; + if (degradation == 2) + return 0; + } + map = get_imsm_map(dev, MAP_1); + /* if there is no second map + * result can be returned + */ + if (map == NULL) + return ret_val; + /* check degradation in second map + */ + for (i = 0; i < ret_val; i++) { + int degradation = 0; + if (get_imsm_disk(super, i) == NULL) + degradation++; + if (get_imsm_disk(super, i + 1) == NULL) + degradation++; + if (degradation == 2) + return 0; + } + return ret_val; + } + case 5: + return 1; + case 6: + return 2; + default: + return 0; + } +} + + /******************************************************************************* * Function: open_backup_targets * Description: Function opens file descriptors for all devices given in @@ -7719,13 +9056,21 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind * info : general array info * raid_disks : number of disks * raid_fds : table of device's file descriptors + * super : intel super for raid10 degradation check + * dev : intel device for raid10 degradation check * Returns: * 0 : success * -1 : fail ******************************************************************************/ -int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds) +int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds, + struct intel_super *super, struct imsm_dev *dev) { struct mdinfo *sd; + int i; + int opened = 0; + + for (i = 0; i < raid_disks; i++) + raid_fds[i] = -1; for (sd = info->devs ; sd ; sd = sd->next) { char *dn; @@ -7744,8 +9089,19 @@ int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds) raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR); if (raid_fds[sd->disk.raid_disk] < 0) { fprintf(stderr, "cannot open component\n"); - return -1; + continue; } + opened++; + } + /* check if maximum array degradation level is not exceeded + */ + if ((raid_disks - opened) > + imsm_get_allowed_degradation(info->new_level, + raid_disks, + super, dev)) { + fprintf(stderr, "Not enough disks can be opened.\n"); + close_targets(raid_fds, raid_disks); + return -2; } return 0; } @@ -7772,8 +9128,8 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, struct mdinfo *sd; char nm[30]; int fd; - struct imsm_map *map_dest = get_imsm_map(dev, 0); - struct imsm_map *map_src = get_imsm_map(dev, 1); + struct imsm_map *map_dest = get_imsm_map(dev, MAP_0); + struct imsm_map *map_src = get_imsm_map(dev, MAP_1); unsigned long long num_migr_units; unsigned long long array_blocks; @@ -7785,8 +9141,9 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE / max(map_dest->blocks_per_strip, map_src->blocks_per_strip); - migr_rec->dest_depth_per_unit *= map_dest->blocks_per_strip; - new_data_disks = imsm_num_data_members(dev, 0); + migr_rec->dest_depth_per_unit *= + max(map_dest->blocks_per_strip, map_src->blocks_per_strip); + new_data_disks = imsm_num_data_members(dev, MAP_0); migr_rec->blocks_per_unit = __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks); migr_rec->dest_depth_per_unit = @@ -7850,12 +9207,12 @@ int save_backup_imsm(struct supertype *st, unsigned long long *target_offsets = NULL; int *targets = NULL; int i; - struct imsm_map *map_dest = get_imsm_map(dev, 0); + struct imsm_map *map_dest = get_imsm_map(dev, MAP_0); int new_disks = map_dest->num_members; int dest_layout = 0; int dest_chunk; unsigned long long start; - int data_disks = imsm_num_data_members(dev, 0); + int data_disks = imsm_num_data_members(dev, MAP_0); targets = malloc(new_disks * sizeof(int)); if (!targets) @@ -7878,7 +9235,8 @@ int save_backup_imsm(struct supertype *st, target_offsets[i] -= start/data_disks; } - if (open_backup_targets(info, new_disks, targets)) + if (open_backup_targets(info, new_disks, targets, + super, dev)) goto abort; dest_layout = imsm_level_to_layout(map_dest->raid_level); @@ -7904,9 +9262,7 @@ int save_backup_imsm(struct supertype *st, abort: if (targets) { - for (i = 0; i < new_disks; i++) - if (targets[i] >= 0) - close(targets[i]); + close_targets(targets, new_disks); free(targets); } free(target_offsets); @@ -7995,7 +9351,6 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units); char buffer[20]; int skipped_disks = 0; - int max_degradation; err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20); if (err < 1) @@ -8017,16 +9372,15 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) if (id == NULL) return 1; - map_dest = get_imsm_map(id->dev, 0); + map_dest = get_imsm_map(id->dev, MAP_0); new_disks = map_dest->num_members; - max_degradation = new_disks - imsm_num_data_members(id->dev, 0); read_offset = (unsigned long long) __le32_to_cpu(migr_rec->ckpt_area_pba) * 512; write_offset = ((unsigned long long) __le32_to_cpu(migr_rec->dest_1st_member_lba) + - __le32_to_cpu(map_dest->pba_of_lba0)) * 512; + pba_of_lba0(map_dest)) * 512; unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; if (posix_memalign((void **)&buf, 512, unit_len) != 0) @@ -8035,7 +9389,11 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) if (!targets) goto abort; - open_backup_targets(info, new_disks, targets); + if (open_backup_targets(info, new_disks, targets, super, id->dev)) { + fprintf(stderr, + Name ": Cannot open some devices belonging to array.\n"); + goto abort; + } for (i = 0; i < new_disks; i++) { if (targets[i] < 0) { @@ -8046,29 +9404,36 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) fprintf(stderr, Name ": Cannot seek to block: %s\n", strerror(errno)); - goto abort; + skipped_disks++; + continue; } if ((unsigned)read(targets[i], buf, unit_len) != unit_len) { fprintf(stderr, Name ": Cannot read copy area block: %s\n", strerror(errno)); - goto abort; + skipped_disks++; + continue; } if (lseek64(targets[i], write_offset, SEEK_SET) < 0) { fprintf(stderr, Name ": Cannot seek to block: %s\n", strerror(errno)); - goto abort; + skipped_disks++; + continue; } if ((unsigned)write(targets[i], buf, unit_len) != unit_len) { fprintf(stderr, Name ": Cannot restore block: %s\n", strerror(errno)); - goto abort; + skipped_disks++; + continue; } } - if (skipped_disks > max_degradation) { + if (skipped_disks > imsm_get_allowed_degradation(info->new_level, + new_disks, + super, + id->dev)) { fprintf(stderr, Name ": Cannot restore data from backup." " Too many failed disks\n"); @@ -8144,7 +9509,8 @@ static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor static int imsm_reshape_is_allowed_on_container(struct supertype *st, struct geo_params *geo, - int *old_raid_disks) + int *old_raid_disks, + int direction) { /* currently we only support increasing the number of devices * for a container. This increases the number of device for each @@ -8168,6 +9534,12 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, return ret_val; } + if (direction == ROLLBACK_METADATA_CHANGES) { + dprintf("imsm: Metadata changes rollback is not supported for " + "container operation.\n"); + return ret_val; + } + info = container_content_imsm(st, NULL); for (member = info; member; member = member->next) { int result; @@ -8352,6 +9724,43 @@ abort: return 0; } + +/****************************************************************************** + * function: imsm_create_metadata_update_for_size_change() + * Creates update for IMSM array for array size change. + * + ******************************************************************************/ +static int imsm_create_metadata_update_for_size_change( + struct supertype *st, + struct geo_params *geo, + struct imsm_update_size_change **updatep) +{ + struct intel_super *super = st->sb; + int update_memory_size = 0; + struct imsm_update_size_change *u = NULL; + + dprintf("imsm_create_metadata_update_for_size_change(enter)" + " New size = %llu\n", geo->size); + + /* size of all update data without anchor */ + update_memory_size = sizeof(struct imsm_update_size_change); + + u = calloc(1, update_memory_size); + if (u == NULL) { + dprintf("error: cannot get memory for " + "imsm_create_metadata_update_for_size_change\n"); + return 0; + } + u->type = update_size_change; + u->subdev = super->current_vol; + u->new_size = geo->size; + + dprintf("imsm: reshape update preparation : OK\n"); + *updatep = u; + + return update_memory_size; +} + /****************************************************************************** * function: imsm_create_metadata_update_for_migration() * Creates update for IMSM array. @@ -8392,7 +9801,7 @@ static int imsm_create_metadata_update_for_migration( if (dev) { struct imsm_map *map; - map = get_imsm_map(dev, 0); + map = get_imsm_map(dev, MAP_0); if (map) { int current_chunk_size = __le16_to_cpu(map->blocks_per_strip) / 2; @@ -8451,16 +9860,26 @@ static void imsm_update_metadata_locally(struct supertype *st, * Function: imsm_analyze_change * Description: Function analyze change for single volume * and validate if transition is supported -* Parameters: Geometry parameters, supertype structure +* Parameters: Geometry parameters, supertype structure, +* metadata change direction (apply/rollback) * Returns: Operation type code on success, -1 if fail ****************************************************************************/ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, - struct geo_params *geo) + struct geo_params *geo, + int direction) { struct mdinfo info; int change = -1; int check_devs = 0; int chunk; + /* number of added/removed disks in operation result */ + int devNumChange = 0; + /* imsm compatible layout value for array geometry verification */ + int imsm_layout = -1; + int data_disks; + struct imsm_dev *dev; + struct intel_super *super; + long long current_size; getinfo_super_imsm_volume(st, &info, NULL); if ((geo->level != info.array.level) && @@ -8478,23 +9897,23 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, change = -1; goto analyse_change_exit; } + imsm_layout = geo->layout; check_devs = 1; - } - if (geo->level == 10) { + devNumChange = 1; /* parity disk added */ + } else if (geo->level == 10) { change = CH_TAKEOVER; check_devs = 1; + devNumChange = 2; /* two mirrors added */ + imsm_layout = 0x102; /* imsm supported layout */ } break; case 1: - if (geo->level == 0) { - change = CH_TAKEOVER; - check_devs = 1; - } - break; case 10: if (geo->level == 0) { change = CH_TAKEOVER; check_devs = 1; + devNumChange = -(geo->raid_disks/2); + imsm_layout = 0; /* imsm raid0 layout */ } break; } @@ -8529,8 +9948,11 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, change = -1; goto analyse_change_exit; } - } else + } else { geo->layout = info.array.layout; + if (imsm_layout == -1) + imsm_layout = info.array.layout; + } if ((geo->chunksize > 0) && (geo->chunksize != UnSet) && (geo->chunksize != info.array.chunk_size)) @@ -8539,10 +9961,89 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, geo->chunksize = info.array.chunk_size; chunk = geo->chunksize / 1024; + + super = st->sb; + dev = get_imsm_dev(super, super->current_vol); + data_disks = imsm_num_data_members(dev , MAP_0); + /* compute current size per disk member + */ + current_size = info.custom_array_size / data_disks; + + if (geo->size > 0) { + /* align component size + */ + geo->size = imsm_component_size_aligment_check( + get_imsm_raid_level(dev->vol.map), + chunk * 1024, + geo->size * 2); + } + + if ((current_size != geo->size) && (geo->size >= 0)) { + if (change != -1) { + fprintf(stderr, + Name " Error. Size change should be the only " + "one at a time.\n"); + change = -1; + goto analyse_change_exit; + } + if ((super->current_vol + 1) != super->anchor->num_raid_devs) { + fprintf(stderr, + Name " Error. The last volume in container " + "can be expanded only (%i/%i).\n", + super->current_vol, st->devnum); + goto analyse_change_exit; + } + if (geo->size == 0) { + /* requested size change to the maximum available size + */ + unsigned long long freesize; + int rv; + + rv = imsm_get_free_size(st, dev->vol.map->num_members, + 0, chunk, &freesize); + if (rv == 0) { + fprintf(stderr, Name " Error. Cannot find " + "maximum available space.\n"); + change = -1; + goto analyse_change_exit; + } + geo->size = freesize + current_size; + + /* align component size + */ + geo->size = imsm_component_size_aligment_check( + get_imsm_raid_level(dev->vol.map), + chunk * 1024, + geo->size); + } + + if ((direction == ROLLBACK_METADATA_CHANGES)) { + /* accept size for rollback only + */ + } else { + /* round size due to metadata compatibility + */ + geo->size = (geo->size >> SECT_PER_MB_SHIFT) + << SECT_PER_MB_SHIFT; + dprintf("Prepare update for size change to %llu\n", + geo->size ); + if (current_size >= geo->size) { + fprintf(stderr, + Name " Error. Size expanssion is " + "supported only (current size is %llu, " + "requested size /rounded/ is %llu).\n", + current_size, geo->size); + goto analyse_change_exit; + } + } + geo->size *= data_disks; + geo->raid_disks = dev->vol.map->num_members; + change = CH_ARRAY_SIZE; + } if (!validate_geometry_imsm(st, geo->level, - geo->layout, - geo->raid_disks, + imsm_layout, + geo->raid_disks + devNumChange, &chunk, geo->size, 0, 0, 1)) @@ -8563,7 +10064,12 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, } analyse_change_exit: - + if ((direction == ROLLBACK_METADATA_CHANGES) && + ((change == CH_MIGRATION) || (change == CH_TAKEOVER))) { + dprintf("imsm: Metadata changes rollback is not supported for " + "migration and takeover operations.\n"); + change = -1; + } return change; } @@ -8603,7 +10109,7 @@ int imsm_takeover(struct supertype *st, struct geo_params *geo) static int imsm_reshape_super(struct supertype *st, long long size, int level, int layout, int chunksize, int raid_disks, int delta_disks, char *backup, char *dev, - int verbose) + int direction, int verbose) { int ret_val = 1; struct geo_params geo; @@ -8634,7 +10140,7 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, int old_raid_disks = 0; if (imsm_reshape_is_allowed_on_container( - st, &geo, &old_raid_disks)) { + st, &geo, &old_raid_disks, direction)) { struct imsm_update_reshape *u = NULL; int len; @@ -8683,7 +10189,7 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, goto exit_imsm_reshape_super; } super->current_vol = dev->index; - change = imsm_analyze_change(st, &geo); + change = imsm_analyze_change(st, &geo, direction); switch (change) { case CH_TAKEOVER: ret_val = imsm_takeover(st, &geo); @@ -8708,6 +10214,26 @@ static int imsm_reshape_super(struct supertype *st, long long size, int level, free(u); } break; + case CH_ARRAY_SIZE: { + struct imsm_update_size_change *u = NULL; + int len = + imsm_create_metadata_update_for_size_change( + st, &geo, &u); + if (len < 1) { + dprintf("imsm: " + "Cannot prepare update\n"); + break; + } + ret_val = 0; + /* update metadata locally */ + imsm_update_metadata_locally(st, u, len); + /* and possibly remotely */ + if (st->update_tail) + append_metadata_update(st, u, len); + else + free(u); + } + break; default: ret_val = 1; } @@ -8902,12 +10428,12 @@ static int imsm_manage_reshape( goto abort; } - map_src = get_imsm_map(dev, 1); + map_src = get_imsm_map(dev, MAP_1); if (map_src == NULL) goto abort; - ndata = imsm_num_data_members(dev, 0); - odata = imsm_num_data_members(dev, 1); + ndata = imsm_num_data_members(dev, MAP_0); + odata = imsm_num_data_members(dev, MAP_1); chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512; old_data_stripe_length = odata * chunk; @@ -8923,6 +10449,18 @@ static int imsm_manage_reshape( "are present in copy area.\n"); goto abort; } + /* Save checkpoint to update migration record for current + * reshape position (in md). It can be farther than current + * reshape position in metadata. + */ + if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) { + /* ignore error == 2, this can mean end of reshape here + */ + dprintf("imsm: Cannot write checkpoint to " + "migration record (UNIT_SRC_NORMAL, " + "initial save)\n"); + goto abort; + } } /* size for data */