X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=95620649dea09b3de8909e4b9b0fc63970b344e8;hb=895ffd992954069e4ea67efb8a85bb0fd72c3707;hp=52011e5b44a40fdc948ac59d4fc6bf9c14dee56a;hpb=611d95290dd41d73bd8f9cc06f7ec293a40b819e;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index 52011e5b..95620649 100644 --- a/super-intel.c +++ b/super-intel.c @@ -96,6 +96,19 @@ * mutliple PPL area */ +/* + * This macro let's us ensure that no-one accidentally + * changes the size of a struct + */ +#define ASSERT_SIZE(_struct, size) \ +static inline void __assert_size_##_struct(void) \ +{ \ + switch (0) { \ + case 0: break; \ + case (sizeof(struct _struct) == size): break; \ + } \ +} + /* Disk configuration info. */ #define IMSM_MAX_DEVICES 255 struct imsm_disk { @@ -112,6 +125,7 @@ struct imsm_disk { #define IMSM_DISK_FILLERS 3 __u32 filler[IMSM_DISK_FILLERS]; /* 0xF5 - 0x107 MPB_DISK_FILLERS for future expansion */ }; +ASSERT_SIZE(imsm_disk, 48) /* map selector for map managment */ @@ -146,7 +160,8 @@ struct imsm_map { __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members], * top byte contains some flags */ -} __attribute__ ((packed)); +}; +ASSERT_SIZE(imsm_map, 52) struct imsm_vol { __u32 curr_migr_unit; @@ -169,7 +184,8 @@ struct imsm_vol { __u32 filler[4]; struct imsm_map map[1]; /* here comes another one if migr_state */ -} __attribute__ ((packed)); +}; +ASSERT_SIZE(imsm_vol, 84) struct imsm_dev { __u8 volume[MAX_RAID_SERIAL_LEN]; @@ -220,7 +236,8 @@ struct imsm_dev { #define IMSM_DEV_FILLERS 3 __u32 filler[IMSM_DEV_FILLERS]; struct imsm_vol vol; -} __attribute__ ((packed)); +}; +ASSERT_SIZE(imsm_dev, 164) struct imsm_super { __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */ @@ -243,12 +260,14 @@ struct imsm_super { * (starts at 1) */ __u16 filler1; /* 0x4E - 0x4F */ -#define IMSM_FILLERS 34 - __u32 filler[IMSM_FILLERS]; /* 0x50 - 0xD7 RAID_MPB_FILLERS */ + __u64 creation_time; /* 0x50 - 0x57 Array creation time */ +#define IMSM_FILLERS 32 + __u32 filler[IMSM_FILLERS]; /* 0x58 - 0xD7 RAID_MPB_FILLERS */ struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */ /* here comes imsm_dev[num_raid_devs] */ /* here comes BBM logs */ -} __attribute__ ((packed)); +}; +ASSERT_SIZE(imsm_super, 264) #define BBM_LOG_MAX_ENTRIES 254 #define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */ @@ -269,7 +288,8 @@ struct bbm_log { __u32 signature; /* 0xABADB10C */ __u32 entry_count; struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES]; -} __attribute__ ((__packed__)); +}; +ASSERT_SIZE(bbm_log, 2040) static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" }; @@ -296,7 +316,7 @@ struct migr_record { __u32 rec_status; /* Status used to determine how to restart * migration in case it aborts * in some fashion */ - __u32 curr_migr_unit; /* 0..numMigrUnits-1 */ + __u32 curr_migr_unit_lo; /* 0..numMigrUnits-1 */ __u32 family_num; /* Family number of MPB * containing the RaidDev * that is migrating */ @@ -306,17 +326,25 @@ struct migr_record { __u32 dest_depth_per_unit; /* Num member blocks each destMap * member disk * advances per unit-of-operation */ - __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */ - __u32 dest_1st_member_lba; /* First member lba on first - * stripe of destination */ - __u32 num_migr_units; /* Total num migration units-of-op */ + __u32 ckpt_area_pba_lo; /* Pba of first block of ckpt copy area */ + __u32 dest_1st_member_lba_lo; /* First member lba on first + * stripe of destination */ + __u32 num_migr_units_lo; /* Total num migration units-of-op */ __u32 post_migr_vol_cap; /* Size of volume after * migration completes */ __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */ __u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the * migration ckpt record was read from * (for recovered migrations) */ -} __attribute__ ((__packed__)); + __u32 curr_migr_unit_hi; /* 0..numMigrUnits-1 high order 32 bits */ + __u32 ckpt_area_pba_hi; /* Pba of first block of ckpt copy area + * high order 32 bits */ + __u32 dest_1st_member_lba_hi; /* First member lba on first stripe of + * destination - high order 32 bits */ + __u32 num_migr_units_hi; /* Total num migration units-of-op + * high order 32 bits */ +}; +ASSERT_SIZE(migr_record, 64) struct md_list { /* usage marker: @@ -1158,12 +1186,12 @@ static int count_memberships(struct dl *dl, struct intel_super *super) static __u32 imsm_min_reserved_sectors(struct intel_super *super); -static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi) +static int split_ull(unsigned long long n, void *lo, void *hi) { if (lo == 0 || hi == 0) return 1; - *lo = __le32_to_cpu((unsigned)n); - *hi = __le32_to_cpu((unsigned)(n >> 32)); + __put_unaligned32(__cpu_to_le32((__u32)n), lo); + __put_unaligned32(__cpu_to_le32((n >> 32)), hi); return 0; } @@ -1201,6 +1229,45 @@ static unsigned long long num_data_stripes(struct imsm_map *map) return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi); } +static unsigned long long imsm_dev_size(struct imsm_dev *dev) +{ + if (dev == NULL) + return 0; + return join_u32(dev->size_low, dev->size_high); +} + +static unsigned long long migr_chkp_area_pba(struct migr_record *migr_rec) +{ + if (migr_rec == NULL) + return 0; + return join_u32(migr_rec->ckpt_area_pba_lo, + migr_rec->ckpt_area_pba_hi); +} + +static unsigned long long current_migr_unit(struct migr_record *migr_rec) +{ + if (migr_rec == NULL) + return 0; + return join_u32(migr_rec->curr_migr_unit_lo, + migr_rec->curr_migr_unit_hi); +} + +static unsigned long long migr_dest_1st_member_lba(struct migr_record *migr_rec) +{ + if (migr_rec == NULL) + return 0; + return join_u32(migr_rec->dest_1st_member_lba_lo, + migr_rec->dest_1st_member_lba_hi); +} + +static unsigned long long get_num_migr_units(struct migr_record *migr_rec) +{ + if (migr_rec == NULL) + return 0; + return join_u32(migr_rec->num_migr_units_lo, + migr_rec->num_migr_units_hi); +} + static void set_total_blocks(struct imsm_disk *disk, unsigned long long n) { split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi); @@ -1221,7 +1288,54 @@ static void set_num_data_stripes(struct imsm_map *map, unsigned long long n) split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi); } -static struct extent *get_extents(struct intel_super *super, struct dl *dl) +static void set_imsm_dev_size(struct imsm_dev *dev, unsigned long long n) +{ + split_ull(n, &dev->size_low, &dev->size_high); +} + +static void set_migr_chkp_area_pba(struct migr_record *migr_rec, + unsigned long long n) +{ + split_ull(n, &migr_rec->ckpt_area_pba_lo, &migr_rec->ckpt_area_pba_hi); +} + +static void set_current_migr_unit(struct migr_record *migr_rec, + unsigned long long n) +{ + split_ull(n, &migr_rec->curr_migr_unit_lo, + &migr_rec->curr_migr_unit_hi); +} + +static void set_migr_dest_1st_member_lba(struct migr_record *migr_rec, + unsigned long long n) +{ + split_ull(n, &migr_rec->dest_1st_member_lba_lo, + &migr_rec->dest_1st_member_lba_hi); +} + +static void set_num_migr_units(struct migr_record *migr_rec, + unsigned long long n) +{ + split_ull(n, &migr_rec->num_migr_units_lo, + &migr_rec->num_migr_units_hi); +} + +static unsigned long long per_dev_array_size(struct imsm_map *map) +{ + unsigned long long array_size = 0; + + if (map == NULL) + return array_size; + + array_size = num_data_stripes(map) * map->blocks_per_strip; + if (get_imsm_raid_level(map) == 1 || get_imsm_raid_level(map) == 10) + array_size *= 2; + + return array_size; +} + +static struct extent *get_extents(struct intel_super *super, struct dl *dl, + int get_minimal_reservation) { /* find a list of used extents on the given physical device */ struct extent *rv, *e; @@ -1233,7 +1347,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) * regardless of whether the OROM has assigned sectors from the * IMSM_RESERVED_SECTORS region */ - if (dl->index == -1) + if (dl->index == -1 || get_minimal_reservation) reservation = imsm_min_reserved_sectors(super); else reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; @@ -1247,7 +1361,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) if (get_imsm_disk_slot(map, dl->index) >= 0) { e->start = pba_of_lba0(map); - e->size = blocks_per_member(map); + e->size = per_dev_array_size(map); e++; } } @@ -1294,7 +1408,7 @@ static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl) if (dl->index == -1) return MPB_SECTOR_CNT; - e = get_extents(super, dl); + e = get_extents(super, dl, 0); if (!e) return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; @@ -1386,7 +1500,7 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super) return rv; /* find last lba used by subarrays on the smallest active disk */ - e = get_extents(super, dl_min); + e = get_extents(super, dl_min, 0); if (!e) return rv; for (i = 0; e[i].size; i++) @@ -1427,7 +1541,7 @@ int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c) if (!dl) return -EINVAL; /* find last lba used by subarrays */ - e = get_extents(super, dl); + e = get_extents(super, dl, 0); if (!e) return -EINVAL; for (i = 0; e[i].size; i++) @@ -1465,6 +1579,7 @@ static void print_imsm_dev(struct intel_super *super, printf("\n"); printf("[%.16s]:\n", dev->volume); + printf(" Subarray : %d\n", super->current_vol); printf(" UUID : %s\n", uuid); printf(" RAID Level : %d", get_imsm_raid_level(map)); if (map2) @@ -1503,9 +1618,7 @@ static void print_imsm_dev(struct intel_super *super, } else printf(" This Slot : ?\n"); printf(" Sector Size : %u\n", super->sector_size); - sz = __le32_to_cpu(dev->size_high); - sz <<= 32; - sz += __le32_to_cpu(dev->size_low); + sz = imsm_dev_size(dev); printf(" Array Size : %llu%s\n", (unsigned long long)sz * 512 / super->sector_size, human_size(sz * 512)); @@ -1571,6 +1684,8 @@ static void print_imsm_dev(struct intel_super *super, printf("Multiple PPLs on journaling drive\n"); else printf("\n", dev->rwh_policy); + + printf(" Volume ID : %u\n", dev->my_vol_raid_dev_num); } static void print_imsm_disk(struct imsm_disk *disk, @@ -1605,12 +1720,14 @@ void convert_to_4k_imsm_migr_rec(struct intel_super *super) struct migr_record *migr_rec = super->migr_rec; migr_rec->blocks_per_unit /= IMSM_4K_DIV; - migr_rec->ckpt_area_pba /= IMSM_4K_DIV; - migr_rec->dest_1st_member_lba /= IMSM_4K_DIV; migr_rec->dest_depth_per_unit /= IMSM_4K_DIV; split_ull((join_u32(migr_rec->post_migr_vol_cap, migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV), &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi); + set_migr_chkp_area_pba(migr_rec, + migr_chkp_area_pba(migr_rec) / IMSM_4K_DIV); + set_migr_dest_1st_member_lba(migr_rec, + migr_dest_1st_member_lba(migr_rec) / IMSM_4K_DIV); } void convert_to_4k_imsm_disk(struct imsm_disk *disk) @@ -1634,8 +1751,7 @@ void convert_to_4k(struct intel_super *super) struct imsm_dev *dev = __get_imsm_dev(mpb, i); struct imsm_map *map = get_imsm_map(dev, MAP_0); /* dev */ - split_ull((join_u32(dev->size_low, dev->size_high)/IMSM_4K_DIV), - &dev->size_low, &dev->size_high); + set_imsm_dev_size(dev, imsm_dev_size(dev)/IMSM_4K_DIV); dev->vol.curr_migr_unit /= IMSM_4K_DIV; /* map0 */ @@ -1704,8 +1820,8 @@ void examine_migr_rec_imsm(struct intel_super *super) printf("Normal\n"); else printf("Contains Data\n"); - printf(" Current Unit : %u\n", - __le32_to_cpu(migr_rec->curr_migr_unit)); + printf(" Current Unit : %llu\n", + current_migr_unit(migr_rec)); printf(" Family : %u\n", __le32_to_cpu(migr_rec->family_num)); printf(" Ascending : %u\n", @@ -1714,16 +1830,15 @@ void examine_migr_rec_imsm(struct intel_super *super) __le32_to_cpu(migr_rec->blocks_per_unit)); printf(" Dest. Depth Per Unit : %u\n", __le32_to_cpu(migr_rec->dest_depth_per_unit)); - printf(" Checkpoint Area pba : %u\n", - __le32_to_cpu(migr_rec->ckpt_area_pba)); - printf(" First member lba : %u\n", - __le32_to_cpu(migr_rec->dest_1st_member_lba)); - printf(" Total Number of Units : %u\n", - __le32_to_cpu(migr_rec->num_migr_units)); - printf(" Size of volume : %u\n", - __le32_to_cpu(migr_rec->post_migr_vol_cap)); - printf(" Expansion space for LBA64 : %u\n", - __le32_to_cpu(migr_rec->post_migr_vol_cap_hi)); + printf(" Checkpoint Area pba : %llu\n", + migr_chkp_area_pba(migr_rec)); + printf(" First member lba : %llu\n", + migr_dest_1st_member_lba(migr_rec)); + printf(" Total Number of Units : %llu\n", + get_num_migr_units(migr_rec)); + printf(" Size of volume : %llu\n", + join_u32(migr_rec->post_migr_vol_cap, + migr_rec->post_migr_vol_cap_hi)); printf(" Record was read from : %u\n", __le32_to_cpu(migr_rec->ckpt_read_disk_num)); @@ -1736,13 +1851,15 @@ void convert_from_4k_imsm_migr_rec(struct intel_super *super) struct migr_record *migr_rec = super->migr_rec; migr_rec->blocks_per_unit *= IMSM_4K_DIV; - migr_rec->ckpt_area_pba *= IMSM_4K_DIV; - migr_rec->dest_1st_member_lba *= IMSM_4K_DIV; migr_rec->dest_depth_per_unit *= IMSM_4K_DIV; split_ull((join_u32(migr_rec->post_migr_vol_cap, migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV), &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi); + set_migr_chkp_area_pba(migr_rec, + migr_chkp_area_pba(migr_rec) * IMSM_4K_DIV); + set_migr_dest_1st_member_lba(migr_rec, + migr_dest_1st_member_lba(migr_rec) * IMSM_4K_DIV); } void convert_from_4k(struct intel_super *super) @@ -1762,8 +1879,7 @@ void convert_from_4k(struct intel_super *super) struct imsm_dev *dev = __get_imsm_dev(mpb, i); struct imsm_map *map = get_imsm_map(dev, MAP_0); /* dev */ - split_ull((join_u32(dev->size_low, dev->size_high)*IMSM_4K_DIV), - &dev->size_low, &dev->size_high); + set_imsm_dev_size(dev, imsm_dev_size(dev)*IMSM_4K_DIV); dev->vol.curr_migr_unit *= IMSM_4K_DIV; /* map0 */ @@ -1902,15 +2018,18 @@ static void examine_super_imsm(struct supertype *st, char *homehost) __u32 sum; __u32 reserved = imsm_reserved_sectors(super, super->disks); struct dl *dl; + time_t creation_time; strncpy(str, (char *)mpb->sig, MPB_SIG_LEN); str[MPB_SIG_LEN-1] = '\0'; printf(" Magic : %s\n", str); - snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb)); printf(" Version : %s\n", get_imsm_version(mpb)); printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num)); printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num)); printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num)); + creation_time = __le64_to_cpu(mpb->creation_time); + printf(" Creation Time : %.24s\n", + creation_time ? ctime(&creation_time) : "Unknown"); printf(" Attributes : "); if (imsm_check_attributes(mpb->attributes)) printf("All supported\n"); @@ -2015,83 +2134,46 @@ static void export_examine_super_imsm(struct supertype *st) printf("MD_LEVEL=container\n"); printf("MD_UUID=%s\n", nbuf+5); printf("MD_DEVICES=%u\n", mpb->num_disks); + printf("MD_CREATION_TIME=%llu\n", __le64_to_cpu(mpb->creation_time)); } -static int copy_metadata_imsm(struct supertype *st, int from, int to) -{ - /* The second last sector of the device contains - * the "struct imsm_super" metadata. - * This contains mpb_size which is the size in bytes of the - * extended metadata. This is located immediately before - * the imsm_super. - * We want to read all that, plus the last sector which - * may contain a migration record, and write it all - * to the target. - */ - void *buf; - unsigned long long dsize, offset; - int sectors; - struct imsm_super *sb; - struct intel_super *super = st->sb; - unsigned int sector_size = super->sector_size; - unsigned int written = 0; - - if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0) - return 1; - - if (!get_dev_size(from, NULL, &dsize)) - goto err; - - if (lseek64(from, dsize-(2*sector_size), 0) < 0) - goto err; - if ((unsigned int)read(from, buf, sector_size) != sector_size) - goto err; - sb = buf; - if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) - goto err; - - sectors = mpb_sectors(sb, sector_size) + 2; - offset = dsize - sectors * sector_size; - if (lseek64(from, offset, 0) < 0 || - lseek64(to, offset, 0) < 0) - goto err; - while (written < sectors * sector_size) { - int n = sectors*sector_size - written; - if (n > 4096) - n = 4096; - if (read(from, buf, n) != n) - goto err; - if (write(to, buf, n) != n) - goto err; - written += n; - } - free(buf); - return 0; -err: - free(buf); - return 1; -} - -static void detail_super_imsm(struct supertype *st, char *homehost) +static void detail_super_imsm(struct supertype *st, char *homehost, + char *subarray) { struct mdinfo info; char nbuf[64]; + struct intel_super *super = st->sb; + int temp_vol = super->current_vol; + + if (subarray) + super->current_vol = strtoul(subarray, NULL, 10); getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf("\n UUID : %s\n", nbuf + 5); + + super->current_vol = temp_vol; } -static void brief_detail_super_imsm(struct supertype *st) +static void brief_detail_super_imsm(struct supertype *st, char *subarray) { struct mdinfo info; char nbuf[64]; + struct intel_super *super = st->sb; + int temp_vol = super->current_vol; + + if (subarray) + super->current_vol = strtoul(subarray, NULL, 10); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf(" UUID=%s", nbuf + 5); + + super->current_vol = temp_vol; } -static int imsm_read_serial(int fd, char *devname, __u8 *serial); +static int imsm_read_serial(int fd, char *devname, __u8 *serial, + size_t serial_buf_len); static void fd2devname(int fd, char *name); static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose) @@ -2237,8 +2319,9 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b else { fd2devname(fd, buf); printf(" Port%d : %s", port, buf); - if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0) - printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf); + if (imsm_read_serial(fd, NULL, (__u8 *)buf, + sizeof(buf)) == 0) + printf(" (%s)\n", buf); else printf(" ()\n"); close(fd); @@ -2261,52 +2344,45 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b return err; } -static int print_vmd_attached_devs(struct sys_dev *hba) +static int print_nvme_info(struct sys_dev *hba) { + char buf[1024]; struct dirent *ent; DIR *dir; - char path[292]; - char link[256]; - char *c, *rp; - - if (hba->type != SYS_DEV_VMD) - return 1; + char *rp; + int fd; - /* scroll through /sys/dev/block looking for devices attached to - * this hba - */ - dir = opendir("/sys/bus/pci/drivers/nvme"); + dir = opendir("/sys/block/"); if (!dir) return 1; for (ent = readdir(dir); ent; ent = readdir(dir)) { - int n; - - /* is 'ent' a device? check that the 'subsystem' link exists and - * that its target matches 'bus' - */ - sprintf(path, "/sys/bus/pci/drivers/nvme/%s/subsystem", - ent->d_name); - n = readlink(path, link, sizeof(link)); - if (n < 0 || n >= (int)sizeof(link)) - continue; - link[n] = '\0'; - c = strrchr(link, '/'); - if (!c) - continue; - if (strncmp("pci", c+1, strlen("pci")) != 0) - continue; - - sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name); - - rp = realpath(path, NULL); - if (!rp) - continue; + if (strstr(ent->d_name, "nvme")) { + sprintf(buf, "/sys/block/%s", ent->d_name); + rp = realpath(buf, NULL); + if (!rp) + continue; + if (path_attached_to_hba(rp, hba->path)) { + fd = open_dev(ent->d_name); + if (fd < 0) { + free(rp); + continue; + } - if (path_attached_to_hba(rp, hba->path)) { - printf(" NVMe under VMD : %s\n", rp); + fd2devname(fd, buf); + if (hba->type == SYS_DEV_VMD) + printf(" NVMe under VMD : %s", buf); + else if (hba->type == SYS_DEV_NVME) + printf(" NVMe Device : %s", buf); + if (!imsm_read_serial(fd, NULL, (__u8 *)buf, + sizeof(buf))) + printf(" (%s)\n", buf); + else + printf("()\n"); + close(fd); + } + free(rp); } - free(rp); } closedir(dir); @@ -2521,7 +2597,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle char buf[PATH_MAX]; printf(" I/O Controller : %s (%s)\n", vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type)); - if (print_vmd_attached_devs(hba)) { + if (print_nvme_info(hba)) { if (verbose > 0) pr_err("failed to get devices attached to VMD domain.\n"); result |= 2; @@ -2536,7 +2612,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle if (entry->type == SYS_DEV_NVME) { for (hba = list; hba; hba = hba->next) { if (hba->type == SYS_DEV_NVME) - printf(" NVMe Device : %s\n", hba->path); + print_nvme_info(hba); } printf("\n"); continue; @@ -2759,13 +2835,11 @@ static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev) return num_stripes_per_unit_resync(dev); } -static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) +static __u8 imsm_num_data_members(struct imsm_map *map) { /* named 'imsm_' because raid0, raid1 and raid10 * counter-intuitively have the same number of data disks */ - struct imsm_map *map = get_imsm_map(dev, second_map); - switch (get_imsm_raid_level(map)) { case 0: return map->num_members; @@ -2781,6 +2855,36 @@ static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) } } +static unsigned long long calc_component_size(struct imsm_map *map, + struct imsm_dev *dev) +{ + unsigned long long component_size; + unsigned long long dev_size = imsm_dev_size(dev); + long long calc_dev_size = 0; + unsigned int member_disks = imsm_num_data_members(map); + + if (member_disks == 0) + return 0; + + component_size = per_dev_array_size(map); + calc_dev_size = component_size * member_disks; + + /* Component size is rounded to 1MB so difference between size from + * metadata and size calculated from num_data_stripes equals up to + * 2048 blocks per each device. If the difference is higher it means + * that array size was expanded and num_data_stripes was not updated. + */ + if (llabs(calc_dev_size - (long long)dev_size) > + (1 << SECT_PER_MB_SHIFT) * member_disks) { + component_size = dev_size / member_disks; + dprintf("Invalid num_data_stripes in metadata; expected=%llu, found=%llu\n", + component_size / map->blocks_per_strip, + num_data_stripes(map)); + } + + return component_size; +} + static __u32 parity_segment_depth(struct imsm_dev *dev) { struct imsm_map *map = get_imsm_map(dev, MAP_0); @@ -2854,7 +2958,7 @@ static __u64 blocks_per_migr_unit(struct intel_super *super, */ stripes_per_unit = num_stripes_per_unit_resync(dev); migr_chunk = migr_strip_blocks_resync(dev); - disks = imsm_num_data_members(dev, MAP_0); + disks = imsm_num_data_members(map); blocks_per_unit = stripes_per_unit * migr_chunk * disks; stripe = __le16_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; @@ -3047,7 +3151,7 @@ static int imsm_create_metadata_checkpoint_update( return 0; } (*u)->type = update_general_migration_checkpoint; - (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit); + (*u)->curr_migr_unit = current_migr_unit(super->migr_rec); dprintf("prepared for %u\n", (*u)->curr_migr_unit); return update_memory_size; @@ -3178,27 +3282,27 @@ int imsm_reshape_blocks_arrays_changes(struct intel_super *super) } return rv; } -static unsigned long long imsm_component_size_aligment_check(int level, +static unsigned long long imsm_component_size_alignment_check(int level, int chunk_size, unsigned int sector_size, unsigned long long component_size) { - unsigned int component_size_alligment; + unsigned int component_size_alignment; - /* check component size aligment + /* check component size alignment */ - component_size_alligment = component_size % (chunk_size/sector_size); + component_size_alignment = component_size % (chunk_size/sector_size); - dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alligment = %u\n", + dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alignment = %u\n", level, chunk_size, component_size, - component_size_alligment); + component_size_alignment); - if (component_size_alligment && (level != 1) && (level != UnSet)) { - dprintf("imsm: reported component size alligned from %llu ", + if (component_size_alignment && (level != 1) && (level != UnSet)) { + dprintf("imsm: reported component size aligned from %llu ", component_size); - component_size -= component_size_alligment; + component_size -= component_size_alignment; dprintf_cont("to %llu (%i).\n", - component_size, component_size_alligment); + component_size, component_size_alignment); } return component_size; @@ -3240,9 +3344,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->array.chunk_size = __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9; info->array.state = !(dev->vol.dirty & RAIDVOL_DIRTY); - info->custom_array_size = __le32_to_cpu(dev->size_high); - info->custom_array_size <<= 32; - info->custom_array_size |= __le32_to_cpu(dev->size_low); + info->custom_array_size = imsm_dev_size(dev); info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); if (is_gen_migration(dev)) { @@ -3302,15 +3404,8 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, } info->data_offset = pba_of_lba0(map_to_analyse); - - if (info->array.level == 5) { - info->component_size = num_data_stripes(map_to_analyse) * - map_to_analyse->blocks_per_strip; - } else { - info->component_size = blocks_per_member(map_to_analyse); - } - - info->component_size = imsm_component_size_aligment_check( + info->component_size = calc_component_size(map, dev); + info->component_size = imsm_component_size_alignment_check( info->array.level, info->array.chunk_size, super->sector_size, @@ -3357,13 +3452,12 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, case MIGR_GEN_MIGR: { __u64 blocks_per_unit = blocks_per_migr_unit(super, dev); - __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit); - unsigned long long array_blocks; + __u64 units = current_migr_unit(migr_rec); int used_disks; if (__le32_to_cpu(migr_rec->ascending_migr) && (units < - (__le32_to_cpu(migr_rec->num_migr_units)-1)) && + (get_num_migr_units(migr_rec)-1)) && (super->migr_rec->rec_status == __cpu_to_le32(UNIT_SRC_IN_CP_AREA))) units++; @@ -3375,14 +3469,10 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, (unsigned long long)blocks_per_unit, info->reshape_progress); - used_disks = imsm_num_data_members(dev, MAP_1); + used_disks = imsm_num_data_members(prev_map); if (used_disks > 0) { - array_blocks = blocks_per_member(map) * + info->custom_array_size = per_dev_array_size(map) * used_disks; - info->custom_array_size = - round_size_to_mb(array_blocks, - used_disks); - } } case MIGR_VERIFY: @@ -3882,11 +3972,11 @@ static int nvme_get_serial(int fd, void *buf, size_t buf_len) extern int scsi_get_serial(int fd, void *buf, size_t buf_len); static int imsm_read_serial(int fd, char *devname, - __u8 serial[MAX_RAID_SERIAL_LEN]) + __u8 *serial, size_t serial_buf_len) { char buf[50]; int rv; - int len; + size_t len; char *dest; char *src; unsigned int i; @@ -3929,13 +4019,13 @@ static int imsm_read_serial(int fd, char *devname, len = dest - buf; dest = buf; - /* truncate leading characters */ - if (len > MAX_RAID_SERIAL_LEN) { - dest += len - MAX_RAID_SERIAL_LEN; - len = MAX_RAID_SERIAL_LEN; + if (len > serial_buf_len) { + /* truncate leading characters */ + dest += len - serial_buf_len; + len = serial_buf_len; } - memset(serial, 0, MAX_RAID_SERIAL_LEN); + memset(serial, 0, serial_buf_len); memcpy(serial, dest, len); return 0; @@ -3990,7 +4080,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) char name[40]; __u8 serial[MAX_RAID_SERIAL_LEN]; - rv = imsm_read_serial(fd, devname, serial); + rv = imsm_read_serial(fd, devname, serial, MAX_RAID_SERIAL_LEN); if (rv != 0) return 2; @@ -4483,6 +4573,11 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de struct sys_dev *hba_name; int rv = 0; + if (fd >= 0 && test_partition(fd)) { + pr_err("imsm: %s is a partition, cannot be used in IMSM\n", + devname); + return 1; + } if (fd < 0 || check_env("IMSM_NO_PLATFORM")) { super->orom = NULL; super->hba = NULL; @@ -5245,10 +5340,22 @@ static int check_name(struct intel_super *super, char *name, int quiet) { struct imsm_super *mpb = super->anchor; char *reason = NULL; + char *start = name; + size_t len = strlen(name); int i; - if (strlen(name) > MAX_RAID_SERIAL_LEN) + if (len > 0) { + while (isspace(start[len - 1])) + start[--len] = 0; + while (*start && isspace(*start)) + ++start, --len; + memmove(name, start, len + 1); + } + + if (len > MAX_RAID_SERIAL_LEN) reason = "must be 16 characters or less"; + else if (len == 0) + reason = "must be a non-empty string"; for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); @@ -5282,6 +5389,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, struct imsm_map *map; int idx = mpb->num_raid_devs; int i; + int namelen; unsigned long long array_blocks; size_t size_old, size_new; unsigned long long num_data_stripes; @@ -5361,7 +5469,12 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, return 0; dv = xmalloc(sizeof(*dv)); dev = xcalloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1)); - strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN); + /* + * Explicitly allow truncating to not confuse gcc's + * -Werror=stringop-truncation + */ + namelen = min((int) strlen(name), MAX_RAID_SERIAL_LEN); + memcpy(dev->volume, name, namelen); array_blocks = calc_array_size(info->level, info->raid_disks, info->layout, info->chunk_size, s->size * BLOCKS_PER_KB); @@ -5370,8 +5483,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, array_blocks = round_size_to_mb(array_blocks, data_disks); size_per_member = array_blocks / data_disks; - dev->size_low = __cpu_to_le32((__u32) array_blocks); - dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32)); + set_imsm_dev_size(dev, array_blocks); dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING); vol = &dev->vol; vol->migr_state = 0; @@ -5380,9 +5492,6 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, vol->curr_migr_unit = 0; map = get_imsm_map(dev, MAP_0); set_pba_of_lba0(map, super->create_offset); - set_blocks_per_member(map, info_to_blocks_per_member(info, - size_per_member / - BLOCKS_PER_KB)); map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); map->failed_disk_num = ~0; if (info->level > 0) @@ -5414,6 +5523,11 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, num_data_stripes /= map->num_domains; set_num_data_stripes(map, num_data_stripes); + size_per_member += NUM_BLOCKS_DIRTY_STRIPE_REGION; + set_blocks_per_member(map, info_to_blocks_per_member(info, + size_per_member / + BLOCKS_PER_KB)); + map->num_members = info->raid_disks; for (i = 0; i < map->num_members; i++) { /* initialized in add_to_super */ @@ -5569,6 +5683,11 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, return 1; } + if (mpb->num_disks == 0) + if (!get_dev_sector_size(dl->fd, dl->devname, + &super->sector_size)) + return 1; + if (!drive_validate_sector_size(super, dl)) { pr_err("Combining drives of different sector size in one volume is not allowed\n"); return 1; @@ -5647,6 +5766,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, sum += __gen_imsm_checksum(mpb); mpb->family_num = __cpu_to_le32(sum); mpb->orig_family_num = mpb->family_num; + mpb->creation_time = __cpu_to_le64((__u64)time(NULL)); } super->current_disk = dl; return 0; @@ -5669,7 +5789,7 @@ int mark_spare(struct dl *disk) return ret_val; ret_val = 0; - if (!imsm_read_serial(disk->fd, NULL, serial)) { + if (!imsm_read_serial(disk->fd, NULL, serial, MAX_RAID_SERIAL_LEN)) { /* Restore disk serial number, because takeover marks disk * as failed and adds to serial ':0' before it becomes * a spare disk. @@ -5684,6 +5804,9 @@ int mark_spare(struct dl *disk) return ret_val; } + +static int write_super_imsm_spare(struct intel_super *super, struct dl *d); + static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname, unsigned long long data_offset) @@ -5720,7 +5843,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, dd->fd = fd; dd->e = NULL; dd->action = DISK_ADD; - rv = imsm_read_serial(fd, devname, dd->serial); + rv = imsm_read_serial(fd, devname, dd->serial, MAX_RAID_SERIAL_LEN); if (rv) { pr_err("failed to retrieve scsi serial, aborting\n"); if (dd->devname) @@ -5813,9 +5936,13 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, dd->next = super->disk_mgmt_list; super->disk_mgmt_list = dd; } else { + /* this is called outside of mdmon + * write initial spare metadata + * mdmon will overwrite it. + */ dd->next = super->disks; super->disks = dd; - super->updates_pending++; + write_super_imsm_spare(super, dd); } return 0; @@ -5854,15 +5981,15 @@ static union { struct imsm_super anchor; } spare_record __attribute__ ((aligned(MAX_SECTOR_SIZE))); -/* spare records have their own family number and do not have any defined raid - * devices - */ -static int write_super_imsm_spares(struct intel_super *super, int doclose) + +static int write_super_imsm_spare(struct intel_super *super, struct dl *d) { struct imsm_super *mpb = super->anchor; struct imsm_super *spare = &spare_record.anchor; __u32 sum; - struct dl *d; + + if (d->index != -1) + return 1; spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)); spare->generation_num = __cpu_to_le32(1UL); @@ -5875,28 +6002,41 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH, MPB_SIGNATURE MPB_VERSION_RAID0); - for (d = super->disks; d; d = d->next) { - if (d->index != -1) - continue; + spare->disk[0] = d->disk; + if (__le32_to_cpu(d->disk.total_blocks_hi) > 0) + spare->attributes |= MPB_ATTRIB_2TB_DISK; - spare->disk[0] = d->disk; - if (__le32_to_cpu(d->disk.total_blocks_hi) > 0) - spare->attributes |= MPB_ATTRIB_2TB_DISK; + if (super->sector_size == 4096) + convert_to_4k_imsm_disk(&spare->disk[0]); + + sum = __gen_imsm_checksum(spare); + spare->family_num = __cpu_to_le32(sum); + spare->orig_family_num = 0; + sum = __gen_imsm_checksum(spare); + spare->check_sum = __cpu_to_le32(sum); - if (super->sector_size == 4096) - convert_to_4k_imsm_disk(&spare->disk[0]); + if (store_imsm_mpb(d->fd, spare)) { + pr_err("failed for device %d:%d %s\n", + d->major, d->minor, strerror(errno)); + return 1; + } + + return 0; +} +/* spare records have their own family number and do not have any defined raid + * devices + */ +static int write_super_imsm_spares(struct intel_super *super, int doclose) +{ + struct dl *d; - sum = __gen_imsm_checksum(spare); - spare->family_num = __cpu_to_le32(sum); - spare->orig_family_num = 0; - sum = __gen_imsm_checksum(spare); - spare->check_sum = __cpu_to_le32(sum); + for (d = super->disks; d; d = d->next) { + if (d->index != -1) + continue; - if (store_imsm_mpb(d->fd, spare)) { - pr_err("failed for device %d:%d %s\n", - d->major, d->minor, strerror(errno)); + if (write_super_imsm_spare(super, d)) return 1; - } + if (doclose) { close(d->fd); d->fd = -1; @@ -7047,7 +7187,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, mpb = super->anchor; if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) { - pr_err("RAID gemetry validation failed. Cannot proceed with the action(s).\n"); + pr_err("RAID geometry validation failed. Cannot proceed with the action(s).\n"); return 0; } if (!dev) { @@ -7065,7 +7205,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, pos = 0; i = 0; - e = get_extents(super, dl); + e = get_extents(super, dl, 0); if (!e) continue; do { unsigned long long esize; @@ -7123,7 +7263,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, } /* retrieve the largest free space block */ - e = get_extents(super, dl); + e = get_extents(super, dl, 0); maxsize = 0; i = 0; if (e) { @@ -7159,11 +7299,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, maxsize = merge_extents(super, i); - if (!check_env("IMSM_NO_PLATFORM") && - mpb->num_raid_devs > 0 && size && size != maxsize) { - pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n"); - return 0; - } + if (mpb->num_raid_devs > 0 && size && size != maxsize) + pr_err("attempting to create a second volume with size less then remaining space.\n"); if (maxsize < size || maxsize == 0) { if (verbose) { @@ -7221,7 +7358,7 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks, if (super->orom && dl->index < 0 && mpb->num_raid_devs) continue; - e = get_extents(super, dl); + e = get_extents(super, dl, 0); if (!e) continue; for (i = 1; e[i-1].size; i++) @@ -7254,11 +7391,8 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks, } maxsize = size; } - if (!check_env("IMSM_NO_PLATFORM") && - mpb->num_raid_devs > 0 && size && size != maxsize) { - pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n"); - return 0; - } + if (mpb->num_raid_devs > 0 && size && size != maxsize) + pr_err("attempting to create a second volume with size less then remaining space.\n"); cnt = 0; for (dl = super->disks; dl; dl = dl->next) if (dl->e) @@ -7316,6 +7450,18 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, verbose); } + /* + * Size is given in sectors. + */ + if (size && (size < 2048)) { + pr_err("Given size must be greater than 1M.\n"); + /* Depends on algorithm in Create.c : + * if container was given (dev == NULL) return -1, + * if block device was given ( dev != NULL) return 0. + */ + return dev ? -1 : 0; + } + if (!dev) { if (st->sb) { struct intel_super *super = st->sb; @@ -7427,18 +7573,17 @@ static void default_geometry_imsm(struct supertype *st, int *level, int *layout, static void handle_missing(struct intel_super *super, struct imsm_dev *dev); -static int kill_subarray_imsm(struct supertype *st) +static int kill_subarray_imsm(struct supertype *st, char *subarray_id) { - /* remove the subarray currently referenced by ->current_vol */ + /* remove the subarray currently referenced by subarray_id */ __u8 i; struct intel_dev **dp; struct intel_super *super = st->sb; - __u8 current_vol = super->current_vol; + __u8 current_vol = strtoul(subarray_id, NULL, 10); struct imsm_super *mpb = super->anchor; - if (super->current_vol < 0) + if (mpb->num_raid_devs == 0) return 2; - super->current_vol = -1; /* invalidate subarray cursor */ /* block deletions that would change the uuid of active subarrays * @@ -7530,11 +7675,12 @@ static int update_subarray_imsm(struct supertype *st, char *subarray, append_metadata_update(st, u, sizeof(*u)); } else { struct imsm_dev *dev; - int i; + int i, namelen; dev = get_imsm_dev(super, vol); - strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN); - dev->volume[MAX_RAID_SERIAL_LEN-1] = '\0'; + memset(dev->volume, '\0', MAX_RAID_SERIAL_LEN); + namelen = min((int)strlen(name), MAX_RAID_SERIAL_LEN); + memcpy(dev->volume, name, namelen); for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); handle_missing(super, dev); @@ -7677,6 +7823,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra int sb_errors = 0; struct dl *d; int spare_disks = 0; + int current_vol = super->current_vol; /* do not assemble arrays when not all attributes are supported */ if (imsm_check_attributes(mpb->attributes) == 0) { @@ -7733,7 +7880,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra level, /* RAID level */ imsm_level_to_layout(level), map->num_members, /* raid disks */ - &chunk, join_u32(dev->size_low, dev->size_high), + &chunk, imsm_dev_size(dev), 1 /* verbose */)) { pr_err("IMSM RAID geometry validation failed. Array %s activation is blocked.\n", dev->volume); @@ -7771,7 +7918,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra skip = 1; if (!skip && (ord & IMSM_ORD_REBUILD)) recovery_start = 0; - + if (!(ord & IMSM_ORD_REBUILD)) + this->array.working_disks++; /* * if we skip some disks the array will be assmebled degraded; * reset resync start to avoid a dirty-degraded @@ -7813,23 +7961,17 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra else this->array.spare_disks++; } - if (info_d->recovery_start == MaxSector) - this->array.working_disks++; info_d->events = __le32_to_cpu(mpb->generation_num); info_d->data_offset = pba_of_lba0(map); + info_d->component_size = calc_component_size(map, dev); if (map->raid_level == 5) { - info_d->component_size = - num_data_stripes(map) * - map->blocks_per_strip; info_d->ppl_sector = this->ppl_sector; info_d->ppl_size = this->ppl_size; if (this->consistency_policy == CONSISTENCY_POLICY_PPL && recovery_start == 0) this->resync_start = 0; - } else { - info_d->component_size = blocks_per_member(map); } info_d->bb.supported = 1; @@ -7848,6 +7990,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra rest = this; } + super->current_vol = current_vol; return rest; } @@ -8044,7 +8187,7 @@ static int mark_failure(struct intel_super *super, strcat(buf, ":0"); if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN) shift = len - MAX_RAID_SERIAL_LEN + 1; - strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN); + memcpy(disk->serial, &buf[shift], len + 1 - shift); disk->status |= FAILED_DISK; set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD); @@ -8060,7 +8203,8 @@ static int mark_failure(struct intel_super *super, set_imsm_ord_tbl_ent(map2, slot2, idx | IMSM_ORD_REBUILD); } - if (map->failed_disk_num == 0xff) + if (map->failed_disk_num == 0xff || + (!is_rebuilding(dev) && map->failed_disk_num > slot)) map->failed_disk_num = slot; clear_disk_badblocks(super->bbm_log, ord_to_idx(ord)); @@ -8135,38 +8279,33 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev) static unsigned long long imsm_set_array_size(struct imsm_dev *dev, long long new_size) { - int used_disks = imsm_num_data_members(dev, MAP_0); unsigned long long array_blocks; - struct imsm_map *map; + struct imsm_map *map = get_imsm_map(dev, MAP_0); + int used_disks = imsm_num_data_members(map); if (used_disks == 0) { /* when problems occures * return current array_blocks value */ - array_blocks = __le32_to_cpu(dev->size_high); - array_blocks = array_blocks << 32; - array_blocks += __le32_to_cpu(dev->size_low); + array_blocks = imsm_dev_size(dev); return array_blocks; } /* set array size in metadata */ - if (new_size <= 0) { + if (new_size <= 0) /* OLCE size change is caused by added disks */ - map = get_imsm_map(dev, MAP_0); - array_blocks = blocks_per_member(map) * used_disks; - } else { + array_blocks = per_dev_array_size(map) * used_disks; + else /* Online Volume Size Change * Using available free space */ array_blocks = new_size; - } array_blocks = round_size_to_mb(array_blocks, used_disks); - dev->size_low = __cpu_to_le32((__u32)array_blocks); - dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32)); + set_imsm_dev_size(dev, array_blocks); return array_blocks; } @@ -8272,10 +8411,10 @@ static int imsm_set_array_state(struct active_array *a, int consistent) int used_disks; struct mdinfo *mdi; - used_disks = imsm_num_data_members(dev, MAP_0); + used_disks = imsm_num_data_members(map); if (used_disks > 0) { array_blocks = - blocks_per_member(map) * + per_dev_array_size(map) * used_disks; array_blocks = round_size_to_mb(array_blocks, @@ -8419,7 +8558,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) disk = get_imsm_disk(super, ord_to_idx(ord)); /* check for new failures */ - if (state & DS_FAULTY) { + if (disk && (state & DS_FAULTY)) { if (mark_failure(super, dev, disk, ord_to_idx(ord))) super->updates_pending++; } @@ -8487,15 +8626,23 @@ static void imsm_set_disk(struct active_array *a, int n, int state) break; } if (is_rebuilding(dev)) { - dprintf_cont("while rebuilding."); - if (map->map_state != map_state) { - dprintf_cont(" Map state change"); - end_migration(dev, super, map_state); + dprintf_cont("while rebuilding "); + if (state & DS_FAULTY) { + dprintf_cont("removing failed drive "); + if (n == map->failed_disk_num) { + dprintf_cont("end migration"); + end_migration(dev, super, map_state); + a->last_checkpoint = 0; + } else { + dprintf_cont("fail detected during rebuild, changing map state"); + map->map_state = map_state; + } super->updates_pending++; - } else if (!rebuild_done) { - break; } + if (!rebuild_done) + break; + /* check if recovery is really finished */ for (mdi = a->info.devs; mdi ; mdi = mdi->next) if (mdi->recovery_start != MaxSector) { @@ -8504,7 +8651,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) } if (recovery_not_finished) { dprintf_cont("\n"); - dprintf("Rebuild has not finished yet, state not changed"); + dprintf_cont("Rebuild has not finished yet"); if (a->last_checkpoint < mdi->recovery_start) { a->last_checkpoint = mdi->recovery_start; @@ -8514,9 +8661,9 @@ static void imsm_set_disk(struct active_array *a, int n, int state) } dprintf_cont(" Rebuild done, still degraded"); - dev->vol.migr_state = 0; - set_migr_type(dev, 0); - dev->vol.curr_migr_unit = 0; + end_migration(dev, super, map_state); + a->last_checkpoint = 0; + super->updates_pending++; for (i = 0; i < map->num_members; i++) { int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0); @@ -8697,7 +8844,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, /* Does this unused device have the requisite free space? * It needs to be able to cover all member volumes */ - ex = get_extents(super, dl); + ex = get_extents(super, dl, 1); if (!ex) { dprintf("cannot get extents\n"); continue; @@ -8717,11 +8864,11 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, pos = 0; array_start = pba_of_lba0(map); array_end = array_start + - blocks_per_member(map) - 1; + per_dev_array_size(map) - 1; do { /* check that we can start at pba_of_lba0 with - * blocks_per_member of space + * num_data_stripes*blocks_per_stripe of space */ if (array_start >= pos && array_end < ex[j].start) { found = 1; @@ -9051,6 +9198,9 @@ static int add_remove_disk_update(struct intel_super *super) remove_disk_super(super, disk_cfg->major, disk_cfg->minor); + } else { + disk_cfg->fd = disk->fd; + disk->fd = -1; } } /* release allocate disk structure */ @@ -9130,8 +9280,10 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * */ if (u->new_chunksize > 0) { unsigned long long num_data_stripes; + struct imsm_map *dest_map = + get_imsm_map(dev, MAP_0); int used_disks = - imsm_num_data_members(dev, MAP_0); + imsm_num_data_members(dest_map); if (used_disks == 0) return ret_val; @@ -9139,13 +9291,18 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * map->blocks_per_strip = __cpu_to_le16(u->new_chunksize * 2); num_data_stripes = - (join_u32(dev->size_low, dev->size_high) - / used_disks); + imsm_dev_size(dev) / used_disks; num_data_stripes /= map->blocks_per_strip; num_data_stripes /= map->num_domains; set_num_data_stripes(map, num_data_stripes); } + /* ensure blocks_per_member has valid value + */ + set_blocks_per_member(map, + per_dev_array_size(map) + + NUM_BLOCKS_DIRTY_STRIPE_REGION); + /* add disk */ if (u->new_level != 5 || migr_map->raid_level != 0 || @@ -9209,18 +9366,24 @@ static int apply_size_change_update(struct imsm_update_size_change *u, if (id->index == (unsigned)u->subdev) { struct imsm_dev *dev = get_imsm_dev(super, u->subdev); struct imsm_map *map = get_imsm_map(dev, MAP_0); - int used_disks = imsm_num_data_members(dev, MAP_0); + int used_disks = imsm_num_data_members(map); unsigned long long blocks_per_member; unsigned long long num_data_stripes; + unsigned long long new_size_per_disk; + + if (used_disks == 0) + return 0; /* calculate new size */ - blocks_per_member = u->new_size / used_disks; - num_data_stripes = blocks_per_member / + new_size_per_disk = u->new_size / used_disks; + blocks_per_member = new_size_per_disk + + NUM_BLOCKS_DIRTY_STRIPE_REGION; + num_data_stripes = new_size_per_disk / map->blocks_per_strip; num_data_stripes /= map->num_domains; dprintf("(size: %llu, blocks per member: %llu, num_data_stipes: %llu)\n", - u->new_size, blocks_per_member, + u->new_size, new_size_per_disk, num_data_stripes); set_blocks_per_member(map, blocks_per_member); set_num_data_stripes(map, num_data_stripes); @@ -9476,12 +9639,6 @@ static int apply_takeover_update(struct imsm_update_takeover *u, if (u->direction == R10_TO_R0) { unsigned long long num_data_stripes; - map->num_domains = 1; - num_data_stripes = blocks_per_member(map); - num_data_stripes /= map->blocks_per_strip; - num_data_stripes /= map->num_domains; - set_num_data_stripes(map, num_data_stripes); - /* Number of failed disks must be half of initial disk number */ if (imsm_count_failed(super, dev, MAP_0) != (map->num_members / 2)) @@ -9507,10 +9664,15 @@ static int apply_takeover_update(struct imsm_update_takeover *u, map->num_domains = 1; map->raid_level = 0; map->failed_disk_num = -1; + num_data_stripes = imsm_dev_size(dev) / 2; + num_data_stripes /= map->blocks_per_strip; + set_num_data_stripes(map, num_data_stripes); } if (u->direction == R0_TO_R10) { void **space; + unsigned long long num_data_stripes; + /* update slots in current disk list */ for (dm = super->disks; dm; dm = dm->next) { if (dm->index >= 0) @@ -9548,6 +9710,11 @@ static int apply_takeover_update(struct imsm_update_takeover *u, map->map_state = IMSM_T_STATE_DEGRADED; map->num_domains = 2; map->raid_level = 1; + num_data_stripes = imsm_dev_size(dev) / 2; + num_data_stripes /= map->blocks_per_strip; + num_data_stripes /= map->num_domains; + set_num_data_stripes(map, num_data_stripes); + /* replace dev<->dev_new */ dv->dev = dev_new; } @@ -9693,7 +9860,7 @@ static void imsm_process_update(struct supertype *st, new_map = get_imsm_map(&u->dev, MAP_0); new_start = pba_of_lba0(new_map); - new_end = new_start + blocks_per_member(new_map); + new_end = new_start + per_dev_array_size(new_map); inf = get_disk_info(u); /* handle activate_spare versus create race: @@ -9704,7 +9871,7 @@ static void imsm_process_update(struct supertype *st, dev = get_imsm_dev(super, i); map = get_imsm_map(dev, MAP_0); start = pba_of_lba0(map); - end = start + blocks_per_member(map); + end = start + per_dev_array_size(map); if ((new_start >= start && new_start <= end) || (start >= new_start && start <= new_end)) /* overlap */; @@ -9820,6 +9987,7 @@ static void imsm_process_update(struct supertype *st, /* sanity check that we are not affecting the uuid of * an active array */ + memset(name, 0, sizeof(name)); snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name); name[MAX_RAID_SERIAL_LEN] = '\0'; for (a = st->arrays; a; a = a->next) @@ -9831,7 +9999,7 @@ static void imsm_process_update(struct supertype *st, break; } - snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name); + memcpy(dev->volume, name, MAX_RAID_SERIAL_LEN); super->updates_pending++; break; } @@ -9867,7 +10035,7 @@ static void imsm_process_update(struct supertype *st, break; } default: - pr_err("error: unsuported process update type:(type: %d)\n", type); + pr_err("error: unsupported process update type:(type: %d)\n", type); } } @@ -10493,7 +10661,7 @@ static struct md_bb *imsm_get_badblocks(struct active_array *a, int slot) return NULL; get_volume_badblocks(super->bbm_log, ord_to_idx(ord), pba_of_lba0(map), - blocks_per_member(map), &super->bb); + per_dev_array_size(map), &super->bb); return &super->bb; } @@ -10588,7 +10756,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, max(map_dest->blocks_per_strip, map_src->blocks_per_strip); migr_rec->dest_depth_per_unit *= max(map_dest->blocks_per_strip, map_src->blocks_per_strip); - new_data_disks = imsm_num_data_members(dev, MAP_0); + new_data_disks = imsm_num_data_members(map_dest); migr_rec->blocks_per_unit = __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks); migr_rec->dest_depth_per_unit = @@ -10599,7 +10767,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit)) num_migr_units++; - migr_rec->num_migr_units = __cpu_to_le32(num_migr_units); + set_num_migr_units(migr_rec, num_migr_units); migr_rec->post_migr_vol_cap = dev->size_low; migr_rec->post_migr_vol_cap_hi = dev->size_high; @@ -10616,7 +10784,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, min_dev_sectors = dev_sectors; close(fd); } - migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors - + set_migr_chkp_area_pba(migr_rec, min_dev_sectors - RAID_DISK_RESERVED_BLOCKS_IMSM_HI); write_imsm_migr_rec(st); @@ -10656,7 +10824,7 @@ int save_backup_imsm(struct supertype *st, int dest_layout = 0; int dest_chunk; unsigned long long start; - int data_disks = imsm_num_data_members(dev, MAP_0); + int data_disks = imsm_num_data_members(map_dest); targets = xmalloc(new_disks * sizeof(int)); @@ -10667,8 +10835,7 @@ int save_backup_imsm(struct supertype *st, start = info->reshape_progress * 512; for (i = 0; i < new_disks; i++) { - target_offsets[i] = (unsigned long long) - __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512; + target_offsets[i] = migr_chkp_area_pba(super->migr_rec) * 512; /* move back copy area adderss, it will be moved forward * in restore_stripes() using start input variable */ @@ -10747,12 +10914,11 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state) if (info->reshape_progress % blocks_per_unit) curr_migr_unit++; - super->migr_rec->curr_migr_unit = - __cpu_to_le32(curr_migr_unit); + set_current_migr_unit(super->migr_rec, curr_migr_unit); super->migr_rec->rec_status = __cpu_to_le32(state); - super->migr_rec->dest_1st_member_lba = - __cpu_to_le32(curr_migr_unit * - __le32_to_cpu(super->migr_rec->dest_depth_per_unit)); + set_migr_dest_1st_member_lba(super->migr_rec, + super->migr_rec->dest_depth_per_unit * curr_migr_unit); + if (write_imsm_migr_rec(st) < 0) { dprintf("imsm: Cannot write migration record outside backup area\n"); return 1; @@ -10786,8 +10952,8 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) char *buf = NULL; int retval = 1; unsigned int sector_size = super->sector_size; - unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit); - unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units); + unsigned long curr_migr_unit = current_migr_unit(migr_rec); + unsigned long num_migr_units = get_num_migr_units(migr_rec); char buffer[20]; int skipped_disks = 0; @@ -10814,11 +10980,9 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) map_dest = get_imsm_map(id->dev, MAP_0); new_disks = map_dest->num_members; - read_offset = (unsigned long long) - __le32_to_cpu(migr_rec->ckpt_area_pba) * 512; + read_offset = migr_chkp_area_pba(migr_rec) * 512; - write_offset = ((unsigned long long) - __le32_to_cpu(migr_rec->dest_1st_member_lba) + + write_offset = (migr_dest_1st_member_lba(migr_rec) + pba_of_lba0(map_dest)) * 512; unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; @@ -11278,6 +11442,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, int imsm_layout = -1; int data_disks; struct imsm_dev *dev; + struct imsm_map *map; struct intel_super *super; unsigned long long current_size; unsigned long long free_size; @@ -11368,7 +11533,8 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, super = st->sb; dev = get_imsm_dev(super, super->current_vol); - data_disks = imsm_num_data_members(dev , MAP_0); + map = get_imsm_map(dev, MAP_0); + data_disks = imsm_num_data_members(map); /* compute current size per disk member */ current_size = info.custom_array_size / data_disks; @@ -11376,7 +11542,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, if (geo->size > 0 && geo->size != MAX_SIZE) { /* align component size */ - geo->size = imsm_component_size_aligment_check( + geo->size = imsm_component_size_alignment_check( get_imsm_raid_level(dev->vol.map), chunk * 1024, super->sector_size, geo->size * 2); @@ -11410,7 +11576,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, max_size = free_size + current_size; /* align component size */ - max_size = imsm_component_size_aligment_check( + max_size = imsm_component_size_alignment_check( get_imsm_raid_level(dev->vol.map), chunk * 1024, super->sector_size, max_size); @@ -11511,6 +11677,68 @@ int imsm_takeover(struct supertype *st, struct geo_params *geo) return 0; } +/* Flush size update if size calculated by num_data_stripes is higher than + * imsm_dev_size to eliminate differences during reshape. + * Mdmon will recalculate them correctly. + * If subarray index is not set then check whole container. + * Returns: + * 0 - no error occurred + * 1 - error detected + */ +static int imsm_fix_size_mismatch(struct supertype *st, int subarray_index) +{ + struct intel_super *super = st->sb; + int tmp = super->current_vol; + int ret_val = 1; + int i; + + for (i = 0; i < super->anchor->num_raid_devs; i++) { + if (subarray_index >= 0 && i != subarray_index) + continue; + super->current_vol = i; + struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + unsigned int disc_count = imsm_num_data_members(map); + struct geo_params geo; + struct imsm_update_size_change *update; + unsigned long long calc_size = per_dev_array_size(map) * disc_count; + unsigned long long d_size = imsm_dev_size(dev); + int u_size; + + if (calc_size == d_size || dev->vol.migr_type == MIGR_GEN_MIGR) + continue; + + /* There is a difference, verify that imsm_dev_size is + * rounded correctly and push update. + */ + if (d_size != round_size_to_mb(d_size, disc_count)) { + dprintf("imsm: Size of volume %d is not rounded correctly\n", + i); + goto exit; + } + memset(&geo, 0, sizeof(struct geo_params)); + geo.size = d_size; + u_size = imsm_create_metadata_update_for_size_change(st, &geo, + &update); + if (u_size < 1) { + dprintf("imsm: Cannot prepare size change update\n"); + goto exit; + } + imsm_update_metadata_locally(st, update, u_size); + if (st->update_tail) { + append_metadata_update(st, update, u_size); + flush_metadata_updates(st); + st->update_tail = &st->updates; + } else { + imsm_sync_metadata(st); + } + } + ret_val = 0; +exit: + super->current_vol = tmp; + return ret_val; +} + static int imsm_reshape_super(struct supertype *st, unsigned long long size, int level, int layout, int chunksize, int raid_disks, @@ -11537,9 +11765,6 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, dprintf("for level : %i\n", geo.level); dprintf("for raid_disks : %i\n", geo.raid_disks); - if (experimental() == 0) - return ret_val; - if (strcmp(st->container_devnm, st->devnm) == 0) { /* On container level we can only increase number of devices. */ dprintf("imsm: info: Container operation\n"); @@ -11550,6 +11775,11 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, struct imsm_update_reshape *u = NULL; int len; + if (imsm_fix_size_mismatch(st, -1)) { + dprintf("imsm: Cannot fix size mismatch\n"); + goto exit_imsm_reshape_super; + } + len = imsm_create_metadata_update_for_reshape( st, &geo, old_raid_disks, &u); @@ -11837,7 +12067,7 @@ static int imsm_manage_reshape( struct intel_dev *dv; unsigned int sector_size = super->sector_size; struct imsm_dev *dev = NULL; - struct imsm_map *map_src; + struct imsm_map *map_src, *map_dest; int migr_vol_qan = 0; int ndata, odata; /* [bytes] */ int chunk; /* [bytes] */ @@ -11852,6 +12082,7 @@ static int imsm_manage_reshape( unsigned long long start_buf_shift; /* [bytes] */ int degraded = 0; int source_layout = 0; + int subarray_index = -1; if (!sra) return ret_val; @@ -11865,6 +12096,7 @@ static int imsm_manage_reshape( dv->dev->vol.migr_state == 1) { dev = dv->dev; migr_vol_qan++; + subarray_index = dv->index; } } /* Only one volume can migrate at the same time */ @@ -11875,12 +12107,13 @@ static int imsm_manage_reshape( goto abort; } + map_dest = get_imsm_map(dev, MAP_0); map_src = get_imsm_map(dev, MAP_1); if (map_src == NULL) goto abort; - ndata = imsm_num_data_members(dev, MAP_0); - odata = imsm_num_data_members(dev, MAP_1); + ndata = imsm_num_data_members(map_dest); + odata = imsm_num_data_members(map_src); chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512; old_data_stripe_length = odata * chunk; @@ -11911,7 +12144,7 @@ static int imsm_manage_reshape( buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512; /* extend buffer size for parity disk */ buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; - /* add space for stripe aligment */ + /* add space for stripe alignment */ buf_size += old_data_stripe_length; if (posix_memalign((void **)&buf, MAX_SECTOR_SIZE, buf_size)) { dprintf("imsm: Cannot allocate checkpoint buffer\n"); @@ -11921,12 +12154,12 @@ static int imsm_manage_reshape( max_position = sra->component_size * ndata; source_layout = imsm_level_to_layout(map_src->raid_level); - while (__le32_to_cpu(migr_rec->curr_migr_unit) < - __le32_to_cpu(migr_rec->num_migr_units)) { + while (current_migr_unit(migr_rec) < + get_num_migr_units(migr_rec)) { /* current reshape position [blocks] */ unsigned long long current_position = __le32_to_cpu(migr_rec->blocks_per_unit) - * __le32_to_cpu(migr_rec->curr_migr_unit); + * current_migr_unit(migr_rec); unsigned long long border; /* Check that array hasn't become failed. @@ -12048,6 +12281,14 @@ static int imsm_manage_reshape( /* return '1' if done */ ret_val = 1; + + /* After the reshape eliminate size mismatch in metadata. + * Don't update md/component_size here, volume hasn't + * to take whole space. It is allowed by kernel. + * md/component_size will be set propoperly after next assembly. + */ + imsm_fix_size_mismatch(st, subarray_index); + abort: free(buf); /* See Grow.c: abort_reshape() for further explanation */ @@ -12079,7 +12320,6 @@ struct superswitch super_imsm = { .reshape_super = imsm_reshape_super, .manage_reshape = imsm_manage_reshape, .recover_backup = recover_backup_imsm, - .copy_metadata = copy_metadata_imsm, .examine_badblocks = examine_badblocks_imsm, .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm,