X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=c9a1af5ba3019cf18b93df2f52c9af25397b61ea;hb=45c43276d02a32876c7e1f9f0d04580595141b3d;hp=1f79eab82886046e45b1fddfea2ccbaf6cd86a05;hpb=de44e46fd4703ea286987d1d0cf775efa62700fd;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index 1f79eab8..c9a1af5b 100644 --- a/super-intel.c +++ b/super-intel.c @@ -81,16 +81,33 @@ MPB_ATTRIB_RAID1 | \ MPB_ATTRIB_RAID10 | \ MPB_ATTRIB_RAID5 | \ - MPB_ATTRIB_EXP_STRIPE_SIZE) + MPB_ATTRIB_EXP_STRIPE_SIZE | \ + MPB_ATTRIB_BBM) /* Define attributes that are unused but not harmful */ #define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE) #define MPB_SECTOR_CNT 2210 -#define IMSM_RESERVED_SECTORS 4096 -#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056 +#define IMSM_RESERVED_SECTORS 8192 +#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2048 #define SECT_PER_MB_SHIFT 11 #define MAX_SECTOR_SIZE 4096 +#define MULTIPLE_PPL_AREA_SIZE_IMSM (1024 * 1024) /* Size of the whole + * mutliple PPL area + */ + +/* + * This macro let's us ensure that no-one accidentally + * changes the size of a struct + */ +#define ASSERT_SIZE(_struct, size) \ +static inline void __assert_size_##_struct(void) \ +{ \ + switch (0) { \ + case 0: break; \ + case (sizeof(struct _struct) == size): break; \ + } \ +} /* Disk configuration info. */ #define IMSM_MAX_DEVICES 255 @@ -101,12 +118,14 @@ struct imsm_disk { #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */ #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */ #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */ +#define JOURNAL_DISK __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */ __u32 status; /* 0xF0 - 0xF3 */ __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */ __u32 total_blocks_hi; /* 0xF4 - 0xF5 total blocks hi */ #define IMSM_DISK_FILLERS 3 __u32 filler[IMSM_DISK_FILLERS]; /* 0xF5 - 0x107 MPB_DISK_FILLERS for future expansion */ }; +ASSERT_SIZE(imsm_disk, 48) /* map selector for map managment */ @@ -141,7 +160,8 @@ struct imsm_map { __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members], * top byte contains some flags */ -} __attribute__ ((packed)); +}; +ASSERT_SIZE(imsm_map, 52) struct imsm_vol { __u32 curr_migr_unit; @@ -154,6 +174,9 @@ struct imsm_vol { #define MIGR_STATE_CHANGE 4 #define MIGR_REPAIR 5 __u8 migr_type; /* Initializing, Rebuilding, ... */ +#define RAIDVOL_CLEAN 0 +#define RAIDVOL_DIRTY 1 +#define RAIDVOL_DSRECORD_VALID 2 __u8 dirty; __u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */ __u16 verify_errors; /* number of mismatches */ @@ -161,7 +184,8 @@ struct imsm_vol { __u32 filler[4]; struct imsm_map map[1]; /* here comes another one if migr_state */ -} __attribute__ ((packed)); +}; +ASSERT_SIZE(imsm_vol, 84) struct imsm_dev { __u8 volume[MAX_RAID_SERIAL_LEN]; @@ -189,10 +213,31 @@ struct imsm_dev { __u16 cache_policy; __u8 cng_state; __u8 cng_sub_state; -#define IMSM_DEV_FILLERS 10 + __u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */ + + /* NVM_EN */ + __u8 nv_cache_mode; + __u8 nv_cache_flags; + + /* Unique Volume Id of the NvCache Volume associated with this volume */ + __u32 nvc_vol_orig_family_num; + __u16 nvc_vol_raid_dev_num; + +#define RWH_OFF 0 +#define RWH_DISTRIBUTED 1 +#define RWH_JOURNALING_DRIVE 2 +#define RWH_MULTIPLE_DISTRIBUTED 3 +#define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4 +#define RWH_MULTIPLE_OFF 5 + __u8 rwh_policy; /* Raid Write Hole Policy */ + __u8 jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */ + __u8 filler1; + +#define IMSM_DEV_FILLERS 3 __u32 filler[IMSM_DEV_FILLERS]; struct imsm_vol vol; -} __attribute__ ((packed)); +}; +ASSERT_SIZE(imsm_dev, 164) struct imsm_super { __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */ @@ -210,35 +255,44 @@ struct imsm_super { __u32 orig_family_num; /* 0x40 - 0x43 original family num */ __u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */ __u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */ -#define IMSM_FILLERS 35 - __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */ + __u16 num_raid_devs_created; /* 0x4C - 0x4D Used for generating unique + * volume IDs for raid_dev created in this array + * (starts at 1) + */ + __u16 filler1; /* 0x4E - 0x4F */ +#define IMSM_FILLERS 34 + __u32 filler[IMSM_FILLERS]; /* 0x50 - 0xD7 RAID_MPB_FILLERS */ struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */ /* here comes imsm_dev[num_raid_devs] */ /* here comes BBM logs */ -} __attribute__ ((packed)); +}; +ASSERT_SIZE(imsm_super, 264) #define BBM_LOG_MAX_ENTRIES 254 +#define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */ +#define BBM_LOG_SIGNATURE 0xabadb10c + +struct bbm_log_block_addr { + __u16 w1; + __u32 dw1; +} __attribute__ ((__packed__)); struct bbm_log_entry { - __u64 defective_block_start; -#define UNREADABLE 0xFFFFFFFF - __u32 spare_block_offset; - __u16 remapped_marked_count; - __u16 disk_ordinal; + __u8 marked_count; /* Number of blocks marked - 1 */ + __u8 disk_ordinal; /* Disk entry within the imsm_super */ + struct bbm_log_block_addr defective_block_start; } __attribute__ ((__packed__)); struct bbm_log { __u32 signature; /* 0xABADB10C */ __u32 entry_count; - __u32 reserved_spare_block_count; /* 0 */ - __u32 reserved; /* 0xFFFF */ - __u64 first_spare_lba; - struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES]; -} __attribute__ ((__packed__)); + struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES]; +}; +ASSERT_SIZE(bbm_log, 2040) -#ifndef MDASSEMBLE static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" }; -#endif + +#define BLOCKS_PER_KB (1024/512) #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209 @@ -254,11 +308,14 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" #define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has * already been migrated and must * be recovered from checkpoint area */ + +#define PPL_ENTRY_SPACE (128 * 1024) /* Size of single PPL, without the header */ + struct migr_record { __u32 rec_status; /* Status used to determine how to restart * migration in case it aborts * in some fashion */ - __u32 curr_migr_unit; /* 0..numMigrUnits-1 */ + __u32 curr_migr_unit_lo; /* 0..numMigrUnits-1 */ __u32 family_num; /* Family number of MPB * containing the RaidDev * that is migrating */ @@ -268,17 +325,25 @@ struct migr_record { __u32 dest_depth_per_unit; /* Num member blocks each destMap * member disk * advances per unit-of-operation */ - __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */ - __u32 dest_1st_member_lba; /* First member lba on first - * stripe of destination */ - __u32 num_migr_units; /* Total num migration units-of-op */ + __u32 ckpt_area_pba_lo; /* Pba of first block of ckpt copy area */ + __u32 dest_1st_member_lba_lo; /* First member lba on first + * stripe of destination */ + __u32 num_migr_units_lo; /* Total num migration units-of-op */ __u32 post_migr_vol_cap; /* Size of volume after * migration completes */ __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */ __u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the * migration ckpt record was read from * (for recovered migrations) */ -} __attribute__ ((__packed__)); + __u32 curr_migr_unit_hi; /* 0..numMigrUnits-1 high order 32 bits */ + __u32 ckpt_area_pba_hi; /* Pba of first block of ckpt copy area + * high order 32 bits */ + __u32 dest_1st_member_lba_hi; /* First member lba on first stripe of + * destination - high order 32 bits */ + __u32 num_migr_units_hi; /* Total num migration units-of-op + * high order 32 bits */ +}; +ASSERT_SIZE(migr_record, 64) struct md_list { /* usage marker: @@ -361,6 +426,7 @@ struct intel_super { array, it indicates that mdmon is allowed to clean migration record */ size_t len; /* size of the 'buf' allocation */ + size_t extra_space; /* extra space in 'buf' that is not used yet */ void *next_buf; /* for realloc'ing buf from the manager */ size_t next_len; int updates_pending; /* count of pending updates for mdmon */ @@ -389,6 +455,7 @@ struct intel_super { struct intel_hba *hba; /* device path of the raid controller for this metadata */ const struct imsm_orom *orom; /* platform firmware support */ struct intel_super *next; /* (temp) list for disambiguating family_num */ + struct md_bb bb; /* memory for get_bad_blocks call */ }; struct intel_disk { @@ -421,6 +488,8 @@ enum imsm_update_type { update_takeover, update_general_migration_checkpoint, update_size_change, + update_prealloc_badblocks_mem, + update_rwh_policy, }; struct imsm_update_activate_spare { @@ -509,6 +578,16 @@ struct imsm_update_add_remove_disk { enum imsm_update_type type; }; +struct imsm_update_prealloc_bb_mem { + enum imsm_update_type type; +}; + +struct imsm_update_rwh_policy { + enum imsm_update_type type; + int new_policy; + int dev_idx; +}; + static const char *_sys_dev_type[] = { [SYS_DEV_UNKNOWN] = "Unknown", [SYS_DEV_SAS] = "SAS", @@ -629,12 +708,10 @@ static struct supertype *match_metadata_desc_imsm(char *arg) return st; } -#ifndef MDASSEMBLE static __u8 *get_imsm_version(struct imsm_super *mpb) { return &mpb->sig[MPB_SIG_LEN]; } -#endif /* retrieve a disk directly from the anchor when the anchor is known to be * up-to-date, currently only at load time @@ -741,7 +818,6 @@ static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state) return size; } -#ifndef MDASSEMBLE /* retrieve disk serial number list from a metadata update */ static struct disk_info *get_disk_info(struct imsm_update_create_array *update) { @@ -753,7 +829,6 @@ static struct disk_info *get_disk_info(struct imsm_update_create_array *update) return inf; } -#endif static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index) { @@ -788,6 +863,242 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index) return NULL; } +static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr + *addr) +{ + return ((((__u64)__le32_to_cpu(addr->dw1)) << 16) | + __le16_to_cpu(addr->w1)); +} + +static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec) +{ + struct bbm_log_block_addr addr; + + addr.w1 = __cpu_to_le16((__u16)(sec & 0xffff)); + addr.dw1 = __cpu_to_le32((__u32)(sec >> 16) & 0xffffffff); + return addr; +} + +/* get size of the bbm log */ +static __u32 get_imsm_bbm_log_size(struct bbm_log *log) +{ + if (!log || log->entry_count == 0) + return 0; + + return sizeof(log->signature) + + sizeof(log->entry_count) + + log->entry_count * sizeof(struct bbm_log_entry); +} + +/* check if bad block is not partially stored in bbm log */ +static int is_stored_in_bbm(struct bbm_log *log, const __u8 idx, const unsigned + long long sector, const int length, __u32 *pos) +{ + __u32 i; + + for (i = *pos; i < log->entry_count; i++) { + struct bbm_log_entry *entry = &log->marked_block_entries[i]; + unsigned long long bb_start; + unsigned long long bb_end; + + bb_start = __le48_to_cpu(&entry->defective_block_start); + bb_end = bb_start + (entry->marked_count + 1); + + if ((entry->disk_ordinal == idx) && (bb_start >= sector) && + (bb_end <= sector + length)) { + *pos = i; + return 1; + } + } + return 0; +} + +/* record new bad block in bbm log */ +static int record_new_badblock(struct bbm_log *log, const __u8 idx, unsigned + long long sector, int length) +{ + int new_bb = 0; + __u32 pos = 0; + struct bbm_log_entry *entry = NULL; + + while (is_stored_in_bbm(log, idx, sector, length, &pos)) { + struct bbm_log_entry *e = &log->marked_block_entries[pos]; + + if ((e->marked_count + 1 == BBM_LOG_MAX_LBA_ENTRY_VAL) && + (__le48_to_cpu(&e->defective_block_start) == sector)) { + sector += BBM_LOG_MAX_LBA_ENTRY_VAL; + length -= BBM_LOG_MAX_LBA_ENTRY_VAL; + pos = pos + 1; + continue; + } + entry = e; + break; + } + + if (entry) { + int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length : + BBM_LOG_MAX_LBA_ENTRY_VAL; + entry->defective_block_start = __cpu_to_le48(sector); + entry->marked_count = cnt - 1; + if (cnt == length) + return 1; + sector += cnt; + length -= cnt; + } + + new_bb = ROUND_UP(length, BBM_LOG_MAX_LBA_ENTRY_VAL) / + BBM_LOG_MAX_LBA_ENTRY_VAL; + if (log->entry_count + new_bb > BBM_LOG_MAX_ENTRIES) + return 0; + + while (length > 0) { + int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length : + BBM_LOG_MAX_LBA_ENTRY_VAL; + struct bbm_log_entry *entry = + &log->marked_block_entries[log->entry_count]; + + entry->defective_block_start = __cpu_to_le48(sector); + entry->marked_count = cnt - 1; + entry->disk_ordinal = idx; + + sector += cnt; + length -= cnt; + + log->entry_count++; + } + + return new_bb; +} + +/* clear all bad blocks for given disk */ +static void clear_disk_badblocks(struct bbm_log *log, const __u8 idx) +{ + __u32 i = 0; + + while (i < log->entry_count) { + struct bbm_log_entry *entries = log->marked_block_entries; + + if (entries[i].disk_ordinal == idx) { + if (i < log->entry_count - 1) + entries[i] = entries[log->entry_count - 1]; + log->entry_count--; + } else { + i++; + } + } +} + +/* clear given bad block */ +static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned + long long sector, const int length) { + __u32 i = 0; + + while (i < log->entry_count) { + struct bbm_log_entry *entries = log->marked_block_entries; + + if ((entries[i].disk_ordinal == idx) && + (__le48_to_cpu(&entries[i].defective_block_start) == + sector) && (entries[i].marked_count + 1 == length)) { + if (i < log->entry_count - 1) + entries[i] = entries[log->entry_count - 1]; + log->entry_count--; + break; + } + i++; + } + + return 1; +} + +/* allocate and load BBM log from metadata */ +static int load_bbm_log(struct intel_super *super) +{ + struct imsm_super *mpb = super->anchor; + __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size); + + super->bbm_log = xcalloc(1, sizeof(struct bbm_log)); + if (!super->bbm_log) + return 1; + + if (bbm_log_size) { + struct bbm_log *log = (void *)mpb + + __le32_to_cpu(mpb->mpb_size) - bbm_log_size; + + __u32 entry_count; + + if (bbm_log_size < sizeof(log->signature) + + sizeof(log->entry_count)) + return 2; + + entry_count = __le32_to_cpu(log->entry_count); + if ((__le32_to_cpu(log->signature) != BBM_LOG_SIGNATURE) || + (entry_count > BBM_LOG_MAX_ENTRIES)) + return 3; + + if (bbm_log_size != + sizeof(log->signature) + sizeof(log->entry_count) + + entry_count * sizeof(struct bbm_log_entry)) + return 4; + + memcpy(super->bbm_log, log, bbm_log_size); + } else { + super->bbm_log->signature = __cpu_to_le32(BBM_LOG_SIGNATURE); + super->bbm_log->entry_count = 0; + } + + return 0; +} + +/* checks if bad block is within volume boundaries */ +static int is_bad_block_in_volume(const struct bbm_log_entry *entry, + const unsigned long long start_sector, + const unsigned long long size) +{ + unsigned long long bb_start; + unsigned long long bb_end; + + bb_start = __le48_to_cpu(&entry->defective_block_start); + bb_end = bb_start + (entry->marked_count + 1); + + if (((bb_start >= start_sector) && (bb_start < start_sector + size)) || + ((bb_end >= start_sector) && (bb_end <= start_sector + size))) + return 1; + + return 0; +} + +/* get list of bad blocks on a drive for a volume */ +static void get_volume_badblocks(const struct bbm_log *log, const __u8 idx, + const unsigned long long start_sector, + const unsigned long long size, + struct md_bb *bbs) +{ + __u32 count = 0; + __u32 i; + + for (i = 0; i < log->entry_count; i++) { + const struct bbm_log_entry *ent = + &log->marked_block_entries[i]; + struct md_bb_entry *bb; + + if ((ent->disk_ordinal == idx) && + is_bad_block_in_volume(ent, start_sector, size)) { + + if (!bbs->entries) { + bbs->entries = xmalloc(BBM_LOG_MAX_ENTRIES * + sizeof(*bb)); + if (!bbs->entries) + break; + } + + bb = &bbs->entries[count++]; + bb->sector = __le48_to_cpu(&ent->defective_block_start); + bb->length = ent->marked_count + 1; + } + } + bbs->count = count; +} + /* * for second_map: * == MAP_0 get first map @@ -874,12 +1185,12 @@ static int count_memberships(struct dl *dl, struct intel_super *super) static __u32 imsm_min_reserved_sectors(struct intel_super *super); -static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi) +static int split_ull(unsigned long long n, void *lo, void *hi) { if (lo == 0 || hi == 0) return 1; - *lo = __le32_to_cpu((unsigned)n); - *hi = __le32_to_cpu((unsigned)(n >> 32)); + __put_unaligned32(__cpu_to_le32((__u32)n), lo); + __put_unaligned32(__cpu_to_le32((n >> 32)), hi); return 0; } @@ -910,14 +1221,51 @@ static unsigned long long blocks_per_member(struct imsm_map *map) return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi); } -#ifndef MDASSEMBLE static unsigned long long num_data_stripes(struct imsm_map *map) { if (map == NULL) return 0; return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi); } -#endif + +static unsigned long long imsm_dev_size(struct imsm_dev *dev) +{ + if (dev == NULL) + return 0; + return join_u32(dev->size_low, dev->size_high); +} + +static unsigned long long migr_chkp_area_pba(struct migr_record *migr_rec) +{ + if (migr_rec == NULL) + return 0; + return join_u32(migr_rec->ckpt_area_pba_lo, + migr_rec->ckpt_area_pba_hi); +} + +static unsigned long long current_migr_unit(struct migr_record *migr_rec) +{ + if (migr_rec == NULL) + return 0; + return join_u32(migr_rec->curr_migr_unit_lo, + migr_rec->curr_migr_unit_hi); +} + +static unsigned long long migr_dest_1st_member_lba(struct migr_record *migr_rec) +{ + if (migr_rec == NULL) + return 0; + return join_u32(migr_rec->dest_1st_member_lba_lo, + migr_rec->dest_1st_member_lba_hi); +} + +static unsigned long long get_num_migr_units(struct migr_record *migr_rec) +{ + if (migr_rec == NULL) + return 0; + return join_u32(migr_rec->num_migr_units_lo, + migr_rec->num_migr_units_hi); +} static void set_total_blocks(struct imsm_disk *disk, unsigned long long n) { @@ -939,7 +1287,54 @@ static void set_num_data_stripes(struct imsm_map *map, unsigned long long n) split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi); } -static struct extent *get_extents(struct intel_super *super, struct dl *dl) +static void set_imsm_dev_size(struct imsm_dev *dev, unsigned long long n) +{ + split_ull(n, &dev->size_low, &dev->size_high); +} + +static void set_migr_chkp_area_pba(struct migr_record *migr_rec, + unsigned long long n) +{ + split_ull(n, &migr_rec->ckpt_area_pba_lo, &migr_rec->ckpt_area_pba_hi); +} + +static void set_current_migr_unit(struct migr_record *migr_rec, + unsigned long long n) +{ + split_ull(n, &migr_rec->curr_migr_unit_lo, + &migr_rec->curr_migr_unit_hi); +} + +static void set_migr_dest_1st_member_lba(struct migr_record *migr_rec, + unsigned long long n) +{ + split_ull(n, &migr_rec->dest_1st_member_lba_lo, + &migr_rec->dest_1st_member_lba_hi); +} + +static void set_num_migr_units(struct migr_record *migr_rec, + unsigned long long n) +{ + split_ull(n, &migr_rec->num_migr_units_lo, + &migr_rec->num_migr_units_hi); +} + +static unsigned long long per_dev_array_size(struct imsm_map *map) +{ + unsigned long long array_size = 0; + + if (map == NULL) + return array_size; + + array_size = num_data_stripes(map) * map->blocks_per_strip; + if (get_imsm_raid_level(map) == 1 || get_imsm_raid_level(map) == 10) + array_size *= 2; + + return array_size; +} + +static struct extent *get_extents(struct intel_super *super, struct dl *dl, + int get_minimal_reservation) { /* find a list of used extents on the given physical device */ struct extent *rv, *e; @@ -951,7 +1346,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) * regardless of whether the OROM has assigned sectors from the * IMSM_RESERVED_SECTORS region */ - if (dl->index == -1) + if (dl->index == -1 || get_minimal_reservation) reservation = imsm_min_reserved_sectors(super); else reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; @@ -965,7 +1360,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl) if (get_imsm_disk_slot(map, dl->index) >= 0) { e->start = pba_of_lba0(map); - e->size = blocks_per_member(map); + e->size = per_dev_array_size(map); e++; } } @@ -1012,7 +1407,7 @@ static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl) if (dl->index == -1) return MPB_SECTOR_CNT; - e = get_extents(super, dl); + e = get_extents(super, dl, 0); if (!e) return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; @@ -1042,6 +1437,38 @@ static int is_failed(struct imsm_disk *disk) return (disk->status & FAILED_DISK) == FAILED_DISK; } +static int is_journal(struct imsm_disk *disk) +{ + return (disk->status & JOURNAL_DISK) == JOURNAL_DISK; +} + +/* round array size down to closest MB and ensure it splits evenly + * between members + */ +static unsigned long long round_size_to_mb(unsigned long long size, unsigned int + disk_count) +{ + size /= disk_count; + size = (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; + size *= disk_count; + + return size; +} + +static int able_to_resync(int raid_level, int missing_disks) +{ + int max_missing_disks = 0; + + switch (raid_level) { + case 10: + max_missing_disks = 1; + break; + default: + max_missing_disks = 0; + } + return missing_disks <= max_missing_disks; +} + /* try to determine how much space is reserved for metadata from * the last get_extents() entry on the smallest active disk, * otherwise fallback to the default @@ -1072,7 +1499,7 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super) return rv; /* find last lba used by subarrays on the smallest active disk */ - e = get_extents(super, dl_min); + e = get_extents(super, dl_min, 0); if (!e) return rv; for (i = 0; e[i].size; i++) @@ -1089,44 +1516,52 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super) return (remainder < rv) ? remainder : rv; } -/* Return minimum size of a spare that can be used in this array*/ -static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st) +/* + * Return minimum size of a spare and sector size + * that can be used in this array + */ +int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c) { struct intel_super *super = st->sb; struct dl *dl; struct extent *e; int i; - unsigned long long rv = 0; + unsigned long long size = 0; + + c->min_size = 0; + c->sector_size = 0; if (!super) - return rv; + return -EINVAL; /* find first active disk in array */ dl = super->disks; while (dl && (is_failed(&dl->disk) || dl->index == -1)) dl = dl->next; if (!dl) - return rv; + return -EINVAL; /* find last lba used by subarrays */ - e = get_extents(super, dl); + e = get_extents(super, dl, 0); if (!e) - return rv; + return -EINVAL; for (i = 0; e[i].size; i++) continue; if (i > 0) - rv = e[i-1].start + e[i-1].size; + size = e[i-1].start + e[i-1].size; free(e); /* add the amount of space needed for metadata */ - rv = rv + imsm_min_reserved_sectors(super); + size += imsm_min_reserved_sectors(super); - return rv * 512; + c->min_size = size * 512; + c->sector_size = super->sector_size; + + return 0; } static int is_gen_migration(struct imsm_dev *dev); #define IMSM_4K_DIV 8 -#ifndef MDASSEMBLE static __u64 blocks_per_migr_unit(struct intel_super *super, struct imsm_dev *dev); @@ -1180,13 +1615,14 @@ static void print_imsm_dev(struct intel_super *super, ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : ""); } else printf(" This Slot : ?\n"); - sz = __le32_to_cpu(dev->size_high); - sz <<= 32; - sz += __le32_to_cpu(dev->size_low); - printf(" Array Size : %llu%s\n", (unsigned long long)sz, + printf(" Sector Size : %u\n", super->sector_size); + sz = imsm_dev_size(dev); + printf(" Array Size : %llu%s\n", + (unsigned long long)sz * 512 / super->sector_size, human_size(sz * 512)); sz = blocks_per_member(map); - printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz, + printf(" Per Dev Size : %llu%s\n", + (unsigned long long)sz * 512 / super->sector_size, human_size(sz * 512)); printf(" Sector Offset : %llu\n", pba_of_lba0(map)); @@ -1231,11 +1667,27 @@ static void print_imsm_dev(struct intel_super *super, blocks_per_migr_unit(super, dev)); } printf("\n"); - printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean"); + printf(" Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ? + "dirty" : "clean"); + printf(" RWH Policy : "); + if (dev->rwh_policy == RWH_OFF || dev->rwh_policy == RWH_MULTIPLE_OFF) + printf("off\n"); + else if (dev->rwh_policy == RWH_DISTRIBUTED) + printf("PPL distributed\n"); + else if (dev->rwh_policy == RWH_JOURNALING_DRIVE) + printf("PPL journaling drive\n"); + else if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED) + printf("Multiple distributed PPLs\n"); + else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE) + printf("Multiple PPLs on journaling drive\n"); + else + printf("\n", dev->rwh_policy); } -static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved) -{ +static void print_imsm_disk(struct imsm_disk *disk, + int index, + __u32 reserved, + unsigned int sector_size) { char str[MAX_RAID_SERIAL_LEN + 1]; __u64 sz; @@ -1248,12 +1700,14 @@ static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved) printf(" Disk%02d Serial : %s\n", index, str); else printf(" Disk Serial : %s\n", str); - printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "", - is_configured(disk) ? " active" : "", - is_failed(disk) ? " failed" : ""); + printf(" State :%s%s%s%s\n", is_spare(disk) ? " spare" : "", + is_configured(disk) ? " active" : "", + is_failed(disk) ? " failed" : "", + is_journal(disk) ? " journal" : ""); printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); sz = total_blocks(disk) - reserved; - printf(" Usable Size : %llu%s\n", (unsigned long long)sz, + printf(" Usable Size : %llu%s\n", + (unsigned long long)sz * 512 / sector_size, human_size(sz * 512)); } @@ -1262,12 +1716,14 @@ void convert_to_4k_imsm_migr_rec(struct intel_super *super) struct migr_record *migr_rec = super->migr_rec; migr_rec->blocks_per_unit /= IMSM_4K_DIV; - migr_rec->ckpt_area_pba /= IMSM_4K_DIV; - migr_rec->dest_1st_member_lba /= IMSM_4K_DIV; migr_rec->dest_depth_per_unit /= IMSM_4K_DIV; split_ull((join_u32(migr_rec->post_migr_vol_cap, migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV), &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi); + set_migr_chkp_area_pba(migr_rec, + migr_chkp_area_pba(migr_rec) / IMSM_4K_DIV); + set_migr_dest_1st_member_lba(migr_rec, + migr_dest_1st_member_lba(migr_rec) / IMSM_4K_DIV); } void convert_to_4k_imsm_disk(struct imsm_disk *disk) @@ -1280,6 +1736,7 @@ void convert_to_4k(struct intel_super *super) struct imsm_super *mpb = super->anchor; struct imsm_disk *disk; int i; + __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size); for (i = 0; i < mpb->num_disks ; i++) { disk = __get_imsm_disk(mpb, i); @@ -1290,8 +1747,7 @@ void convert_to_4k(struct intel_super *super) struct imsm_dev *dev = __get_imsm_dev(mpb, i); struct imsm_map *map = get_imsm_map(dev, MAP_0); /* dev */ - split_ull((join_u32(dev->size_low, dev->size_high)/IMSM_4K_DIV), - &dev->size_low, &dev->size_high); + set_imsm_dev_size(dev, imsm_dev_size(dev)/IMSM_4K_DIV); dev->vol.curr_migr_unit /= IMSM_4K_DIV; /* map0 */ @@ -1308,6 +1764,24 @@ void convert_to_4k(struct intel_super *super) set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV); } } + if (bbm_log_size) { + struct bbm_log *log = (void *)mpb + + __le32_to_cpu(mpb->mpb_size) - bbm_log_size; + __u32 i; + + for (i = 0; i < log->entry_count; i++) { + struct bbm_log_entry *entry = + &log->marked_block_entries[i]; + + __u8 count = entry->marked_count + 1; + unsigned long long sector = + __le48_to_cpu(&entry->defective_block_start); + + entry->defective_block_start = + __cpu_to_le48(sector/IMSM_4K_DIV); + entry->marked_count = max(count/IMSM_4K_DIV, 1) - 1; + } + } mpb->check_sum = __gen_imsm_checksum(mpb); } @@ -1342,8 +1816,8 @@ void examine_migr_rec_imsm(struct intel_super *super) printf("Normal\n"); else printf("Contains Data\n"); - printf(" Current Unit : %u\n", - __le32_to_cpu(migr_rec->curr_migr_unit)); + printf(" Current Unit : %llu\n", + current_migr_unit(migr_rec)); printf(" Family : %u\n", __le32_to_cpu(migr_rec->family_num)); printf(" Ascending : %u\n", @@ -1352,36 +1826,36 @@ void examine_migr_rec_imsm(struct intel_super *super) __le32_to_cpu(migr_rec->blocks_per_unit)); printf(" Dest. Depth Per Unit : %u\n", __le32_to_cpu(migr_rec->dest_depth_per_unit)); - printf(" Checkpoint Area pba : %u\n", - __le32_to_cpu(migr_rec->ckpt_area_pba)); - printf(" First member lba : %u\n", - __le32_to_cpu(migr_rec->dest_1st_member_lba)); - printf(" Total Number of Units : %u\n", - __le32_to_cpu(migr_rec->num_migr_units)); - printf(" Size of volume : %u\n", - __le32_to_cpu(migr_rec->post_migr_vol_cap)); - printf(" Expansion space for LBA64 : %u\n", - __le32_to_cpu(migr_rec->post_migr_vol_cap_hi)); + printf(" Checkpoint Area pba : %llu\n", + migr_chkp_area_pba(migr_rec)); + printf(" First member lba : %llu\n", + migr_dest_1st_member_lba(migr_rec)); + printf(" Total Number of Units : %llu\n", + get_num_migr_units(migr_rec)); + printf(" Size of volume : %llu\n", + join_u32(migr_rec->post_migr_vol_cap, + migr_rec->post_migr_vol_cap_hi)); printf(" Record was read from : %u\n", __le32_to_cpu(migr_rec->ckpt_read_disk_num)); break; } } -#endif /* MDASSEMBLE */ void convert_from_4k_imsm_migr_rec(struct intel_super *super) { struct migr_record *migr_rec = super->migr_rec; migr_rec->blocks_per_unit *= IMSM_4K_DIV; - migr_rec->ckpt_area_pba *= IMSM_4K_DIV; - migr_rec->dest_1st_member_lba *= IMSM_4K_DIV; migr_rec->dest_depth_per_unit *= IMSM_4K_DIV; split_ull((join_u32(migr_rec->post_migr_vol_cap, migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV), &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi); + set_migr_chkp_area_pba(migr_rec, + migr_chkp_area_pba(migr_rec) * IMSM_4K_DIV); + set_migr_dest_1st_member_lba(migr_rec, + migr_dest_1st_member_lba(migr_rec) * IMSM_4K_DIV); } void convert_from_4k(struct intel_super *super) @@ -1389,6 +1863,7 @@ void convert_from_4k(struct intel_super *super) struct imsm_super *mpb = super->anchor; struct imsm_disk *disk; int i; + __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size); for (i = 0; i < mpb->num_disks ; i++) { disk = __get_imsm_disk(mpb, i); @@ -1400,8 +1875,7 @@ void convert_from_4k(struct intel_super *super) struct imsm_dev *dev = __get_imsm_dev(mpb, i); struct imsm_map *map = get_imsm_map(dev, MAP_0); /* dev */ - split_ull((join_u32(dev->size_low, dev->size_high)*IMSM_4K_DIV), - &dev->size_low, &dev->size_high); + set_imsm_dev_size(dev, imsm_dev_size(dev)*IMSM_4K_DIV); dev->vol.curr_migr_unit *= IMSM_4K_DIV; /* map0 */ @@ -1418,6 +1892,24 @@ void convert_from_4k(struct intel_super *super) set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV); } } + if (bbm_log_size) { + struct bbm_log *log = (void *)mpb + + __le32_to_cpu(mpb->mpb_size) - bbm_log_size; + __u32 i; + + for (i = 0; i < log->entry_count; i++) { + struct bbm_log_entry *entry = + &log->marked_block_entries[i]; + + __u8 count = entry->marked_count + 1; + unsigned long long sector = + __le48_to_cpu(&entry->defective_block_start); + + entry->defective_block_start = + __cpu_to_le48(sector*IMSM_4K_DIV); + entry->marked_count = count*IMSM_4K_DIV - 1; + } + } mpb->check_sum = __gen_imsm_checksum(mpb); } @@ -1509,7 +2001,6 @@ static int imsm_check_attributes(__u32 attributes) return ret_val; } -#ifndef MDASSEMBLE static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map); static void examine_super_imsm(struct supertype *st, char *homehost) @@ -1524,9 +2015,9 @@ static void examine_super_imsm(struct supertype *st, char *homehost) __u32 reserved = imsm_reserved_sectors(super, super->disks); struct dl *dl; - snprintf(str, MPB_SIG_LEN, "%s", mpb->sig); + strncpy(str, (char *)mpb->sig, MPB_SIG_LEN); + str[MPB_SIG_LEN-1] = '\0'; printf(" Magic : %s\n", str); - snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb)); printf(" Version : %s\n", get_imsm_version(mpb)); printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num)); printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num)); @@ -1545,8 +2036,9 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" MPB Sectors : %d\n", mpb_sectors(mpb, super->sector_size)); printf(" Disks : %d\n", mpb->num_disks); printf(" RAID Devices : %d\n", mpb->num_raid_devs); - print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved); - if (super->bbm_log) { + print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), + super->disks->index, reserved, super->sector_size); + if (get_imsm_bbm_log_size(super->bbm_log)) { struct bbm_log *log = super->bbm_log; printf("\n"); @@ -1554,9 +2046,6 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size)); printf(" Signature : %x\n", __le32_to_cpu(log->signature)); printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count)); - printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count)); - printf(" First Spare : %llx\n", - (unsigned long long) __le64_to_cpu(log->first_spare_lba)); } for (i = 0; i < mpb->num_raid_devs; i++) { struct mdinfo info; @@ -1570,12 +2059,14 @@ static void examine_super_imsm(struct supertype *st, char *homehost) for (i = 0; i < mpb->num_disks; i++) { if (i == super->disks->index) continue; - print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved); + print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved, + super->sector_size); } for (dl = super->disks; dl; dl = dl->next) if (dl->index == -1) - print_imsm_disk(&dl->disk, -1, reserved); + print_imsm_disk(&dl->disk, -1, reserved, + super->sector_size); examine_migr_rec_imsm(super); } @@ -1637,81 +2128,43 @@ static void export_examine_super_imsm(struct supertype *st) printf("MD_DEVICES=%u\n", mpb->num_disks); } -static int copy_metadata_imsm(struct supertype *st, int from, int to) -{ - /* The second last sector of the device contains - * the "struct imsm_super" metadata. - * This contains mpb_size which is the size in bytes of the - * extended metadata. This is located immediately before - * the imsm_super. - * We want to read all that, plus the last sector which - * may contain a migration record, and write it all - * to the target. - */ - void *buf; - unsigned long long dsize, offset; - int sectors; - struct imsm_super *sb; - struct intel_super *super = st->sb; - unsigned int sector_size = super->sector_size; - unsigned int written = 0; - - if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0) - return 1; - - if (!get_dev_size(from, NULL, &dsize)) - goto err; - - if (lseek64(from, dsize-(2*sector_size), 0) < 0) - goto err; - if (read(from, buf, sector_size) != sector_size) - goto err; - sb = buf; - if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) - goto err; - - sectors = mpb_sectors(sb, sector_size) + 2; - offset = dsize - sectors * sector_size; - if (lseek64(from, offset, 0) < 0 || - lseek64(to, offset, 0) < 0) - goto err; - while (written < sectors * sector_size) { - int n = sectors*sector_size - written; - if (n > 4096) - n = 4096; - if (read(from, buf, n) != n) - goto err; - if (write(to, buf, n) != n) - goto err; - written += n; - } - free(buf); - return 0; -err: - free(buf); - return 1; -} - -static void detail_super_imsm(struct supertype *st, char *homehost) +static void detail_super_imsm(struct supertype *st, char *homehost, + char *subarray) { struct mdinfo info; char nbuf[64]; + struct intel_super *super = st->sb; + int temp_vol = super->current_vol; + + if (subarray) + super->current_vol = strtoul(subarray, NULL, 10); getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); - printf("\n UUID : %s\n", nbuf + 5); + printf("\n UUID : %s\n", nbuf + 5); + + super->current_vol = temp_vol; } -static void brief_detail_super_imsm(struct supertype *st) +static void brief_detail_super_imsm(struct supertype *st, char *subarray) { struct mdinfo info; char nbuf[64]; + struct intel_super *super = st->sb; + int temp_vol = super->current_vol; + + if (subarray) + super->current_vol = strtoul(subarray, NULL, 10); + getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); printf(" UUID=%s", nbuf + 5); + + super->current_vol = temp_vol; } -static int imsm_read_serial(int fd, char *devname, __u8 *serial); +static int imsm_read_serial(int fd, char *devname, __u8 *serial, + size_t serial_buf_len); static void fd2devname(int fd, char *name); static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose) @@ -1857,8 +2310,9 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b else { fd2devname(fd, buf); printf(" Port%d : %s", port, buf); - if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0) - printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf); + if (imsm_read_serial(fd, NULL, (__u8 *)buf, + sizeof(buf)) == 0) + printf(" (%s)\n", buf); else printf(" ()\n"); close(fd); @@ -1881,55 +2335,45 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b return err; } -static int print_vmd_attached_devs(struct sys_dev *hba) +static int print_nvme_info(struct sys_dev *hba) { + char buf[1024]; struct dirent *ent; DIR *dir; - char path[292]; - char link[256]; - char *c, *rp; - - if (hba->type != SYS_DEV_VMD) - return 1; + char *rp; + int fd; - /* scroll through /sys/dev/block looking for devices attached to - * this hba - */ - dir = opendir("/sys/bus/pci/drivers/nvme"); + dir = opendir("/sys/block/"); if (!dir) return 1; for (ent = readdir(dir); ent; ent = readdir(dir)) { - int n; - - /* is 'ent' a device? check that the 'subsystem' link exists and - * that its target matches 'bus' - */ - sprintf(path, "/sys/bus/pci/drivers/nvme/%s/subsystem", - ent->d_name); - n = readlink(path, link, sizeof(link)); - if (n < 0 || n >= (int)sizeof(link)) - continue; - link[n] = '\0'; - c = strrchr(link, '/'); - if (!c) - continue; - if (strncmp("pci", c+1, strlen("pci")) != 0) - continue; - - sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name); - /* if not a intel NVMe - skip it*/ - if (devpath_to_vendor(path) != 0x8086) - continue; - - rp = realpath(path, NULL); - if (!rp) - continue; + if (strstr(ent->d_name, "nvme")) { + sprintf(buf, "/sys/block/%s", ent->d_name); + rp = realpath(buf, NULL); + if (!rp) + continue; + if (path_attached_to_hba(rp, hba->path)) { + fd = open_dev(ent->d_name); + if (fd < 0) { + free(rp); + continue; + } - if (path_attached_to_hba(rp, hba->path)) { - printf(" NVMe under VMD : %s\n", rp); + fd2devname(fd, buf); + if (hba->type == SYS_DEV_VMD) + printf(" NVMe under VMD : %s", buf); + else if (hba->type == SYS_DEV_NVME) + printf(" NVMe Device : %s", buf); + if (!imsm_read_serial(fd, NULL, (__u8 *)buf, + sizeof(buf))) + printf(" (%s)\n", buf); + else + printf("()\n"); + close(fd); + } + free(rp); } - free(rp); } closedir(dir); @@ -1992,6 +2436,8 @@ static void print_imsm_capability(const struct imsm_orom *orom) printf(" Platform : Intel(R) "); if (orom->capabilities == 0 && orom->driver_features == 0) printf("Matrix Storage Manager\n"); + else if (imsm_orom_is_enterprise(orom) && orom->major_ver >= 6) + printf("Virtual RAID on CPU\n"); else printf("Rapid Storage Technology%s\n", imsm_orom_is_enterprise(orom) ? " enterprise" : ""); @@ -2135,12 +2581,14 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle for (entry = orom_entries; entry; entry = entry->next) { if (entry->type == SYS_DEV_VMD) { print_imsm_capability(&entry->orom); + printf(" 3rd party NVMe :%s supported\n", + imsm_orom_has_tpv_support(&entry->orom)?"":" not"); for (hba = list; hba; hba = hba->next) { if (hba->type == SYS_DEV_VMD) { char buf[PATH_MAX]; printf(" I/O Controller : %s (%s)\n", vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type)); - if (print_vmd_attached_devs(hba)) { + if (print_nvme_info(hba)) { if (verbose > 0) pr_err("failed to get devices attached to VMD domain.\n"); result |= 2; @@ -2155,7 +2603,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle if (entry->type == SYS_DEV_NVME) { for (hba = list; hba; hba = hba->next) { if (hba->type == SYS_DEV_NVME) - printf(" NVMe Device : %s\n", hba->path); + print_nvme_info(hba); } printf("\n"); continue; @@ -2223,8 +2671,6 @@ static int export_detail_platform_imsm(int verbose, char *controller_path) return result; } -#endif - static int match_home_imsm(struct supertype *st, char *homehost) { /* the imsm metadata format does not specify any host @@ -2380,13 +2826,11 @@ static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev) return num_stripes_per_unit_resync(dev); } -static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) +static __u8 imsm_num_data_members(struct imsm_map *map) { /* named 'imsm_' because raid0, raid1 and raid10 * counter-intuitively have the same number of data disks */ - struct imsm_map *map = get_imsm_map(dev, second_map); - switch (get_imsm_raid_level(map)) { case 0: return map->num_members; @@ -2402,6 +2846,36 @@ static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) } } +static unsigned long long calc_component_size(struct imsm_map *map, + struct imsm_dev *dev) +{ + unsigned long long component_size; + unsigned long long dev_size = imsm_dev_size(dev); + long long calc_dev_size = 0; + unsigned int member_disks = imsm_num_data_members(map); + + if (member_disks == 0) + return 0; + + component_size = per_dev_array_size(map); + calc_dev_size = component_size * member_disks; + + /* Component size is rounded to 1MB so difference between size from + * metadata and size calculated from num_data_stripes equals up to + * 2048 blocks per each device. If the difference is higher it means + * that array size was expanded and num_data_stripes was not updated. + */ + if (llabs(calc_dev_size - (long long)dev_size) > + (1 << SECT_PER_MB_SHIFT) * member_disks) { + component_size = dev_size / member_disks; + dprintf("Invalid num_data_stripes in metadata; expected=%llu, found=%llu\n", + component_size / map->blocks_per_strip, + num_data_stripes(map)); + } + + return component_size; +} + static __u32 parity_segment_depth(struct imsm_dev *dev) { struct imsm_map *map = get_imsm_map(dev, MAP_0); @@ -2475,7 +2949,7 @@ static __u64 blocks_per_migr_unit(struct intel_super *super, */ stripes_per_unit = num_stripes_per_unit_resync(dev); migr_chunk = migr_strip_blocks_resync(dev); - disks = imsm_num_data_members(dev, MAP_0); + disks = imsm_num_data_members(map); blocks_per_unit = stripes_per_unit * migr_chunk * disks; stripe = __le16_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; @@ -2536,7 +3010,7 @@ static int read_imsm_migr_rec(int fd, struct intel_super *super) strerror(errno)); goto out; } - if (read(fd, super->migr_rec_buf, + if ((unsigned int)read(fd, super->migr_rec_buf, MIGR_REC_BUF_SECTORS*sector_size) != MIGR_REC_BUF_SECTORS*sector_size) { pr_err("Cannot read migr record block: %s\n", @@ -2635,7 +3109,6 @@ out: return retval; } -#ifndef MDASSEMBLE /******************************************************************************* * function: imsm_create_metadata_checkpoint_update * Description: It creates update for checkpoint change. @@ -2669,7 +3142,7 @@ static int imsm_create_metadata_checkpoint_update( return 0; } (*u)->type = update_general_migration_checkpoint; - (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit); + (*u)->curr_migr_unit = current_migr_unit(super->migr_rec); dprintf("prepared for %u\n", (*u)->curr_migr_unit); return update_memory_size; @@ -2738,7 +3211,7 @@ static int write_imsm_migr_rec(struct supertype *st) strerror(errno)); goto out; } - if (write(fd, super->migr_rec_buf, + if ((unsigned int)write(fd, super->migr_rec_buf, MIGR_REC_BUF_SECTORS*sector_size) != MIGR_REC_BUF_SECTORS*sector_size) { pr_err("Cannot write migr record block: %s\n", @@ -2776,7 +3249,6 @@ static int write_imsm_migr_rec(struct supertype *st) close(fd); return retval; } -#endif /* MDASSEMBLE */ /* spare/missing disks activations are not allowe when * array/container performs reshape operation, because @@ -2801,32 +3273,41 @@ int imsm_reshape_blocks_arrays_changes(struct intel_super *super) } return rv; } -static unsigned long long imsm_component_size_aligment_check(int level, +static unsigned long long imsm_component_size_alignment_check(int level, int chunk_size, unsigned int sector_size, unsigned long long component_size) { - unsigned int component_size_alligment; + unsigned int component_size_alignment; - /* check component size aligment + /* check component size alignment */ - component_size_alligment = component_size % (chunk_size/sector_size); + component_size_alignment = component_size % (chunk_size/sector_size); - dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alligment = %u\n", + dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alignment = %u\n", level, chunk_size, component_size, - component_size_alligment); + component_size_alignment); - if (component_size_alligment && (level != 1) && (level != UnSet)) { - dprintf("imsm: reported component size alligned from %llu ", + if (component_size_alignment && (level != 1) && (level != UnSet)) { + dprintf("imsm: reported component size aligned from %llu ", component_size); - component_size -= component_size_alligment; + component_size -= component_size_alignment; dprintf_cont("to %llu (%i).\n", - component_size, component_size_alligment); + component_size, component_size_alignment); } return component_size; } +static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx) +{ + struct imsm_dev *dev = get_imsm_dev(super, dev_idx); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + + return pba_of_lba0(map) + + (num_data_stripes(map) * map->blocks_per_strip); +} + static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap) { struct intel_super *super = st->sb; @@ -2853,10 +3334,8 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->array.utime = 0; info->array.chunk_size = __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9; - info->array.state = !dev->vol.dirty; - info->custom_array_size = __le32_to_cpu(dev->size_high); - info->custom_array_size <<= 32; - info->custom_array_size |= __le32_to_cpu(dev->size_low); + info->array.state = !(dev->vol.dirty & RAIDVOL_DIRTY); + info->custom_array_size = imsm_dev_size(dev); info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); if (is_gen_migration(dev)) { @@ -2916,21 +3395,37 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, } info->data_offset = pba_of_lba0(map_to_analyse); - info->component_size = blocks_per_member(map_to_analyse); - - info->component_size = imsm_component_size_aligment_check( + info->component_size = calc_component_size(map, dev); + info->component_size = imsm_component_size_alignment_check( info->array.level, info->array.chunk_size, super->sector_size, info->component_size); + info->bb.supported = 1; memset(info->uuid, 0, sizeof(info->uuid)); info->recovery_start = MaxSector; + if (info->array.level == 5 && + (dev->rwh_policy == RWH_DISTRIBUTED || + dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)) { + info->consistency_policy = CONSISTENCY_POLICY_PPL; + info->ppl_sector = get_ppl_sector(super, super->current_vol); + if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED) + info->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9; + else + info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) + >> 9; + } else if (info->array.level <= 0) { + info->consistency_policy = CONSISTENCY_POLICY_NONE; + } else { + info->consistency_policy = CONSISTENCY_POLICY_RESYNC; + } + info->reshape_progress = 0; info->resync_start = MaxSector; if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED || - dev->vol.dirty) && + !(info->array.state & 1)) && imsm_reshape_blocks_arrays_changes(super) == 0) { info->resync_start = 0; } @@ -2948,13 +3443,13 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, case MIGR_GEN_MIGR: { __u64 blocks_per_unit = blocks_per_migr_unit(super, dev); - __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit); + __u64 units = current_migr_unit(migr_rec); unsigned long long array_blocks; int used_disks; if (__le32_to_cpu(migr_rec->ascending_migr) && (units < - (__le32_to_cpu(migr_rec->num_migr_units)-1)) && + (get_num_migr_units(migr_rec)-1)) && (super->migr_rec->rec_status == __cpu_to_le32(UNIT_SRC_IN_CP_AREA))) units++; @@ -2966,15 +3461,14 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, (unsigned long long)blocks_per_unit, info->reshape_progress); - used_disks = imsm_num_data_members(dev, MAP_1); + used_disks = imsm_num_data_members(prev_map); if (used_disks > 0) { - array_blocks = blocks_per_member(map) * + array_blocks = per_dev_array_size(map) * used_disks; - /* round array size down to closest MB - */ - info->custom_array_size = (array_blocks - >> SECT_PER_MB_SHIFT) - << SECT_PER_MB_SHIFT; + info->custom_array_size = + round_size_to_mb(array_blocks, + used_disks); + } } case MIGR_VERIFY: @@ -3022,7 +3516,6 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, int look_in_map); -#ifndef MDASSEMBLE static void manage_second_map(struct intel_super *super, struct imsm_dev *dev) { if (is_gen_migration(dev)) { @@ -3038,7 +3531,6 @@ static void manage_second_map(struct intel_super *super, struct imsm_dev *dev) } } } -#endif static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index) { @@ -3089,9 +3581,11 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * info->name[0] = 0; info->recovery_start = MaxSector; info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); + info->bb.supported = 1; /* do we have the all the insync disks that we expect? */ mpb = super->anchor; + info->events = __le32_to_cpu(mpb->generation_num); for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); @@ -3110,6 +3604,9 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0); __u32 idx = ord_to_idx(ord); + if (super->disks && super->disks->index == (int)idx) + info->disk.raid_disk = j; + if (!(ord & IMSM_ORD_REBUILD) && get_imsm_missing(super, idx)) { missing = 1; @@ -3155,7 +3652,8 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * * found the 'most fresh' version of the metadata */ info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0; - info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC); + info->disk.state |= (is_spare(disk) || is_journal(disk)) ? + 0 : (1 << MD_DISK_SYNC); } /* only call uuid_from_super_imsm when this disk is part of a populated container, @@ -3293,6 +3791,8 @@ static size_t disks_to_mpb_size(int disks) size += (4 - 2) * sizeof(struct imsm_map); /* 4 possible disk_ord_tbl's */ size += 4 * (disks - 1) * sizeof(__u32); + /* maximum bbm log */ + size += sizeof(struct bbm_log); return size; } @@ -3468,11 +3968,11 @@ static int nvme_get_serial(int fd, void *buf, size_t buf_len) extern int scsi_get_serial(int fd, void *buf, size_t buf_len); static int imsm_read_serial(int fd, char *devname, - __u8 serial[MAX_RAID_SERIAL_LEN]) + __u8 *serial, size_t serial_buf_len) { char buf[50]; int rv; - int len; + size_t len; char *dest; char *src; unsigned int i; @@ -3515,13 +4015,13 @@ static int imsm_read_serial(int fd, char *devname, len = dest - buf; dest = buf; - /* truncate leading characters */ - if (len > MAX_RAID_SERIAL_LEN) { - dest += len - MAX_RAID_SERIAL_LEN; - len = MAX_RAID_SERIAL_LEN; + if (len > serial_buf_len) { + /* truncate leading characters */ + dest += len - serial_buf_len; + len = serial_buf_len; } - memset(serial, 0, MAX_RAID_SERIAL_LEN); + memset(serial, 0, serial_buf_len); memcpy(serial, dest, len); return 0; @@ -3576,7 +4076,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) char name[40]; __u8 serial[MAX_RAID_SERIAL_LEN]; - rv = imsm_read_serial(fd, devname, serial); + rv = imsm_read_serial(fd, devname, serial, MAX_RAID_SERIAL_LEN); if (rv != 0) return 2; @@ -3608,14 +4108,13 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) */ if (is_failed(&dl->disk)) dl->index = -2; - else if (is_spare(&dl->disk)) + else if (is_spare(&dl->disk) || is_journal(&dl->disk)) dl->index = -1; } return 0; } -#ifndef MDASSEMBLE /* When migrating map0 contains the 'destination' state while map1 * contains the current state. When not migrating map0 contains the * current state. This routine assumes that map[0].map_state is set to @@ -3645,7 +4144,7 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super, /* duplicate and then set the target end state in map[0] */ memcpy(dest, src, sizeof_imsm_map(src)); - if (migr_type == MIGR_REBUILD || migr_type == MIGR_GEN_MIGR) { + if (migr_type == MIGR_GEN_MIGR) { __u32 ord; int i; @@ -3703,7 +4202,6 @@ static void end_migration(struct imsm_dev *dev, struct intel_super *super, dev->vol.curr_migr_unit = 0; map->map_state = map_state; } -#endif static int parse_raid_devices(struct intel_super *super) { @@ -3754,20 +4252,9 @@ static int parse_raid_devices(struct intel_super *super) super->len = len; } - return 0; -} - -/* retrieve a pointer to the bbm log which starts after all raid devices */ -struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb) -{ - void *ptr = NULL; + super->extra_space += space_needed; - if (__le32_to_cpu(mpb->bbm_log_size)) { - ptr = mpb; - ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size); - } - - return ptr; + return 0; } /******************************************************************************* @@ -3799,8 +4286,8 @@ int check_mpb_migr_compatibility(struct intel_super *super) if (pba_of_lba0(map0) != pba_of_lba0(map1)) /* migration optimization area was used */ return -1; - if (migr_rec->ascending_migr == 0 - && migr_rec->dest_depth_per_unit > 0) + if (migr_rec->ascending_migr == 0 && + migr_rec->dest_depth_per_unit > 0) /* descending reshape not supported yet */ return -1; } @@ -3842,7 +4329,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) pr_err("Failed to allocate imsm anchor buffer on %s\n", devname); return 1; } - if (read(fd, anchor, sector_size) != sector_size) { + if ((unsigned int)read(fd, anchor, sector_size) != sector_size) { if (devname) pr_err("Cannot read anchor block on %s: %s\n", devname, strerror(errno)); @@ -3875,8 +4362,8 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) sectors = mpb_sectors(anchor, sector_size) - 1; free(anchor); - if (posix_memalign(&super->migr_rec_buf, sector_size, - MIGR_REC_BUF_SECTORS*sector_size) != 0) { + if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE, + MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) { pr_err("could not allocate migr_rec buffer\n"); free(super->buf); return 2; @@ -3922,12 +4409,6 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 3; } - /* FIXME the BBM log is disk specific so we cannot use this global - * buffer for all disks. Ok for now since we only look at the global - * bbm_log_size parameter to gate assembly - */ - super->bbm_log = __get_imsm_bbm_log(super->anchor); - return 0; } @@ -3973,6 +4454,9 @@ load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd if (err) return err; err = parse_raid_devices(super); + if (err) + return err; + err = load_bbm_log(super); clear_hi(super); return err; } @@ -4037,12 +4521,15 @@ static void __free_imsm(struct intel_super *super, int free_disks) free(elem); elem = next; } + if (super->bbm_log) + free(super->bbm_log); super->hba = NULL; } static void free_imsm(struct intel_super *super) { __free_imsm(super, 1); + free(super->bb.entries); free(super); } @@ -4063,6 +4550,14 @@ static struct intel_super *alloc_super(void) super->current_vol = -1; super->create_offset = ~((unsigned long long) 0); + + super->bb.entries = xmalloc(BBM_LOG_MAX_ENTRIES * + sizeof(struct md_bb_entry)); + if (!super->bb.entries) { + free(super); + return NULL; + } + return super; } @@ -4074,6 +4569,11 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de struct sys_dev *hba_name; int rv = 0; + if (fd >= 0 && test_partition(fd)) { + pr_err("imsm: %s is a partition, cannot be used in IMSM\n", + devname); + return 1; + } if (fd < 0 || check_env("IMSM_NO_PLATFORM")) { super->orom = NULL; super->hba = NULL; @@ -4107,8 +4607,7 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de hba = hba->next; } fprintf(stderr, ").\n" - " Mixing devices attached to different %s is not allowed.\n", - hba_name->type == SYS_DEV_VMD ? "VMD domains" : "controllers"); + " Mixing devices attached to different controllers is not allowed.\n"); } return 2; } @@ -4152,7 +4651,6 @@ static int find_missing(struct intel_super *super) return 0; } -#ifndef MDASSEMBLE static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list) { struct intel_disk *idisk = disk_list; @@ -4643,7 +5141,7 @@ static int get_super_block(struct intel_super **super_list, char *devnm, char *d *super_list = s; } else { if (s) - free(s); + free_imsm(s); if (dfd >= 0) close(dfd); } @@ -4690,7 +5188,6 @@ static int load_container_imsm(struct supertype *st, int fd, char *devname) { return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1); } -#endif static int load_super_imsm(struct supertype *st, int fd, char *devname) { @@ -4706,6 +5203,8 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) super = alloc_super(); get_dev_sector_size(fd, NULL, &super->sector_size); + if (!super) + return 1; /* Load hba and capabilities if they exist. * But do not preclude loading metadata in case capabilities or hba are * non-compliant and ignore_hw_compat is set. @@ -4837,10 +5336,22 @@ static int check_name(struct intel_super *super, char *name, int quiet) { struct imsm_super *mpb = super->anchor; char *reason = NULL; + char *start = name; + size_t len = strlen(name); int i; - if (strlen(name) > MAX_RAID_SERIAL_LEN) + if (len > 0) { + while (isspace(start[len - 1])) + start[--len] = 0; + while (*start && isspace(*start)) + ++start, --len; + memmove(name, start, len + 1); + } + + if (len > MAX_RAID_SERIAL_LEN) reason = "must be 16 characters or less"; + else if (len == 0) + reason = "must be a non-empty string"; for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); @@ -4858,7 +5369,7 @@ static int check_name(struct intel_super *super, char *name, int quiet) } static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, - unsigned long long size, char *name, + struct shape *s, char *name, char *homehost, int *uuid, long long data_offset) { @@ -4874,9 +5385,12 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, struct imsm_map *map; int idx = mpb->num_raid_devs; int i; + int namelen; unsigned long long array_blocks; size_t size_old, size_new; unsigned long long num_data_stripes; + unsigned int data_disks; + unsigned long long size_per_member; if (super->orom && mpb->num_raid_devs >= super->orom->vpa) { pr_err("This imsm-container already has the maximum of %d volumes\n", super->orom->vpa); @@ -4894,8 +5408,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, pr_err("could not allocate new mpb\n"); return 0; } - if (posix_memalign(&super->migr_rec_buf, sector_size, - MIGR_REC_BUF_SECTORS*sector_size) != 0) { + if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE, + MIGR_REC_BUF_SECTORS* + MAX_SECTOR_SIZE) != 0) { pr_err("could not allocate migr_rec buffer\n"); free(super->buf); free(super); @@ -4908,6 +5423,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, super->anchor = mpb_new; mpb->mpb_size = __cpu_to_le32(size_new); memset(mpb_new + size_old, 0, size_round - size_old); + super->len = size_round; } super->current_vol = idx; @@ -4930,7 +5446,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, disk->status = CONFIGURED_DISK | FAILED_DISK; disk->scsi_id = __cpu_to_le32(~(__u32)0); snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN, - "missing:%d", i); + "missing:%d", (__u8)i); } find_missing(super); } else { @@ -4949,15 +5465,21 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, return 0; dv = xmalloc(sizeof(*dv)); dev = xcalloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1)); - strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN); + /* + * Explicitly allow truncating to not confuse gcc's + * -Werror=stringop-truncation + */ + namelen = min((int) strlen(name), MAX_RAID_SERIAL_LEN); + memcpy(dev->volume, name, namelen); array_blocks = calc_array_size(info->level, info->raid_disks, info->layout, info->chunk_size, - size * 2); - /* round array size down to closest MB */ - array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; + s->size * BLOCKS_PER_KB); + data_disks = get_data_disks(info->level, info->layout, + info->raid_disks); + array_blocks = round_size_to_mb(array_blocks, data_disks); + size_per_member = array_blocks / data_disks; - dev->size_low = __cpu_to_le32((__u32) array_blocks); - dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32)); + set_imsm_dev_size(dev, array_blocks); dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING); vol = &dev->vol; vol->migr_state = 0; @@ -4966,7 +5488,6 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, vol->curr_migr_unit = 0; map = get_imsm_map(dev, MAP_0); set_pba_of_lba0(map, super->create_offset); - set_blocks_per_member(map, info_to_blocks_per_member(info, size)); map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); map->failed_disk_num = ~0; if (info->level > 0) @@ -4994,16 +5515,35 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, map->num_domains = 1; /* info->size is only int so use the 'size' parameter instead */ - num_data_stripes = (size * 2) / info_to_blocks_per_strip(info); + num_data_stripes = size_per_member / info_to_blocks_per_strip(info); num_data_stripes /= map->num_domains; set_num_data_stripes(map, num_data_stripes); + size_per_member += NUM_BLOCKS_DIRTY_STRIPE_REGION; + set_blocks_per_member(map, info_to_blocks_per_member(info, + size_per_member / + BLOCKS_PER_KB)); + map->num_members = info->raid_disks; for (i = 0; i < map->num_members; i++) { /* initialized in add_to_super */ set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD); } mpb->num_raid_devs++; + mpb->num_raid_devs_created++; + dev->my_vol_raid_dev_num = mpb->num_raid_devs_created; + + if (s->consistency_policy <= CONSISTENCY_POLICY_RESYNC) { + dev->rwh_policy = RWH_MULTIPLE_OFF; + } else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) { + dev->rwh_policy = RWH_MULTIPLE_DISTRIBUTED; + } else { + free(dev); + free(dv); + pr_err("imsm does not support consistency policy %s\n", + map_num(consistency_policies, s->consistency_policy)); + return 0; + } dv->dev = dev; dv->index = super->current_vol; @@ -5016,7 +5556,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, } static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, - unsigned long long size, char *name, + struct shape *s, char *name, char *homehost, int *uuid, unsigned long long data_offset) { @@ -5039,7 +5579,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, } if (st->sb) - return init_super_imsm_volume(st, info, size, name, homehost, uuid, + return init_super_imsm_volume(st, info, s, name, homehost, uuid, data_offset); if (info) @@ -5050,7 +5590,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, super = alloc_super(); if (super && posix_memalign(&super->buf, MAX_SECTOR_SIZE, mpb_size) != 0) { - free(super); + free_imsm(super); super = NULL; } if (!super) { @@ -5061,7 +5601,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) { pr_err("could not allocate migr_rec buffer\n"); free(super->buf); - free(super); + free_imsm(super); return 0; } memset(super->buf, 0, mpb_size); @@ -5084,7 +5624,22 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, return 1; } -#ifndef MDASSEMBLE +static int drive_validate_sector_size(struct intel_super *super, struct dl *dl) +{ + unsigned int member_sector_size; + + if (dl->fd < 0) { + pr_err("Invalid file descriptor for %s\n", dl->devname); + return 0; + } + + if (!get_dev_sector_size(dl->fd, dl->devname, &member_sector_size)) + return 0; + if (member_sector_size != super->sector_size) + return 0; + return 1; +} + static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) { @@ -5124,6 +5679,16 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, return 1; } + if (mpb->num_disks == 0) + if (!get_dev_sector_size(dl->fd, dl->devname, + &super->sector_size)) + return 1; + + if (!drive_validate_sector_size(super, dl)) { + pr_err("Combining drives of different sector size in one volume is not allowed\n"); + return 1; + } + /* add a pristine spare to the metadata */ if (dl->index < 0) { dl->index = super->anchor->num_disks; @@ -5219,7 +5784,7 @@ int mark_spare(struct dl *disk) return ret_val; ret_val = 0; - if (!imsm_read_serial(disk->fd, NULL, serial)) { + if (!imsm_read_serial(disk->fd, NULL, serial, MAX_RAID_SERIAL_LEN)) { /* Restore disk serial number, because takeover marks disk * as failed and adds to serial ':0' before it becomes * a spare disk. @@ -5270,7 +5835,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, dd->fd = fd; dd->e = NULL; dd->action = DISK_ADD; - rv = imsm_read_serial(fd, devname, dd->serial); + rv = imsm_read_serial(fd, devname, dd->serial, MAX_RAID_SERIAL_LEN); if (rv) { pr_err("failed to retrieve scsi serial, aborting\n"); if (dd->devname) @@ -5318,6 +5883,13 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, "\tRAID 0 is the only supported configuration for this type of x8 device.\n"); break; } + } else if (super->hba->type == SYS_DEV_VMD && super->orom && + !imsm_orom_has_tpv_support(super->orom)) { + pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n" + "\tPlease refer to Intel(R) RSTe/VROC user guide.\n"); + free(dd->devname); + free(dd); + return 1; } } @@ -5327,21 +5899,15 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, if (super->sector_size == 0) { /* this a first device, so sector_size is not set yet */ super->sector_size = member_sector_size; - } else if (member_sector_size != super->sector_size) { - pr_err("Mixing between different sector size is forbidden, aborting...\n"); - if (dd->devname) - free(dd->devname); - free(dd); - return 1; } /* clear migr_rec when adding disk to container */ - memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*super->sector_size); - if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*super->sector_size, + memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE); + if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*member_sector_size, SEEK_SET) >= 0) { - if (write(fd, super->migr_rec_buf, - MIGR_REC_BUF_SECTORS*super->sector_size) != - MIGR_REC_BUF_SECTORS*super->sector_size) + if ((unsigned int)write(fd, super->migr_rec_buf, + MIGR_REC_BUF_SECTORS*member_sector_size) != + MIGR_REC_BUF_SECTORS*member_sector_size) perror("Write migr_rec failed"); } @@ -5468,6 +6034,7 @@ static int write_super_imsm(struct supertype *st, int doclose) __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk); int num_disks = 0; int clear_migration_record = 1; + __u32 bbm_log_size; /* 'generation' is incremented everytime the metadata is written */ generation = __le32_to_cpu(mpb->generation_num); @@ -5505,9 +6072,23 @@ static int write_super_imsm(struct supertype *st, int doclose) if (is_gen_migration(dev2)) clear_migration_record = 0; } - mpb_size += __le32_to_cpu(mpb->bbm_log_size); + + bbm_log_size = get_imsm_bbm_log_size(super->bbm_log); + + if (bbm_log_size) { + memcpy((void *)mpb + mpb_size, super->bbm_log, bbm_log_size); + mpb->attributes |= MPB_ATTRIB_BBM; + } else + mpb->attributes &= ~MPB_ATTRIB_BBM; + + super->anchor->bbm_log_size = __cpu_to_le32(bbm_log_size); + mpb_size += bbm_log_size; mpb->mpb_size = __cpu_to_le32(mpb_size); +#ifdef DEBUG + assert(super->len == 0 || mpb_size <= super->len); +#endif + /* recalculate checksum */ sum = __gen_imsm_checksum(mpb); mpb->check_sum = __cpu_to_le32(sum); @@ -5518,7 +6099,7 @@ static int write_super_imsm(struct supertype *st, int doclose) } if (clear_migration_record) memset(super->migr_rec_buf, 0, - MIGR_REC_BUF_SECTORS*sector_size); + MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE); if (sector_size == 4096) convert_to_4k(super); @@ -5534,7 +6115,8 @@ static int write_super_imsm(struct supertype *st, int doclose) get_dev_size(d->fd, NULL, &dsize); if (lseek64(d->fd, dsize - sector_size, SEEK_SET) >= 0) { - if (write(d->fd, super->migr_rec_buf, + if ((unsigned int)write(d->fd, + super->migr_rec_buf, MIGR_REC_BUF_SECTORS*sector_size) != MIGR_REC_BUF_SECTORS*sector_size) perror("Write migr_rec failed"); @@ -5607,10 +6189,256 @@ static int mgmt_disk(struct supertype *st) return 0; } +__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len); + +static int write_ppl_header(unsigned long long ppl_sector, int fd, void *buf) +{ + struct ppl_header *ppl_hdr = buf; + int ret; + + ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE)); + + if (lseek64(fd, ppl_sector * 512, SEEK_SET) < 0) { + ret = -errno; + perror("Failed to seek to PPL header location"); + return ret; + } + + if (write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) { + ret = -errno; + perror("Write PPL header failed"); + return ret; + } + + fsync(fd); + + return 0; +} + +static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd) +{ + struct intel_super *super = st->sb; + void *buf; + struct ppl_header *ppl_hdr; + int ret; + + /* first clear entire ppl space */ + ret = zero_disk_range(fd, info->ppl_sector, info->ppl_size); + if (ret) + return ret; + + ret = posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE); + if (ret) { + pr_err("Failed to allocate PPL header buffer\n"); + return -ret; + } + + memset(buf, 0, PPL_HEADER_SIZE); + ppl_hdr = buf; + memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED); + ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num); + + if (info->mismatch_cnt) { + /* + * We are overwriting an invalid ppl. Make one entry with wrong + * checksum to prevent the kernel from skipping resync. + */ + ppl_hdr->entries_count = __cpu_to_le32(1); + ppl_hdr->entries[0].checksum = ~0; + } + + ret = write_ppl_header(info->ppl_sector, fd, buf); + + free(buf); + return ret; +} + +static int is_rebuilding(struct imsm_dev *dev); + +static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info, + struct mdinfo *disk) +{ + struct intel_super *super = st->sb; + struct dl *d; + void *buf_orig, *buf, *buf_prev = NULL; + int ret = 0; + struct ppl_header *ppl_hdr = NULL; + __u32 crc; + struct imsm_dev *dev; + __u32 idx; + unsigned int i; + unsigned long long ppl_offset = 0; + unsigned long long prev_gen_num = 0; + + if (disk->disk.raid_disk < 0) + return 0; + + dev = get_imsm_dev(super, info->container_member); + idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_0); + d = get_imsm_dl_disk(super, idx); + + if (!d || d->index < 0 || is_failed(&d->disk)) + return 0; + + if (posix_memalign(&buf_orig, MAX_SECTOR_SIZE, PPL_HEADER_SIZE * 2)) { + pr_err("Failed to allocate PPL header buffer\n"); + return -1; + } + buf = buf_orig; + + ret = 1; + while (ppl_offset < MULTIPLE_PPL_AREA_SIZE_IMSM) { + void *tmp; + + dprintf("Checking potential PPL at offset: %llu\n", ppl_offset); + + if (lseek64(d->fd, info->ppl_sector * 512 + ppl_offset, + SEEK_SET) < 0) { + perror("Failed to seek to PPL header location"); + ret = -1; + break; + } + + if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) { + perror("Read PPL header failed"); + ret = -1; + break; + } + + ppl_hdr = buf; + + crc = __le32_to_cpu(ppl_hdr->checksum); + ppl_hdr->checksum = 0; + + if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) { + dprintf("Wrong PPL header checksum on %s\n", + d->devname); + break; + } + + if (prev_gen_num > __le64_to_cpu(ppl_hdr->generation)) { + /* previous was newest, it was already checked */ + break; + } + + if ((__le32_to_cpu(ppl_hdr->signature) != + super->anchor->orig_family_num)) { + dprintf("Wrong PPL header signature on %s\n", + d->devname); + ret = 1; + break; + } + + ret = 0; + prev_gen_num = __le64_to_cpu(ppl_hdr->generation); + + ppl_offset += PPL_HEADER_SIZE; + for (i = 0; i < __le32_to_cpu(ppl_hdr->entries_count); i++) + ppl_offset += + __le32_to_cpu(ppl_hdr->entries[i].pp_size); + + if (!buf_prev) + buf_prev = buf + PPL_HEADER_SIZE; + tmp = buf_prev; + buf_prev = buf; + buf = tmp; + } + + if (buf_prev) { + buf = buf_prev; + ppl_hdr = buf_prev; + } + + /* + * Update metadata to use mutliple PPLs area (1MB). + * This is done once for all RAID members + */ + if (info->consistency_policy == CONSISTENCY_POLICY_PPL && + info->ppl_size != (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9)) { + char subarray[20]; + struct mdinfo *member_dev; + + sprintf(subarray, "%d", info->container_member); + + if (mdmon_running(st->container_devnm)) + st->update_tail = &st->updates; + + if (st->ss->update_subarray(st, subarray, "ppl", NULL)) { + pr_err("Failed to update subarray %s\n", + subarray); + } else { + if (st->update_tail) + flush_metadata_updates(st); + else + st->ss->sync_metadata(st); + info->ppl_size = (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9); + for (member_dev = info->devs; member_dev; + member_dev = member_dev->next) + member_dev->ppl_size = + (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9); + } + } + + if (ret == 1) { + struct imsm_map *map = get_imsm_map(dev, MAP_X); + + if (map->map_state == IMSM_T_STATE_UNINITIALIZED || + (map->map_state == IMSM_T_STATE_NORMAL && + !(dev->vol.dirty & RAIDVOL_DIRTY)) || + (is_rebuilding(dev) && + dev->vol.curr_migr_unit == 0 && + get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_1) != idx)) + ret = st->ss->write_init_ppl(st, info, d->fd); + else + info->mismatch_cnt++; + } else if (ret == 0 && + ppl_hdr->entries_count == 0 && + is_rebuilding(dev) && + info->resync_start == 0) { + /* + * The header has no entries - add a single empty entry and + * rewrite the header to prevent the kernel from going into + * resync after an interrupted rebuild. + */ + ppl_hdr->entries_count = __cpu_to_le32(1); + ret = write_ppl_header(info->ppl_sector, d->fd, buf); + } + + free(buf_orig); + + return ret; +} + +static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info) +{ + struct intel_super *super = st->sb; + struct dl *d; + int ret = 0; + + if (info->consistency_policy != CONSISTENCY_POLICY_PPL || + info->array.level != 5) + return 0; + + for (d = super->disks; d ; d = d->next) { + if (d->index < 0 || is_failed(&d->disk)) + continue; + + ret = st->ss->write_init_ppl(st, info, d->fd); + if (ret) + break; + } + + return ret; +} + static int write_init_super_imsm(struct supertype *st) { struct intel_super *super = st->sb; int current_vol = super->current_vol; + int rv = 0; + struct mdinfo info; + + getinfo_super_imsm(st, &info, NULL); /* we are done with current_vol reset it to point st at the container */ super->current_vol = -1; @@ -5618,26 +6446,30 @@ static int write_init_super_imsm(struct supertype *st) if (st->update_tail) { /* queue the recently created array / added disk * as a metadata update */ - int rv; /* determine if we are creating a volume or adding a disk */ if (current_vol < 0) { /* in the mgmt (add/remove) disk case we are running * in mdmon context, so don't close fd's */ - return mgmt_disk(st); - } else - rv = create_array(st, current_vol); - - return rv; + rv = mgmt_disk(st); + } else { + rv = write_init_ppl_imsm_all(st, &info); + if (!rv) + rv = create_array(st, current_vol); + } } else { struct dl *d; for (d = super->disks; d; d = d->next) Kill(d->devname, NULL, 0, -1, 1); - return write_super_imsm(st, 1); + if (current_vol >= 0) + rv = write_init_ppl_imsm_all(st, &info); + if (!rv) + rv = write_super_imsm(st, 1); } + + return rv; } -#endif static int store_super_imsm(struct supertype *st, int fd) { @@ -5647,21 +6479,11 @@ static int store_super_imsm(struct supertype *st, int fd) if (!mpb) return 1; -#ifndef MDASSEMBLE if (super->sector_size == 4096) convert_to_4k(super); return store_imsm_mpb(fd, mpb); -#else - return 1; -#endif } -static int imsm_bbm_log_size(struct imsm_super *mpb) -{ - return __le32_to_cpu(mpb->bbm_log_size); -} - -#ifndef MDASSEMBLE static int validate_geometry_imsm_container(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, @@ -5696,6 +6518,10 @@ static int validate_geometry_imsm_container(struct supertype *st, int level, * note that there is no fd for the disks in array. */ super = alloc_super(); + if (!super) { + close(fd); + return 0; + } if (!get_dev_sector_size(fd, NULL, &super->sector_size)) { close(fd); free_imsm(super); @@ -5879,7 +6705,7 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist, for (memb = mdstat ; memb ; memb = memb->next) { if (memb->metadata_version && - (strncmp(memb->metadata_version, "external:", 9) == 0) && + (strncmp(memb->metadata_version, "external:", 9) == 0) && (strcmp(&memb->metadata_version[9], name) == 0) && !is_subarray(memb->metadata_version+9) && memb->members) { @@ -6027,7 +6853,7 @@ count_volumes_list(struct md_list *devlist, char *homehost, for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { char *devname = tmpdev->devname; - struct stat stb; + dev_t rdev; struct supertype *tst; int dfd; if (tmpdev->used > 1) @@ -6043,14 +6869,7 @@ count_volumes_list(struct md_list *devlist, char *homehost, dprintf("cannot open device %s: %s\n", devname, strerror(errno)); tmpdev->used = 2; - } else if (fstat(dfd, &stb)< 0) { - /* Impossible! */ - dprintf("fstat failed for %s: %s\n", - devname, strerror(errno)); - tmpdev->used = 2; - } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { - dprintf("%s is not a block device.\n", - devname); + } else if (!fstat_is_blkdev(dfd, devname, &rdev)) { tmpdev->used = 2; } else if (must_be_container(dfd)) { struct supertype *cst; @@ -6072,7 +6891,7 @@ count_volumes_list(struct md_list *devlist, char *homehost, if (cst) cst->ss->free_super(cst); } else { - tmpdev->st_rdev = stb.st_rdev; + tmpdev->st_rdev = rdev; if (tst->ss->load_super(tst,dfd, NULL)) { dprintf("no RAID superblock on %s\n", devname); @@ -6170,20 +6989,20 @@ count_volumes_list(struct md_list *devlist, char *homehost, return count; } -static int -count_volumes(struct intel_hba *hba, int dpa, int verbose) +static int __count_volumes(char *hba_path, int dpa, int verbose, + int cmp_hba_path) { struct sys_dev *idev, *intel_devices = find_intel_devices(); int count = 0; const struct orom_entry *entry; struct devid_list *dv, *devid_list; - if (!hba || !hba->path) + if (!hba_path) return 0; for (idev = intel_devices; idev; idev = idev->next) { - if (strstr(idev->path, hba->path)) - break; + if (strstr(idev->path, hba_path)) + break; } if (!idev || !idev->dev_id) @@ -6197,22 +7016,28 @@ count_volumes(struct intel_hba *hba, int dpa, int verbose) devid_list = entry->devid_list; for (dv = devid_list; dv; dv = dv->next) { struct md_list *devlist; - struct sys_dev *device = device_by_id(dv->devid); - char *hba_path; + struct sys_dev *device = NULL; + char *hpath; int found = 0; + if (cmp_hba_path) + device = device_by_id_and_path(dv->devid, hba_path); + else + device = device_by_id(dv->devid); + if (device) - hba_path = device->path; + hpath = device->path; else return 0; - devlist = get_devices(hba_path); + devlist = get_devices(hpath); /* if no intel devices return zero volumes */ if (devlist == NULL) return 0; - count += active_arrays_by_format("imsm", hba_path, &devlist, dpa, verbose); - dprintf("path: %s active arrays: %d\n", hba_path, count); + count += active_arrays_by_format("imsm", hpath, &devlist, dpa, + verbose); + dprintf("path: %s active arrays: %d\n", hpath, count); if (devlist == NULL) return 0; do { @@ -6224,7 +7049,7 @@ count_volumes(struct intel_hba *hba, int dpa, int verbose) dprintf("found %d count: %d\n", found, count); } while (found); - dprintf("path: %s total number of volumes: %d\n", hba_path, count); + dprintf("path: %s total number of volumes: %d\n", hpath, count); while (devlist) { struct md_list *dv = devlist; @@ -6236,6 +7061,24 @@ count_volumes(struct intel_hba *hba, int dpa, int verbose) return count; } +static int count_volumes(struct intel_hba *hba, int dpa, int verbose) +{ + if (!hba) + return 0; + if (hba->type == SYS_DEV_VMD) { + struct sys_dev *dev; + int count = 0; + + for (dev = find_intel_devices(); dev; dev = dev->next) { + if (dev->type == SYS_DEV_VMD) + count += __count_volumes(dev->path, dpa, + verbose, 1); + } + return count; + } + return __count_volumes(hba->path, dpa, verbose, 0); +} + static int imsm_default_chunk(const struct imsm_orom *orom) { /* up to 512 if the plaform supports it, otherwise the platform max. @@ -6303,7 +7146,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, unsigned long long *freesize, int verbose) { - struct stat stb; + dev_t rdev; struct intel_super *super = st->sb; struct imsm_super *mpb; struct dl *dl; @@ -6319,7 +7162,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, mpb = super->anchor; if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) { - pr_err("RAID gemetry validation failed. Cannot proceed with the action(s).\n"); + pr_err("RAID geometry validation failed. Cannot proceed with the action(s).\n"); return 0; } if (!dev) { @@ -6337,7 +7180,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, pos = 0; i = 0; - e = get_extents(super, dl); + e = get_extents(super, dl, 0); if (!e) continue; do { unsigned long long esize; @@ -6368,13 +7211,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, } /* This device must be a member of the set */ - if (stat(dev, &stb) < 0) - return 0; - if ((S_IFMT & stb.st_mode) != S_IFBLK) + if (!stat_is_blkdev(dev, &rdev)) return 0; for (dl = super->disks ; dl ; dl = dl->next) { - if (dl->major == (int)major(stb.st_rdev) && - dl->minor == (int)minor(stb.st_rdev)) + if (dl->major == (int)major(rdev) && + dl->minor == (int)minor(rdev)) break; } if (!dl) { @@ -6397,7 +7238,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, } /* retrieve the largest free space block */ - e = get_extents(super, dl); + e = get_extents(super, dl, 0); maxsize = 0; i = 0; if (e) { @@ -6433,11 +7274,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, maxsize = merge_extents(super, i); - if (!check_env("IMSM_NO_PLATFORM") && - mpb->num_raid_devs > 0 && size && size != maxsize) { - pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n"); - return 0; - } + if (mpb->num_raid_devs > 0 && size && size != maxsize) + pr_err("attempting to create a second volume with size less then remaining space.\n"); if (maxsize < size || maxsize == 0) { if (verbose) { @@ -6495,7 +7333,7 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks, if (super->orom && dl->index < 0 && mpb->num_raid_devs) continue; - e = get_extents(super, dl); + e = get_extents(super, dl, 0); if (!e) continue; for (i = 1; e[i-1].size; i++) @@ -6528,11 +7366,8 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks, } maxsize = size; } - if (!check_env("IMSM_NO_PLATFORM") && - mpb->num_raid_devs > 0 && size && size != maxsize) { - pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n"); - return 0; - } + if (mpb->num_raid_devs > 0 && size && size != maxsize) + pr_err("attempting to create a second volume with size less then remaining space.\n"); cnt = 0; for (dl = super->disks; dl; dl = dl->next) if (dl->e) @@ -6570,7 +7405,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, int raiddisks, int *chunk, unsigned long long size, unsigned long long data_offset, char *dev, unsigned long long *freesize, - int verbose) + int consistency_policy, int verbose) { int fd, cfd; struct mdinfo *sra; @@ -6590,6 +7425,15 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, verbose); } + if (size && (size < 1024)) { + pr_err("Given size must be greater than 1M.\n"); + /* Depends on algorithm in Create.c : + * if container was given (dev == NULL) return -1, + * if block device was given ( dev != NULL) return 0. + */ + return dev ? -1 : 0; + } + if (!dev) { if (st->sb) { struct intel_super *super = st->sb; @@ -6701,18 +7545,17 @@ static void default_geometry_imsm(struct supertype *st, int *level, int *layout, static void handle_missing(struct intel_super *super, struct imsm_dev *dev); -static int kill_subarray_imsm(struct supertype *st) +static int kill_subarray_imsm(struct supertype *st, char *subarray_id) { - /* remove the subarray currently referenced by ->current_vol */ + /* remove the subarray currently referenced by subarray_id */ __u8 i; struct intel_dev **dp; struct intel_super *super = st->sb; - __u8 current_vol = super->current_vol; + __u8 current_vol = strtoul(subarray_id, NULL, 10); struct imsm_super *mpb = super->anchor; - if (super->current_vol < 0) + if (mpb->num_raid_devs == 0) return 2; - super->current_vol = -1; /* invalidate subarray cursor */ /* block deletions that would change the uuid of active subarrays * @@ -6799,26 +7642,56 @@ static int update_subarray_imsm(struct supertype *st, char *subarray, u->type = update_rename_array; u->dev_idx = vol; - snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name); + strncpy((char *) u->name, name, MAX_RAID_SERIAL_LEN); + u->name[MAX_RAID_SERIAL_LEN-1] = '\0'; append_metadata_update(st, u, sizeof(*u)); } else { struct imsm_dev *dev; - int i; + int i, namelen; dev = get_imsm_dev(super, vol); - snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name); + memset(dev->volume, '\0', MAX_RAID_SERIAL_LEN); + namelen = min((int)strlen(name), MAX_RAID_SERIAL_LEN); + memcpy(dev->volume, name, namelen); for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); handle_missing(super, dev); } super->updates_pending++; } + } else if (strcmp(update, "ppl") == 0 || + strcmp(update, "no-ppl") == 0) { + int new_policy; + char *ep; + int vol = strtoul(subarray, &ep, 10); + + if (*ep != '\0' || vol >= super->anchor->num_raid_devs) + return 2; + + if (strcmp(update, "ppl") == 0) + new_policy = RWH_MULTIPLE_DISTRIBUTED; + else + new_policy = RWH_MULTIPLE_OFF; + + if (st->update_tail) { + struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u)); + + u->type = update_rwh_policy; + u->dev_idx = vol; + u->new_policy = new_policy; + append_metadata_update(st, u, sizeof(*u)); + } else { + struct imsm_dev *dev; + + dev = get_imsm_dev(super, vol); + dev->rwh_policy = new_policy; + super->updates_pending++; + } } else return 2; return 0; } -#endif /* MDASSEMBLE */ static int is_gen_migration(struct imsm_dev *dev) { @@ -6852,7 +7725,6 @@ static int is_rebuilding(struct imsm_dev *dev) return 0; } -#ifndef MDASSEMBLE static int is_initializing(struct imsm_dev *dev) { struct imsm_map *migr_map; @@ -6870,7 +7742,6 @@ static int is_initializing(struct imsm_dev *dev) return 0; } -#endif static void update_recovery_start(struct intel_super *super, struct imsm_dev *dev, @@ -6904,9 +7775,7 @@ static void update_recovery_start(struct intel_super *super, rebuild->recovery_start = units * blocks_per_migr_unit(super, dev); } -#ifndef MDASSEMBLE static int recover_backup_imsm(struct supertype *st, struct mdinfo *info); -#endif static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray) { @@ -6926,6 +7795,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra int sb_errors = 0; struct dl *d; int spare_disks = 0; + int current_vol = super->current_vol; /* do not assemble arrays when not all attributes are supported */ if (imsm_check_attributes(mpb->attributes) == 0) { @@ -6933,12 +7803,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra pr_err("Unsupported attributes in IMSM metadata.Arrays activation is blocked.\n"); } - /* check for bad blocks */ - if (imsm_bbm_log_size(super->anchor)) { - pr_err("BBM log found in IMSM metadata.Arrays activation is blocked.\n"); - sb_errors = 1; - } - /* count spare devices, not used in maps */ for (d = super->disks; d; d = d->next) @@ -6951,10 +7815,9 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra struct imsm_map *map2; struct mdinfo *this; int slot; -#ifndef MDASSEMBLE int chunk; -#endif char *ep; + int level; if (subarray && (i != strtoul(subarray, &ep, 10) || *ep != '\0')) @@ -6963,6 +7826,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra dev = get_imsm_dev(super, i); map = get_imsm_map(dev, MAP_0); map2 = get_imsm_map(dev, MAP_1); + level = get_imsm_raid_level(map); /* do not publish arrays that are in the middle of an * unsupported migration @@ -6982,14 +7846,13 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra super->current_vol = i; getinfo_super_imsm_volume(st, this, NULL); this->next = rest; -#ifndef MDASSEMBLE chunk = __le16_to_cpu(map->blocks_per_strip) >> 1; /* mdadm does not support all metadata features- set the bit in all arrays state */ if (!validate_geometry_imsm_orom(super, - get_imsm_raid_level(map), /* RAID level */ - imsm_level_to_layout(get_imsm_raid_level(map)), + level, /* RAID level */ + imsm_level_to_layout(level), map->num_members, /* raid disks */ - &chunk, join_u32(dev->size_low, dev->size_high), + &chunk, imsm_dev_size(dev), 1 /* verbose */)) { pr_err("IMSM RAID geometry validation failed. Array %s activation is blocked.\n", dev->volume); @@ -6997,7 +7860,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra (1<disk)) skip = 1; - if (ord & IMSM_ORD_REBUILD) + if (!skip && (ord & IMSM_ORD_REBUILD)) recovery_start = 0; - + if (!(ord & IMSM_ORD_REBUILD)) + this->array.working_disks++; /* * if we skip some disks the array will be assmebled degraded; * reset resync start to avoid a dirty-degraded * situation when performing the intial sync - * - * FIXME handle dirty degraded */ - if ((skip || recovery_start == 0) && !dev->vol.dirty) - this->resync_start = MaxSector; + if (skip) + missing++; + + if (!(dev->vol.dirty & RAIDVOL_DIRTY)) { + if ((!able_to_resync(level, missing) || + recovery_start == 0)) + this->resync_start = MaxSector; + } else { + /* + * FIXME handle dirty degraded + */ + } + if (skip) continue; @@ -7060,25 +7933,36 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra else this->array.spare_disks++; } - if (info_d->recovery_start == MaxSector) - this->array.working_disks++; info_d->events = __le32_to_cpu(mpb->generation_num); info_d->data_offset = pba_of_lba0(map); - info_d->component_size = blocks_per_member(map); + info_d->component_size = calc_component_size(map, dev); + + if (map->raid_level == 5) { + info_d->ppl_sector = this->ppl_sector; + info_d->ppl_size = this->ppl_size; + if (this->consistency_policy == CONSISTENCY_POLICY_PPL && + recovery_start == 0) + this->resync_start = 0; + } + + info_d->bb.supported = 1; + get_volume_badblocks(super->bbm_log, ord_to_idx(ord), + info_d->data_offset, + info_d->component_size, + &info_d->bb); } /* now that the disk list is up-to-date fixup recovery_start */ update_recovery_start(super, dev, this); this->array.spare_disks += spare_disks; -#ifndef MDASSEMBLE /* check for reshape */ if (this->reshape_active == 1) recover_backup_imsm(st, this); -#endif rest = this; } + super->current_vol = current_vol; return rest; } @@ -7205,12 +8089,12 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, return failed; } -#ifndef MDASSEMBLE static int imsm_open_new(struct supertype *c, struct active_array *a, char *inst) { struct intel_super *super = c->sb; struct imsm_super *mpb = super->anchor; + struct imsm_update_prealloc_bb_mem u; if (atoi(inst) >= mpb->num_raid_devs) { pr_err("subarry index %d, out of range\n", atoi(inst)); @@ -7219,6 +8103,10 @@ static int imsm_open_new(struct supertype *c, struct active_array *a, dprintf("imsm: open_new %s\n", inst); a->info.container_member = atoi(inst); + + u.type = update_prealloc_badblocks_mem; + imsm_update_metadata_locally(c, &u, sizeof(u)); + return 0; } @@ -7246,7 +8134,8 @@ static int is_resyncing(struct imsm_dev *dev) } /* return true if we recorded new information */ -static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) +static int mark_failure(struct intel_super *super, + struct imsm_dev *dev, struct imsm_disk *disk, int idx) { __u32 ord; int slot; @@ -7270,7 +8159,7 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) strcat(buf, ":0"); if ((len = strlen(buf)) >= MAX_RAID_SERIAL_LEN) shift = len - MAX_RAID_SERIAL_LEN + 1; - strncpy((char *)disk->serial, &buf[shift], MAX_RAID_SERIAL_LEN); + memcpy(disk->serial, &buf[shift], len + 1 - shift); disk->status |= FAILED_DISK; set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD); @@ -7286,14 +8175,19 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) set_imsm_ord_tbl_ent(map2, slot2, idx | IMSM_ORD_REBUILD); } - if (map->failed_disk_num == 0xff) + if (map->failed_disk_num == 0xff || + (!is_rebuilding(dev) && map->failed_disk_num > slot)) map->failed_disk_num = slot; + + clear_disk_badblocks(super->bbm_log, ord_to_idx(ord)); + return 1; } -static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx) +static void mark_missing(struct intel_super *super, + struct imsm_dev *dev, struct imsm_disk *disk, int idx) { - mark_failure(dev, disk, idx); + mark_failure(super, dev, disk, idx); if (disk->scsi_id == __cpu_to_le32(~(__u32)0)) return; @@ -7319,57 +8213,71 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev) /* end process for initialization and rebuild only */ if (is_gen_migration(dev) == 0) { - __u8 map_state; - int failed; + int failed = imsm_count_failed(super, dev, MAP_0); - failed = imsm_count_failed(super, dev, MAP_0); - map_state = imsm_check_degraded(super, dev, failed, MAP_0); + if (failed) { + __u8 map_state; + struct imsm_map *map = get_imsm_map(dev, MAP_0); + struct imsm_map *map1; + int i, ord, ord_map1; + int rebuilt = 1; - if (failed) - end_migration(dev, super, map_state); + for (i = 0; i < map->num_members; i++) { + ord = get_imsm_ord_tbl_ent(dev, i, MAP_0); + if (!(ord & IMSM_ORD_REBUILD)) + continue; + + map1 = get_imsm_map(dev, MAP_1); + if (!map1) + continue; + + ord_map1 = __le32_to_cpu(map1->disk_ord_tbl[i]); + if (ord_map1 & IMSM_ORD_REBUILD) + rebuilt = 0; + } + + if (rebuilt) { + map_state = imsm_check_degraded(super, dev, + failed, MAP_0); + end_migration(dev, super, map_state); + } + } } for (dl = super->missing; dl; dl = dl->next) - mark_missing(dev, &dl->disk, dl->index); + mark_missing(super, dev, &dl->disk, dl->index); super->updates_pending++; } static unsigned long long imsm_set_array_size(struct imsm_dev *dev, long long new_size) { - int used_disks = imsm_num_data_members(dev, MAP_0); unsigned long long array_blocks; - struct imsm_map *map; + struct imsm_map *map = get_imsm_map(dev, MAP_0); + int used_disks = imsm_num_data_members(map); if (used_disks == 0) { /* when problems occures * return current array_blocks value */ - array_blocks = __le32_to_cpu(dev->size_high); - array_blocks = array_blocks << 32; - array_blocks += __le32_to_cpu(dev->size_low); + array_blocks = imsm_dev_size(dev); return array_blocks; } /* set array size in metadata */ - if (new_size <= 0) { + if (new_size <= 0) /* OLCE size change is caused by added disks */ - map = get_imsm_map(dev, MAP_0); - array_blocks = blocks_per_member(map) * used_disks; - } else { + array_blocks = per_dev_array_size(map) * used_disks; + else /* Online Volume Size Change * Using available free space */ array_blocks = new_size; - } - /* round array size down to closest MB - */ - array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; - dev->size_low = __cpu_to_le32((__u32)array_blocks); - dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32)); + array_blocks = round_size_to_mb(array_blocks, used_disks); + set_imsm_dev_size(dev, array_blocks); return array_blocks; } @@ -7475,16 +8383,14 @@ static int imsm_set_array_state(struct active_array *a, int consistent) int used_disks; struct mdinfo *mdi; - used_disks = imsm_num_data_members(dev, MAP_0); + used_disks = imsm_num_data_members(map); if (used_disks > 0) { array_blocks = - blocks_per_member(map) * + per_dev_array_size(map) * used_disks; - /* round array size down to closest MB - */ - array_blocks = (array_blocks - >> SECT_PER_MB_SHIFT) - << SECT_PER_MB_SHIFT; + array_blocks = + round_size_to_mb(array_blocks, + used_disks); a->info.custom_array_size = array_blocks; /* encourage manager to update array * size @@ -7565,18 +8471,42 @@ mark_checkpoint: skip_mark_checkpoint: /* mark dirty / clean */ - if (dev->vol.dirty != !consistent) { + if (((dev->vol.dirty & RAIDVOL_DIRTY) && consistent) || + (!(dev->vol.dirty & RAIDVOL_DIRTY) && !consistent)) { dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty"); - if (consistent) - dev->vol.dirty = 0; - else - dev->vol.dirty = 1; + if (consistent) { + dev->vol.dirty = RAIDVOL_CLEAN; + } else { + dev->vol.dirty = RAIDVOL_DIRTY; + if (dev->rwh_policy == RWH_DISTRIBUTED || + dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED) + dev->vol.dirty |= RAIDVOL_DSRECORD_VALID; + } super->updates_pending++; } return consistent; } +static int imsm_disk_slot_to_ord(struct active_array *a, int slot) +{ + int inst = a->info.container_member; + struct intel_super *super = a->container->sb; + struct imsm_dev *dev = get_imsm_dev(super, inst); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + + if (slot > map->num_members) { + pr_err("imsm: imsm_disk_slot_to_ord %d out of range 0..%d\n", + slot, map->num_members - 1); + return -1; + } + + if (slot < 0) + return -1; + + return get_imsm_ord_tbl_ent(dev, slot, MAP_0); +} + static void imsm_set_disk(struct active_array *a, int n, int state) { int inst = a->info.container_member; @@ -7587,24 +8517,21 @@ static void imsm_set_disk(struct active_array *a, int n, int state) struct mdinfo *mdi; int recovery_not_finished = 0; int failed; - __u32 ord; + int ord; __u8 map_state; + int rebuild_done = 0; + int i; - if (n > map->num_members) - pr_err("imsm: set_disk %d out of range 0..%d\n", - n, map->num_members - 1); - - if (n < 0) + ord = get_imsm_ord_tbl_ent(dev, n, MAP_X); + if (ord < 0) return; dprintf("imsm: set_disk %d:%x\n", n, state); - - ord = get_imsm_ord_tbl_ent(dev, n, MAP_0); disk = get_imsm_disk(super, ord_to_idx(ord)); /* check for new failures */ - if (state & DS_FAULTY) { - if (mark_failure(dev, disk, ord_to_idx(ord))) + if (disk && (state & DS_FAULTY)) { + if (mark_failure(super, dev, disk, ord_to_idx(ord))) super->updates_pending++; } @@ -7613,6 +8540,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) struct imsm_map *migr_map = get_imsm_map(dev, MAP_1); set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord)); + rebuild_done = 1; super->updates_pending++; } @@ -7670,12 +8598,52 @@ static void imsm_set_disk(struct active_array *a, int n, int state) break; } if (is_rebuilding(dev)) { - dprintf_cont("while rebuilding."); - if (map->map_state != map_state) { - dprintf_cont(" Map state change"); - end_migration(dev, super, map_state); + dprintf_cont("while rebuilding "); + if (state & DS_FAULTY) { + dprintf_cont("removing failed drive "); + if (n == map->failed_disk_num) { + dprintf_cont("end migration"); + end_migration(dev, super, map_state); + a->last_checkpoint = 0; + } else { + dprintf_cont("fail detected during rebuild, changing map state"); + map->map_state = map_state; + } super->updates_pending++; } + + if (!rebuild_done) + break; + + /* check if recovery is really finished */ + for (mdi = a->info.devs; mdi ; mdi = mdi->next) + if (mdi->recovery_start != MaxSector) { + recovery_not_finished = 1; + break; + } + if (recovery_not_finished) { + dprintf_cont("\n"); + dprintf_cont("Rebuild has not finished yet"); + if (a->last_checkpoint < mdi->recovery_start) { + a->last_checkpoint = + mdi->recovery_start; + super->updates_pending++; + } + break; + } + + dprintf_cont(" Rebuild done, still degraded"); + end_migration(dev, super, map_state); + a->last_checkpoint = 0; + super->updates_pending++; + + for (i = 0; i < map->num_members; i++) { + int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0); + + if (idx & IMSM_ORD_REBUILD) + map->failed_disk_num = i; + } + super->updates_pending++; break; } if (is_gen_migration(dev)) { @@ -7747,7 +8715,7 @@ static int store_imsm_mpb(int fd, struct imsm_super *mpb) if (lseek64(fd, dsize - (sector_size * 2), SEEK_SET) < 0) return 1; - if (write(fd, buf, sector_size) != sector_size) + if ((unsigned int)write(fd, buf, sector_size) != sector_size) return 1; return 0; @@ -7842,10 +8810,13 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, if (dl->index == -1 && !activate_new) continue; + if (!drive_validate_sector_size(super, dl)) + continue; + /* Does this unused device have the requisite free space? * It needs to be able to cover all member volumes */ - ex = get_extents(super, dl); + ex = get_extents(super, dl, 1); if (!ex) { dprintf("cannot get extents\n"); continue; @@ -7865,11 +8836,11 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, pos = 0; array_start = pba_of_lba0(map); array_end = array_start + - blocks_per_member(map) - 1; + per_dev_array_size(map) - 1; do { /* check that we can start at pba_of_lba0 with - * blocks_per_member of space + * num_data_stripes*blocks_per_stripe of space */ if (array_start >= pos && array_end < ex[j].start) { found = 1; @@ -8067,6 +9038,11 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, di->data_offset = pba_of_lba0(map); di->component_size = a->info.component_size; di->container_member = inst; + di->bb.supported = 1; + if (a->info.consistency_policy == CONSISTENCY_POLICY_PPL) { + di->ppl_sector = get_ppl_sector(super, inst); + di->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9; + } super->random = random32(); di->next = rv; rv = di; @@ -8194,6 +9170,9 @@ static int add_remove_disk_update(struct intel_super *super) remove_disk_super(super, disk_cfg->major, disk_cfg->minor); + } else { + disk_cfg->fd = disk->fd; + disk->fd = -1; } } /* release allocate disk structure */ @@ -8271,9 +9250,30 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * /* update chunk size */ - if (u->new_chunksize > 0) + if (u->new_chunksize > 0) { + unsigned long long num_data_stripes; + struct imsm_map *dest_map = + get_imsm_map(dev, MAP_0); + int used_disks = + imsm_num_data_members(dest_map); + + if (used_disks == 0) + return ret_val; + map->blocks_per_strip = __cpu_to_le16(u->new_chunksize * 2); + num_data_stripes = + imsm_dev_size(dev) / used_disks; + num_data_stripes /= map->blocks_per_strip; + num_data_stripes /= map->num_domains; + set_num_data_stripes(map, num_data_stripes); + } + + /* ensure blocks_per_member has valid value + */ + set_blocks_per_member(map, + per_dev_array_size(map) + + NUM_BLOCKS_DIRTY_STRIPE_REGION); /* add disk */ @@ -8338,15 +9338,27 @@ static int apply_size_change_update(struct imsm_update_size_change *u, if (id->index == (unsigned)u->subdev) { struct imsm_dev *dev = get_imsm_dev(super, u->subdev); struct imsm_map *map = get_imsm_map(dev, MAP_0); - int used_disks = imsm_num_data_members(dev, MAP_0); + int used_disks = imsm_num_data_members(map); unsigned long long blocks_per_member; + unsigned long long num_data_stripes; + unsigned long long new_size_per_disk; + + if (used_disks == 0) + return 0; /* calculate new size */ - blocks_per_member = u->new_size / used_disks; - dprintf("(size: %llu, blocks per member: %llu)\n", - u->new_size, blocks_per_member); + new_size_per_disk = u->new_size / used_disks; + blocks_per_member = new_size_per_disk + + NUM_BLOCKS_DIRTY_STRIPE_REGION; + num_data_stripes = new_size_per_disk / + map->blocks_per_strip; + num_data_stripes /= map->num_domains; + dprintf("(size: %llu, blocks per member: %llu, num_data_stipes: %llu)\n", + u->new_size, new_size_per_disk, + num_data_stripes); set_blocks_per_member(map, blocks_per_member); + set_num_data_stripes(map, num_data_stripes); imsm_set_array_size(dev, u->new_size); ret_val = 1; @@ -8597,6 +9609,8 @@ static int apply_takeover_update(struct imsm_update_takeover *u, map = get_imsm_map(dev, MAP_0); if (u->direction == R10_TO_R0) { + unsigned long long num_data_stripes; + /* Number of failed disks must be half of initial disk number */ if (imsm_count_failed(super, dev, MAP_0) != (map->num_members / 2)) @@ -8622,10 +9636,15 @@ static int apply_takeover_update(struct imsm_update_takeover *u, map->num_domains = 1; map->raid_level = 0; map->failed_disk_num = -1; + num_data_stripes = imsm_dev_size(dev) / 2; + num_data_stripes /= map->blocks_per_strip; + set_num_data_stripes(map, num_data_stripes); } if (u->direction == R0_TO_R10) { void **space; + unsigned long long num_data_stripes; + /* update slots in current disk list */ for (dm = super->disks; dm; dm = dm->next) { if (dm->index >= 0) @@ -8663,6 +9682,11 @@ static int apply_takeover_update(struct imsm_update_takeover *u, map->map_state = IMSM_T_STATE_DEGRADED; map->num_domains = 2; map->raid_level = 1; + num_data_stripes = imsm_dev_size(dev) / 2; + num_data_stripes /= map->blocks_per_strip; + num_data_stripes /= map->num_domains; + set_num_data_stripes(map, num_data_stripes); + /* replace dev<->dev_new */ dv->dev = dev_new; } @@ -8673,7 +9697,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u, for (du = super->missing; du; du = du->next) if (du->index >= 0) { set_imsm_ord_tbl_ent(map, du->index, du->index); - mark_missing(dv->dev, &du->disk, du->index); + mark_missing(super, dv->dev, &du->disk, du->index); } return 1; @@ -8808,7 +9832,7 @@ static void imsm_process_update(struct supertype *st, new_map = get_imsm_map(&u->dev, MAP_0); new_start = pba_of_lba0(new_map); - new_end = new_start + blocks_per_member(new_map); + new_end = new_start + per_dev_array_size(new_map); inf = get_disk_info(u); /* handle activate_spare versus create race: @@ -8819,7 +9843,7 @@ static void imsm_process_update(struct supertype *st, dev = get_imsm_dev(super, i); map = get_imsm_map(dev, MAP_0); start = pba_of_lba0(map); - end = start + blocks_per_member(map); + end = start + per_dev_array_size(map); if ((new_start >= start && new_start <= end) || (start >= new_start && start <= new_end)) /* overlap */; @@ -8935,6 +9959,7 @@ static void imsm_process_update(struct supertype *st, /* sanity check that we are not affecting the uuid of * an active array */ + memset(name, 0, sizeof(name)); snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name); name[MAX_RAID_SERIAL_LEN] = '\0'; for (a = st->arrays; a; a = a->next) @@ -8946,7 +9971,7 @@ static void imsm_process_update(struct supertype *st, break; } - snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name); + memcpy(dev->volume, name, MAX_RAID_SERIAL_LEN); super->updates_pending++; break; } @@ -8964,8 +9989,25 @@ static void imsm_process_update(struct supertype *st, } break; } + case update_prealloc_badblocks_mem: + break; + case update_rwh_policy: { + struct imsm_update_rwh_policy *u = (void *)update->buf; + int target = u->dev_idx; + struct imsm_dev *dev = get_imsm_dev(super, target); + if (!dev) { + dprintf("could not find subarray-%d\n", target); + break; + } + + if (dev->rwh_policy != u->new_policy) { + dev->rwh_policy = u->new_policy; + super->updates_pending++; + } + break; + } default: - pr_err("error: unsuported process update type:(type: %d)\n", type); + pr_err("error: unsupported process update type:(type: %d)\n", type); } } @@ -9205,6 +10247,15 @@ static int imsm_prepare_update(struct supertype *st, case update_add_remove_disk: /* no update->len needed */ break; + case update_prealloc_badblocks_mem: + super->extra_space += sizeof(struct bbm_log) - + get_imsm_bbm_log_size(super->bbm_log); + break; + case update_rwh_policy: { + if (update->len < (int)sizeof(struct imsm_update_rwh_policy)) + return 0; + break; + } default: return 0; } @@ -9215,13 +10266,13 @@ static int imsm_prepare_update(struct supertype *st, else buf_len = super->len; - if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) { + if (__le32_to_cpu(mpb->mpb_size) + super->extra_space + len > buf_len) { /* ok we need a larger buf than what is currently allocated * if this allocation fails process_update will notice that * ->next_len is set and ->next_buf is NULL */ - buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, - sector_size); + buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + + super->extra_space + len, sector_size); if (super->next_buf) free(super->next_buf); @@ -9241,8 +10292,9 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind struct dl *iter; struct imsm_dev *dev; struct imsm_map *map; - int i, j, num_members; - __u32 ord; + unsigned int i, j, num_members; + __u32 ord, ord_map0; + struct bbm_log *log = super->bbm_log; dprintf("deleting device[%d] from imsm_super\n", index); @@ -9263,18 +10315,27 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind * ord-flags to the first map */ ord = get_imsm_ord_tbl_ent(dev, j, MAP_X); + ord_map0 = get_imsm_ord_tbl_ent(dev, j, MAP_0); if (ord_to_idx(ord) <= index) continue; map = get_imsm_map(dev, MAP_0); - set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1)); + set_imsm_ord_tbl_ent(map, j, ord_map0 - 1); map = get_imsm_map(dev, MAP_1); if (map) set_imsm_ord_tbl_ent(map, j, ord - 1); } } + for (i = 0; i < log->entry_count; i++) { + struct bbm_log_entry *entry = &log->marked_block_entries[i]; + + if (entry->disk_ordinal <= index) + continue; + entry->disk_ordinal--; + } + mpb->num_disks--; super->updates_pending++; if (*dlp) { @@ -9284,7 +10345,6 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind __free_imsm_disk(dl); } } -#endif /* MDASSEMBLE */ static void close_targets(int *targets, int new_disks) { @@ -9487,7 +10547,151 @@ int validate_container_imsm(struct mdinfo *info) return 0; } -#ifndef MDASSEMBLE + +/******************************************************************************* +* Function: imsm_record_badblock +* Description: This routine stores new bad block record in BBM log +* +* Parameters: +* a : array containing a bad block +* slot : disk number containing a bad block +* sector : bad block sector +* length : bad block sectors range +* Returns: +* 1 : Success +* 0 : Error +******************************************************************************/ +static int imsm_record_badblock(struct active_array *a, int slot, + unsigned long long sector, int length) +{ + struct intel_super *super = a->container->sb; + int ord; + int ret; + + ord = imsm_disk_slot_to_ord(a, slot); + if (ord < 0) + return 0; + + ret = record_new_badblock(super->bbm_log, ord_to_idx(ord), sector, + length); + if (ret) + super->updates_pending++; + + return ret; +} +/******************************************************************************* +* Function: imsm_clear_badblock +* Description: This routine clears bad block record from BBM log +* +* Parameters: +* a : array containing a bad block +* slot : disk number containing a bad block +* sector : bad block sector +* length : bad block sectors range +* Returns: +* 1 : Success +* 0 : Error +******************************************************************************/ +static int imsm_clear_badblock(struct active_array *a, int slot, + unsigned long long sector, int length) +{ + struct intel_super *super = a->container->sb; + int ord; + int ret; + + ord = imsm_disk_slot_to_ord(a, slot); + if (ord < 0) + return 0; + + ret = clear_badblock(super->bbm_log, ord_to_idx(ord), sector, length); + if (ret) + super->updates_pending++; + + return ret; +} +/******************************************************************************* +* Function: imsm_get_badblocks +* Description: This routine get list of bad blocks for an array +* +* Parameters: +* a : array +* slot : disk number +* Returns: +* bb : structure containing bad blocks +* NULL : error +******************************************************************************/ +static struct md_bb *imsm_get_badblocks(struct active_array *a, int slot) +{ + int inst = a->info.container_member; + struct intel_super *super = a->container->sb; + struct imsm_dev *dev = get_imsm_dev(super, inst); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + int ord; + + ord = imsm_disk_slot_to_ord(a, slot); + if (ord < 0) + return NULL; + + get_volume_badblocks(super->bbm_log, ord_to_idx(ord), pba_of_lba0(map), + per_dev_array_size(map), &super->bb); + + return &super->bb; +} +/******************************************************************************* +* Function: examine_badblocks_imsm +* Description: Prints list of bad blocks on a disk to the standard output +* +* Parameters: +* st : metadata handler +* fd : open file descriptor for device +* devname : device name +* Returns: +* 0 : Success +* 1 : Error +******************************************************************************/ +static int examine_badblocks_imsm(struct supertype *st, int fd, char *devname) +{ + struct intel_super *super = st->sb; + struct bbm_log *log = super->bbm_log; + struct dl *d = NULL; + int any = 0; + + for (d = super->disks; d ; d = d->next) { + if (strcmp(d->devname, devname) == 0) + break; + } + + if ((d == NULL) || (d->index < 0)) { /* serial mismatch probably */ + pr_err("%s doesn't appear to be part of a raid array\n", + devname); + return 1; + } + + if (log != NULL) { + unsigned int i; + struct bbm_log_entry *entry = &log->marked_block_entries[0]; + + for (i = 0; i < log->entry_count; i++) { + if (entry[i].disk_ordinal == d->index) { + unsigned long long sector = __le48_to_cpu( + &entry[i].defective_block_start); + int cnt = entry[i].marked_count + 1; + + if (!any) { + printf("Bad-blocks on %s:\n", devname); + any = 1; + } + + printf("%20llu for %d sectors\n", sector, cnt); + } + } + } + + if (!any) + printf("No bad-blocks list configured on %s\n", devname); + + return 0; +} /******************************************************************************* * Function: init_migr_record_imsm * Description: Function inits imsm migration record @@ -9524,7 +10728,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, max(map_dest->blocks_per_strip, map_src->blocks_per_strip); migr_rec->dest_depth_per_unit *= max(map_dest->blocks_per_strip, map_src->blocks_per_strip); - new_data_disks = imsm_num_data_members(dev, MAP_0); + new_data_disks = imsm_num_data_members(map_dest); migr_rec->blocks_per_unit = __cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks); migr_rec->dest_depth_per_unit = @@ -9535,7 +10739,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit)) num_migr_units++; - migr_rec->num_migr_units = __cpu_to_le32(num_migr_units); + set_num_migr_units(migr_rec, num_migr_units); migr_rec->post_migr_vol_cap = dev->size_low; migr_rec->post_migr_vol_cap_hi = dev->size_high; @@ -9552,7 +10756,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev, min_dev_sectors = dev_sectors; close(fd); } - migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors - + set_migr_chkp_area_pba(migr_rec, min_dev_sectors - RAID_DISK_RESERVED_BLOCKS_IMSM_HI); write_imsm_migr_rec(st); @@ -9592,7 +10796,7 @@ int save_backup_imsm(struct supertype *st, int dest_layout = 0; int dest_chunk; unsigned long long start; - int data_disks = imsm_num_data_members(dev, MAP_0); + int data_disks = imsm_num_data_members(map_dest); targets = xmalloc(new_disks * sizeof(int)); @@ -9603,8 +10807,7 @@ int save_backup_imsm(struct supertype *st, start = info->reshape_progress * 512; for (i = 0; i < new_disks; i++) { - target_offsets[i] = (unsigned long long) - __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512; + target_offsets[i] = migr_chkp_area_pba(super->migr_rec) * 512; /* move back copy area adderss, it will be moved forward * in restore_stripes() using start input variable */ @@ -9683,12 +10886,11 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state) if (info->reshape_progress % blocks_per_unit) curr_migr_unit++; - super->migr_rec->curr_migr_unit = - __cpu_to_le32(curr_migr_unit); + set_current_migr_unit(super->migr_rec, curr_migr_unit); super->migr_rec->rec_status = __cpu_to_le32(state); - super->migr_rec->dest_1st_member_lba = - __cpu_to_le32(curr_migr_unit * - __le32_to_cpu(super->migr_rec->dest_depth_per_unit)); + set_migr_dest_1st_member_lba(super->migr_rec, + super->migr_rec->dest_depth_per_unit * curr_migr_unit); + if (write_imsm_migr_rec(st) < 0) { dprintf("imsm: Cannot write migration record outside backup area\n"); return 1; @@ -9722,8 +10924,8 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) char *buf = NULL; int retval = 1; unsigned int sector_size = super->sector_size; - unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit); - unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units); + unsigned long curr_migr_unit = current_migr_unit(migr_rec); + unsigned long num_migr_units = get_num_migr_units(migr_rec); char buffer[20]; int skipped_disks = 0; @@ -9750,11 +10952,9 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) map_dest = get_imsm_map(id->dev, MAP_0); new_disks = map_dest->num_members; - read_offset = (unsigned long long) - __le32_to_cpu(migr_rec->ckpt_area_pba) * 512; + read_offset = migr_chkp_area_pba(migr_rec) * 512; - write_offset = ((unsigned long long) - __le32_to_cpu(migr_rec->dest_1st_member_lba) + + write_offset = (migr_dest_1st_member_lba(migr_rec) + pba_of_lba0(map_dest)) * 512; unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; @@ -9846,6 +11046,10 @@ static const char *imsm_get_disk_controller_domain(const char *path) drv = "isci"; else if (hba && hba->type == SYS_DEV_SATA) drv = "ahci"; + else if (hba && hba->type == SYS_DEV_VMD) + drv = "vmd"; + else if (hba && hba->type == SYS_DEV_NVME) + drv = "nvme"; else drv = "unknown"; dprintf("path: %s hba: %s attached: %s\n", @@ -9983,8 +11187,10 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, */ static struct mdinfo *get_spares_for_grow(struct supertype *st) { - unsigned long long min_size = min_acceptable_spare_size_imsm(st); - return container_choose_spares(st, min_size, NULL, NULL, NULL, 0); + struct spare_criteria sc; + + get_spare_criteria_imsm(st, &sc); + return container_choose_spares(st, &sc, NULL, NULL, NULL, 0); } /****************************************************************************** @@ -10026,8 +11232,7 @@ static int imsm_create_metadata_update_for_reshape( */ spares = get_spares_for_grow(st); - if (spares == NULL - || delta_disks > spares->array.spare_disks) { + if (spares == NULL || delta_disks > spares->array.spare_disks) { pr_err("imsm: ERROR: Cannot get spare devices for %s.\n", geo->dev_name); i = -1; goto abort; @@ -10156,7 +11361,7 @@ static int imsm_create_metadata_update_for_migration( free(u); sysfs_free(spares); update_memory_size = 0; - dprintf("error: cannot get spare device for requested migration"); + pr_err("cannot get spare device for requested migration\n"); return 0; } sysfs_free(spares); @@ -10209,6 +11414,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, int imsm_layout = -1; int data_disks; struct imsm_dev *dev; + struct imsm_map *map; struct intel_super *super; unsigned long long current_size; unsigned long long free_size; @@ -10284,6 +11490,11 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, pr_err("Error. Chunk size change for RAID 10 is not supported.\n"); change = -1; goto analyse_change_exit; + } else if (info.component_size % (geo->chunksize/512)) { + pr_err("New chunk size (%dK) does not evenly divide device size (%lluk). Aborting...\n", + geo->chunksize/1024, info.component_size/2); + change = -1; + goto analyse_change_exit; } change = CH_MIGRATION; } else { @@ -10294,7 +11505,8 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, super = st->sb; dev = get_imsm_dev(super, super->current_vol); - data_disks = imsm_num_data_members(dev , MAP_0); + map = get_imsm_map(dev, MAP_0); + data_disks = imsm_num_data_members(map); /* compute current size per disk member */ current_size = info.custom_array_size / data_disks; @@ -10302,7 +11514,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, if (geo->size > 0 && geo->size != MAX_SIZE) { /* align component size */ - geo->size = imsm_component_size_aligment_check( + geo->size = imsm_component_size_alignment_check( get_imsm_raid_level(dev->vol.map), chunk * 1024, super->sector_size, geo->size * 2); @@ -10336,7 +11548,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, max_size = free_size + current_size; /* align component size */ - max_size = imsm_component_size_aligment_check( + max_size = imsm_component_size_alignment_check( get_imsm_raid_level(dev->vol.map), chunk * 1024, super->sector_size, max_size); @@ -10383,7 +11595,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, geo->raid_disks + devNumChange, &chunk, geo->size, INVALID_SECTORS, - 0, 0, 1)) + 0, 0, info.consistency_policy, 1)) change = -1; if (check_devs) { @@ -10463,9 +11675,6 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, dprintf("for level : %i\n", geo.level); dprintf("for raid_disks : %i\n", geo.raid_disks); - if (experimental() == 0) - return ret_val; - if (strcmp(st->container_devnm, st->devnm) == 0) { /* On container level we can only increase number of devices. */ dprintf("imsm: info: Container operation\n"); @@ -10708,9 +11917,10 @@ int check_degradation_change(struct mdinfo *info, if (sd->disk.state & (1<disk.state & (1<sector_size; struct imsm_dev *dev = NULL; - struct imsm_map *map_src; + struct imsm_map *map_src, *map_dest; int migr_vol_qan = 0; int ndata, odata; /* [bytes] */ int chunk; /* [bytes] */ @@ -10786,8 +11996,8 @@ static int imsm_manage_reshape( /* Find volume during the reshape */ for (dv = super->devlist; dv; dv = dv->next) { - if (dv->dev->vol.migr_type == MIGR_GEN_MIGR - && dv->dev->vol.migr_state == 1) { + if (dv->dev->vol.migr_type == MIGR_GEN_MIGR && + dv->dev->vol.migr_state == 1) { dev = dv->dev; migr_vol_qan++; } @@ -10800,12 +12010,13 @@ static int imsm_manage_reshape( goto abort; } + map_dest = get_imsm_map(dev, MAP_0); map_src = get_imsm_map(dev, MAP_1); if (map_src == NULL) goto abort; - ndata = imsm_num_data_members(dev, MAP_0); - odata = imsm_num_data_members(dev, MAP_1); + ndata = imsm_num_data_members(map_dest); + odata = imsm_num_data_members(map_src); chunk = __le16_to_cpu(map_src->blocks_per_strip) * 512; old_data_stripe_length = odata * chunk; @@ -10836,7 +12047,7 @@ static int imsm_manage_reshape( buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512; /* extend buffer size for parity disk */ buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; - /* add space for stripe aligment */ + /* add space for stripe alignment */ buf_size += old_data_stripe_length; if (posix_memalign((void **)&buf, MAX_SECTOR_SIZE, buf_size)) { dprintf("imsm: Cannot allocate checkpoint buffer\n"); @@ -10846,12 +12057,12 @@ static int imsm_manage_reshape( max_position = sra->component_size * ndata; source_layout = imsm_level_to_layout(map_src->raid_level); - while (__le32_to_cpu(migr_rec->curr_migr_unit) < - __le32_to_cpu(migr_rec->num_migr_units)) { + while (current_migr_unit(migr_rec) < + get_num_migr_units(migr_rec)) { /* current reshape position [blocks] */ unsigned long long current_position = __le32_to_cpu(migr_rec->blocks_per_unit) - * __le32_to_cpu(migr_rec->curr_migr_unit); + * current_migr_unit(migr_rec); unsigned long long border; /* Check that array hasn't become failed. @@ -10955,7 +12166,7 @@ static int imsm_manage_reshape( /* clear migr_rec on disks after successful migration */ struct dl *d; - memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*sector_size); + memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE); for (d = super->disks; d; d = d->next) { if (d->index < 0 || is_failed(&d->disk)) continue; @@ -10964,7 +12175,7 @@ static int imsm_manage_reshape( get_dev_size(d->fd, NULL, &dsize); if (lseek64(d->fd, dsize - MIGR_REC_SECTOR_POSITION*sector_size, SEEK_SET) >= 0) { - if (write(d->fd, super->migr_rec_buf, + if ((unsigned int)write(d->fd, super->migr_rec_buf, MIGR_REC_BUF_SECTORS*sector_size) != MIGR_REC_BUF_SECTORS*sector_size) perror("Write migr_rec failed"); @@ -10983,10 +12194,7 @@ abort: return ret_val; } -#endif /* MDASSEMBLE */ - struct superswitch super_imsm = { -#ifndef MDASSEMBLE .examine_super = examine_super_imsm, .brief_examine_super = brief_examine_super_imsm, .brief_examine_subarrays = brief_examine_subarrays_imsm, @@ -11007,8 +12215,7 @@ struct superswitch super_imsm = { .reshape_super = imsm_reshape_super, .manage_reshape = imsm_manage_reshape, .recover_backup = recover_backup_imsm, - .copy_metadata = copy_metadata_imsm, -#endif + .examine_badblocks = examine_badblocks_imsm, .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, .getinfo_super = getinfo_super_imsm, @@ -11016,7 +12223,7 @@ struct superswitch super_imsm = { .update_super = update_super_imsm, .avail_size = avail_size_imsm, - .min_acceptable_spare_size = min_acceptable_spare_size_imsm, + .get_spare_criteria = get_spare_criteria_imsm, .compare_super = compare_super_imsm, @@ -11028,10 +12235,12 @@ struct superswitch super_imsm = { .container_content = container_content_imsm, .validate_container = validate_container_imsm, + .write_init_ppl = write_init_ppl_imsm, + .validate_ppl = validate_ppl_imsm, + .external = 1, .name = "imsm", -#ifndef MDASSEMBLE /* for mdmon */ .open_new = imsm_open_new, .set_array_state= imsm_set_array_state, @@ -11040,5 +12249,7 @@ struct superswitch super_imsm = { .activate_spare = imsm_activate_spare, .process_update = imsm_process_update, .prepare_update = imsm_prepare_update, -#endif /* MDASSEMBLE */ + .record_bad_block = imsm_record_badblock, + .clear_bad_block = imsm_clear_badblock, + .get_bad_blocks = imsm_get_badblocks, };