X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=super-intel.c;h=0aed57c8e2c5b431d5b7fb873e2da4ecb6f6bc97;hb=32141c1765967e37d6f1accdf124c166bc103c3b;hp=d2ee1c669321e7a6c66254ce780bfa8720938a4c;hpb=614902f64e856b4cffc26687fac74412c4a6d91c;p=thirdparty%2Fmdadm.git diff --git a/super-intel.c b/super-intel.c index d2ee1c66..0aed57c8 100644 --- a/super-intel.c +++ b/super-intel.c @@ -81,7 +81,8 @@ MPB_ATTRIB_RAID1 | \ MPB_ATTRIB_RAID10 | \ MPB_ATTRIB_RAID5 | \ - MPB_ATTRIB_EXP_STRIPE_SIZE) + MPB_ATTRIB_EXP_STRIPE_SIZE | \ + MPB_ATTRIB_BBM) /* Define attributes that are unused but not harmful */ #define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE) @@ -90,6 +91,7 @@ #define IMSM_RESERVED_SECTORS 4096 #define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056 #define SECT_PER_MB_SHIFT 11 +#define MAX_SECTOR_SIZE 4096 /* Disk configuration info. */ #define IMSM_MAX_DEVICES 255 @@ -100,6 +102,7 @@ struct imsm_disk { #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */ #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */ #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */ +#define JOURNAL_DISK __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */ __u32 status; /* 0xF0 - 0xF3 */ __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */ __u32 total_blocks_hi; /* 0xF4 - 0xF5 total blocks hi */ @@ -153,6 +156,9 @@ struct imsm_vol { #define MIGR_STATE_CHANGE 4 #define MIGR_REPAIR 5 __u8 migr_type; /* Initializing, Rebuilding, ... */ +#define RAIDVOL_CLEAN 0 +#define RAIDVOL_DIRTY 1 +#define RAIDVOL_DSRECORD_VALID 2 __u8 dirty; __u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */ __u16 verify_errors; /* number of mismatches */ @@ -188,7 +194,24 @@ struct imsm_dev { __u16 cache_policy; __u8 cng_state; __u8 cng_sub_state; -#define IMSM_DEV_FILLERS 10 + __u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */ + + /* NVM_EN */ + __u8 nv_cache_mode; + __u8 nv_cache_flags; + + /* Unique Volume Id of the NvCache Volume associated with this volume */ + __u32 nvc_vol_orig_family_num; + __u16 nvc_vol_raid_dev_num; + +#define RWH_OFF 0 +#define RWH_DISTRIBUTED 1 +#define RWH_JOURNALING_DRIVE 2 + __u8 rwh_policy; /* Raid Write Hole Policy */ + __u8 jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */ + __u8 filler1; + +#define IMSM_DEV_FILLERS 3 __u32 filler[IMSM_DEV_FILLERS]; struct imsm_vol vol; } __attribute__ ((packed)); @@ -217,35 +240,37 @@ struct imsm_super { } __attribute__ ((packed)); #define BBM_LOG_MAX_ENTRIES 254 +#define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */ +#define BBM_LOG_SIGNATURE 0xabadb10c + +struct bbm_log_block_addr { + __u16 w1; + __u32 dw1; +} __attribute__ ((__packed__)); struct bbm_log_entry { - __u64 defective_block_start; -#define UNREADABLE 0xFFFFFFFF - __u32 spare_block_offset; - __u16 remapped_marked_count; - __u16 disk_ordinal; + __u8 marked_count; /* Number of blocks marked - 1 */ + __u8 disk_ordinal; /* Disk entry within the imsm_super */ + struct bbm_log_block_addr defective_block_start; } __attribute__ ((__packed__)); struct bbm_log { __u32 signature; /* 0xABADB10C */ __u32 entry_count; - __u32 reserved_spare_block_count; /* 0 */ - __u32 reserved; /* 0xFFFF */ - __u64 first_spare_lba; - struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES]; + struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES]; } __attribute__ ((__packed__)); -#ifndef MDASSEMBLE static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" }; -#endif + +#define BLOCKS_PER_KB (1024/512) 
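/* [Editorial note - illustration only, not part of this patch.]
 * The bbm_log_block_addr type introduced above stores a 48-bit defective-block
 * LBA as a 16-bit low word (w1) plus a 32-bit high dword (dw1); the patch adds
 * matching helpers __cpu_to_le48()/__le48_to_cpu() further down. The sketch
 * below only restates that split/join with a worked value (endian conversion
 * omitted); the function name is made up for illustration.
 */
#if 0	/* never compiled - example only */
static unsigned long long example_join_48bit_lba(__u16 w1, __u32 dw1)
{
	/* bits 0..15 come from w1, bits 16..47 from dw1 */
	return ((unsigned long long)dw1 << 16) | w1;
}
/* e.g. w1 = 0x789A, dw1 = 0x00123456  ->  LBA 0x123456789A */
#endif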
#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209 #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */ -#define MIGR_REC_BUF_SIZE 512 /* size of migr_record i/o buffer */ -#define MIGR_REC_POSITION 512 /* migr_record position offset on disk, - * MIGR_REC_BUF_SIZE <= MIGR_REC_POSITION +#define MIGR_REC_BUF_SECTORS 1 /* size of migr_record i/o buffer in sectors */ +#define MIGR_REC_SECTOR_POSITION 1 /* migr_record position offset on disk, + * MIGR_REC_BUF_SECTORS <= MIGR_REC_SECTOR_POS */ #define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must @@ -253,6 +278,9 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" #define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has * already been migrated and must * be recovered from checkpoint area */ + +#define PPL_ENTRY_SPACE (128 * 1024) /* Size of the PPL, without the header */ + struct migr_record { __u32 rec_status; /* Status used to determine how to restart * migration in case it aborts @@ -318,14 +346,15 @@ static void set_migr_type(struct imsm_dev *dev, __u8 migr_type) } } -static unsigned int sector_count(__u32 bytes) +static unsigned int sector_count(__u32 bytes, unsigned int sector_size) { - return ROUND_UP(bytes, 512) / 512; + return ROUND_UP(bytes, sector_size) / sector_size; } -static unsigned int mpb_sectors(struct imsm_super *mpb) +static unsigned int mpb_sectors(struct imsm_super *mpb, + unsigned int sector_size) { - return sector_count(__le32_to_cpu(mpb->mpb_size)); + return sector_count(__le32_to_cpu(mpb->mpb_size), sector_size); } struct intel_dev { @@ -359,6 +388,7 @@ struct intel_super { array, it indicates that mdmon is allowed to clean migration record */ size_t len; /* size of the 'buf' allocation */ + size_t extra_space; /* extra space in 'buf' that is not used yet */ void *next_buf; /* for realloc'ing buf from the manager */ size_t next_len; int updates_pending; /* count of pending updates for mdmon */ @@ -366,6 +396,7 @@ struct intel_super { unsigned long long create_offset; /* common start for 'current_vol' */ __u32 random; /* random data for seeding new family numbers */ struct intel_dev *devlist; + unsigned int sector_size; /* sector size of used member drives */ struct dl { struct dl *next; int index; @@ -386,6 +417,7 @@ struct intel_super { struct intel_hba *hba; /* device path of the raid controller for this metadata */ const struct imsm_orom *orom; /* platform firmware support */ struct intel_super *next; /* (temp) list for disambiguating family_num */ + struct md_bb bb; /* memory for get_bad_blocks call */ }; struct intel_disk { @@ -418,6 +450,8 @@ enum imsm_update_type { update_takeover, update_general_migration_checkpoint, update_size_change, + update_prealloc_badblocks_mem, + update_rwh_policy, }; struct imsm_update_activate_spare { @@ -506,11 +540,22 @@ struct imsm_update_add_remove_disk { enum imsm_update_type type; }; +struct imsm_update_prealloc_bb_mem { + enum imsm_update_type type; +}; + +struct imsm_update_rwh_policy { + enum imsm_update_type type; + int new_policy; + int dev_idx; +}; + static const char *_sys_dev_type[] = { [SYS_DEV_UNKNOWN] = "Unknown", [SYS_DEV_SAS] = "SAS", [SYS_DEV_SATA] = "SATA", - [SYS_DEV_NVME] = "NVMe" + [SYS_DEV_NVME] = "NVMe", + [SYS_DEV_VMD] = "VMD" }; const char *get_sys_dev_type(enum sys_dev_type type) @@ -536,7 +581,8 @@ static struct intel_hba * alloc_intel_hba(struct sys_dev *device) static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device) { - struct intel_hba 
*result=NULL; + struct intel_hba *result; + for (result = hba; result; result = result->next) { if (result->type == device->type && strcmp(result->path, device->path) == 0) break; @@ -624,12 +670,10 @@ static struct supertype *match_metadata_desc_imsm(char *arg) return st; } -#ifndef MDASSEMBLE static __u8 *get_imsm_version(struct imsm_super *mpb) { return &mpb->sig[MPB_SIG_LEN]; } -#endif /* retrieve a disk directly from the anchor when the anchor is known to be * up-to-date, currently only at load time @@ -736,7 +780,6 @@ static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state) return size; } -#ifndef MDASSEMBLE /* retrieve disk serial number list from a metadata update */ static struct disk_info *get_disk_info(struct imsm_update_create_array *update) { @@ -748,7 +791,6 @@ static struct disk_info *get_disk_info(struct imsm_update_create_array *update) return inf; } -#endif static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index) { @@ -783,6 +825,242 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index) return NULL; } +static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr + *addr) +{ + return ((((__u64)__le32_to_cpu(addr->dw1)) << 16) | + __le16_to_cpu(addr->w1)); +} + +static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec) +{ + struct bbm_log_block_addr addr; + + addr.w1 = __cpu_to_le16((__u16)(sec & 0xffff)); + addr.dw1 = __cpu_to_le32((__u32)(sec >> 16) & 0xffffffff); + return addr; +} + +/* get size of the bbm log */ +static __u32 get_imsm_bbm_log_size(struct bbm_log *log) +{ + if (!log || log->entry_count == 0) + return 0; + + return sizeof(log->signature) + + sizeof(log->entry_count) + + log->entry_count * sizeof(struct bbm_log_entry); +} + +/* check if bad block is not partially stored in bbm log */ +static int is_stored_in_bbm(struct bbm_log *log, const __u8 idx, const unsigned + long long sector, const int length, __u32 *pos) +{ + __u32 i; + + for (i = *pos; i < log->entry_count; i++) { + struct bbm_log_entry *entry = &log->marked_block_entries[i]; + unsigned long long bb_start; + unsigned long long bb_end; + + bb_start = __le48_to_cpu(&entry->defective_block_start); + bb_end = bb_start + (entry->marked_count + 1); + + if ((entry->disk_ordinal == idx) && (bb_start >= sector) && + (bb_end <= sector + length)) { + *pos = i; + return 1; + } + } + return 0; +} + +/* record new bad block in bbm log */ +static int record_new_badblock(struct bbm_log *log, const __u8 idx, unsigned + long long sector, int length) +{ + int new_bb = 0; + __u32 pos = 0; + struct bbm_log_entry *entry = NULL; + + while (is_stored_in_bbm(log, idx, sector, length, &pos)) { + struct bbm_log_entry *e = &log->marked_block_entries[pos]; + + if ((e->marked_count + 1 == BBM_LOG_MAX_LBA_ENTRY_VAL) && + (__le48_to_cpu(&e->defective_block_start) == sector)) { + sector += BBM_LOG_MAX_LBA_ENTRY_VAL; + length -= BBM_LOG_MAX_LBA_ENTRY_VAL; + pos = pos + 1; + continue; + } + entry = e; + break; + } + + if (entry) { + int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length : + BBM_LOG_MAX_LBA_ENTRY_VAL; + entry->defective_block_start = __cpu_to_le48(sector); + entry->marked_count = cnt - 1; + if (cnt == length) + return 1; + sector += cnt; + length -= cnt; + } + + new_bb = ROUND_UP(length, BBM_LOG_MAX_LBA_ENTRY_VAL) / + BBM_LOG_MAX_LBA_ENTRY_VAL; + if (log->entry_count + new_bb > BBM_LOG_MAX_ENTRIES) + return 0; + + while (length > 0) { + int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? 
length : + BBM_LOG_MAX_LBA_ENTRY_VAL; + struct bbm_log_entry *entry = + &log->marked_block_entries[log->entry_count]; + + entry->defective_block_start = __cpu_to_le48(sector); + entry->marked_count = cnt - 1; + entry->disk_ordinal = idx; + + sector += cnt; + length -= cnt; + + log->entry_count++; + } + + return new_bb; +} + +/* clear all bad blocks for given disk */ +static void clear_disk_badblocks(struct bbm_log *log, const __u8 idx) +{ + __u32 i = 0; + + while (i < log->entry_count) { + struct bbm_log_entry *entries = log->marked_block_entries; + + if (entries[i].disk_ordinal == idx) { + if (i < log->entry_count - 1) + entries[i] = entries[log->entry_count - 1]; + log->entry_count--; + } else { + i++; + } + } +} + +/* clear given bad block */ +static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned + long long sector, const int length) { + __u32 i = 0; + + while (i < log->entry_count) { + struct bbm_log_entry *entries = log->marked_block_entries; + + if ((entries[i].disk_ordinal == idx) && + (__le48_to_cpu(&entries[i].defective_block_start) == + sector) && (entries[i].marked_count + 1 == length)) { + if (i < log->entry_count - 1) + entries[i] = entries[log->entry_count - 1]; + log->entry_count--; + break; + } + i++; + } + + return 1; +} + +/* allocate and load BBM log from metadata */ +static int load_bbm_log(struct intel_super *super) +{ + struct imsm_super *mpb = super->anchor; + __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size); + + super->bbm_log = xcalloc(1, sizeof(struct bbm_log)); + if (!super->bbm_log) + return 1; + + if (bbm_log_size) { + struct bbm_log *log = (void *)mpb + + __le32_to_cpu(mpb->mpb_size) - bbm_log_size; + + __u32 entry_count; + + if (bbm_log_size < sizeof(log->signature) + + sizeof(log->entry_count)) + return 2; + + entry_count = __le32_to_cpu(log->entry_count); + if ((__le32_to_cpu(log->signature) != BBM_LOG_SIGNATURE) || + (entry_count > BBM_LOG_MAX_ENTRIES)) + return 3; + + if (bbm_log_size != + sizeof(log->signature) + sizeof(log->entry_count) + + entry_count * sizeof(struct bbm_log_entry)) + return 4; + + memcpy(super->bbm_log, log, bbm_log_size); + } else { + super->bbm_log->signature = __cpu_to_le32(BBM_LOG_SIGNATURE); + super->bbm_log->entry_count = 0; + } + + return 0; +} + +/* checks if bad block is within volume boundaries */ +static int is_bad_block_in_volume(const struct bbm_log_entry *entry, + const unsigned long long start_sector, + const unsigned long long size) +{ + unsigned long long bb_start; + unsigned long long bb_end; + + bb_start = __le48_to_cpu(&entry->defective_block_start); + bb_end = bb_start + (entry->marked_count + 1); + + if (((bb_start >= start_sector) && (bb_start < start_sector + size)) || + ((bb_end >= start_sector) && (bb_end <= start_sector + size))) + return 1; + + return 0; +} + +/* get list of bad blocks on a drive for a volume */ +static void get_volume_badblocks(const struct bbm_log *log, const __u8 idx, + const unsigned long long start_sector, + const unsigned long long size, + struct md_bb *bbs) +{ + __u32 count = 0; + __u32 i; + + for (i = 0; i < log->entry_count; i++) { + const struct bbm_log_entry *ent = + &log->marked_block_entries[i]; + struct md_bb_entry *bb; + + if ((ent->disk_ordinal == idx) && + is_bad_block_in_volume(ent, start_sector, size)) { + + if (!bbs->entries) { + bbs->entries = xmalloc(BBM_LOG_MAX_ENTRIES * + sizeof(*bb)); + if (!bbs->entries) + break; + } + + bb = &bbs->entries[count++]; + bb->sector = __le48_to_cpu(&ent->defective_block_start); + bb->length = 
ent->marked_count + 1;
+ }
+ }
+ bbs->count = count;
+}
+
 /*
 * for second_map:
 * == MAP_0 get first map
@@ -905,7 +1183,6 @@ static unsigned long long blocks_per_member(struct imsm_map *map)
 return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi);
 }
-#ifndef MDASSEMBLE
 static unsigned long long num_data_stripes(struct imsm_map *map)
 {
 if (map == NULL)
@@ -917,7 +1194,6 @@ static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
 {
 split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
 }
-#endif
 static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
 {
@@ -1037,6 +1313,24 @@ static int is_failed(struct imsm_disk *disk)
 return (disk->status & FAILED_DISK) == FAILED_DISK;
 }
+static int is_journal(struct imsm_disk *disk)
+{
+ return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
+}
+
+/* round array size down to closest MB and ensure it splits evenly
+ * between members
+ */
+static unsigned long long round_size_to_mb(unsigned long long size, unsigned int
+ disk_count)
+{
+ size /= disk_count;
+ size = (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+ size *= disk_count;
+
+ return size;
+}
+
 /* try to determine how much space is reserved for metadata from
 * the last get_extents() entry on the smallest active disk,
 * otherwise fallback to the default
@@ -1119,7 +1413,8 @@ static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
 
 static int is_gen_migration(struct imsm_dev *dev);
 
-#ifndef MDASSEMBLE
+#define IMSM_4K_DIV 8
+
 static __u64 blocks_per_migr_unit(struct intel_super *super,
 struct imsm_dev *dev);
 
@@ -1217,18 +1512,30 @@ static void print_imsm_dev(struct intel_super *super,
 printf(" <-- %s", map_state_str[map->map_state]);
 printf("\n Checkpoint : %u ",
 __le32_to_cpu(dev->vol.curr_migr_unit));
- if ((is_gen_migration(dev)) && ((slot > 1) || (slot < 0)))
+ if (is_gen_migration(dev) && (slot > 1 || slot < 0))
 printf("(N/A)");
 else
 printf("(%llu)", (unsigned long long)
 blocks_per_migr_unit(super, dev));
 }
 printf("\n");
- printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
+ printf(" Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
+ "dirty" : "clean");
+ printf(" RWH Policy : ");
+ if (dev->rwh_policy == RWH_OFF)
+ printf("off\n");
+ else if (dev->rwh_policy == RWH_DISTRIBUTED)
+ printf("PPL distributed\n");
+ else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
+ printf("PPL journaling drive\n");
+ else
+ printf("<unknown:%d>\n", dev->rwh_policy);
 }
 
-static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved)
-{
+static void print_imsm_disk(struct imsm_disk *disk,
+ int index,
+ __u32 reserved,
+ unsigned int sector_size) {
 char str[MAX_RAID_SERIAL_LEN + 1];
 __u64 sz;
@@ -1241,15 +1548,91 @@ static void print_imsm_disk(struct imsm_disk *disk, int index, __u32 reserved)
 printf(" Disk%02d Serial : %s\n", index, str);
 else
 printf(" Disk Serial : %s\n", str);
- printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
- is_configured(disk) ? " active" : "",
- is_failed(disk) ? " failed" : "");
+ printf(" State :%s%s%s%s\n", is_spare(disk) ? " spare" : "",
+ is_configured(disk) ? " active" : "",
+ is_failed(disk) ? " failed" : "",
+ is_journal(disk) ?
" journal" : ""); printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id)); sz = total_blocks(disk) - reserved; - printf(" Usable Size : %llu%s\n", (unsigned long long)sz, + printf(" Usable Size : %llu%s\n", + (unsigned long long)sz * 512 / sector_size, human_size(sz * 512)); } +void convert_to_4k_imsm_migr_rec(struct intel_super *super) +{ + struct migr_record *migr_rec = super->migr_rec; + + migr_rec->blocks_per_unit /= IMSM_4K_DIV; + migr_rec->ckpt_area_pba /= IMSM_4K_DIV; + migr_rec->dest_1st_member_lba /= IMSM_4K_DIV; + migr_rec->dest_depth_per_unit /= IMSM_4K_DIV; + split_ull((join_u32(migr_rec->post_migr_vol_cap, + migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV), + &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi); +} + +void convert_to_4k_imsm_disk(struct imsm_disk *disk) +{ + set_total_blocks(disk, (total_blocks(disk)/IMSM_4K_DIV)); +} + +void convert_to_4k(struct intel_super *super) +{ + struct imsm_super *mpb = super->anchor; + struct imsm_disk *disk; + int i; + __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size); + + for (i = 0; i < mpb->num_disks ; i++) { + disk = __get_imsm_disk(mpb, i); + /* disk */ + convert_to_4k_imsm_disk(disk); + } + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = __get_imsm_dev(mpb, i); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + /* dev */ + split_ull((join_u32(dev->size_low, dev->size_high)/IMSM_4K_DIV), + &dev->size_low, &dev->size_high); + dev->vol.curr_migr_unit /= IMSM_4K_DIV; + + /* map0 */ + set_blocks_per_member(map, blocks_per_member(map)/IMSM_4K_DIV); + map->blocks_per_strip /= IMSM_4K_DIV; + set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV); + + if (dev->vol.migr_state) { + /* map1 */ + map = get_imsm_map(dev, MAP_1); + set_blocks_per_member(map, + blocks_per_member(map)/IMSM_4K_DIV); + map->blocks_per_strip /= IMSM_4K_DIV; + set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV); + } + } + if (bbm_log_size) { + struct bbm_log *log = (void *)mpb + + __le32_to_cpu(mpb->mpb_size) - bbm_log_size; + __u32 i; + + for (i = 0; i < log->entry_count; i++) { + struct bbm_log_entry *entry = + &log->marked_block_entries[i]; + + __u8 count = entry->marked_count + 1; + unsigned long long sector = + __le48_to_cpu(&entry->defective_block_start); + + entry->defective_block_start = + __cpu_to_le48(sector/IMSM_4K_DIV); + entry->marked_count = max(count/IMSM_4K_DIV, 1) - 1; + } + } + + mpb->check_sum = __gen_imsm_checksum(mpb); +} + void examine_migr_rec_imsm(struct intel_super *super) { struct migr_record *migr_rec = super->migr_rec; @@ -1270,7 +1653,7 @@ void examine_migr_rec_imsm(struct intel_super *super) map = get_imsm_map(dev, MAP_0); if (map) slot = get_imsm_disk_slot(map, super->disks->index); - if ((map == NULL) || (slot > 1) || (slot < 0)) { + if (map == NULL || slot > 1 || slot < 0) { printf(" Empty\n "); printf("Examine one of first two disks in array\n"); break; @@ -1306,7 +1689,78 @@ void examine_migr_rec_imsm(struct intel_super *super) break; } } -#endif /* MDASSEMBLE */ + +void convert_from_4k_imsm_migr_rec(struct intel_super *super) +{ + struct migr_record *migr_rec = super->migr_rec; + + migr_rec->blocks_per_unit *= IMSM_4K_DIV; + migr_rec->ckpt_area_pba *= IMSM_4K_DIV; + migr_rec->dest_1st_member_lba *= IMSM_4K_DIV; + migr_rec->dest_depth_per_unit *= IMSM_4K_DIV; + split_ull((join_u32(migr_rec->post_migr_vol_cap, + migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV), + &migr_rec->post_migr_vol_cap, + &migr_rec->post_migr_vol_cap_hi); +} + +void convert_from_4k(struct intel_super *super) +{ + struct imsm_super 
*mpb = super->anchor; + struct imsm_disk *disk; + int i; + __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size); + + for (i = 0; i < mpb->num_disks ; i++) { + disk = __get_imsm_disk(mpb, i); + /* disk */ + set_total_blocks(disk, (total_blocks(disk)*IMSM_4K_DIV)); + } + + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = __get_imsm_dev(mpb, i); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + /* dev */ + split_ull((join_u32(dev->size_low, dev->size_high)*IMSM_4K_DIV), + &dev->size_low, &dev->size_high); + dev->vol.curr_migr_unit *= IMSM_4K_DIV; + + /* map0 */ + set_blocks_per_member(map, blocks_per_member(map)*IMSM_4K_DIV); + map->blocks_per_strip *= IMSM_4K_DIV; + set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV); + + if (dev->vol.migr_state) { + /* map1 */ + map = get_imsm_map(dev, MAP_1); + set_blocks_per_member(map, + blocks_per_member(map)*IMSM_4K_DIV); + map->blocks_per_strip *= IMSM_4K_DIV; + set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV); + } + } + if (bbm_log_size) { + struct bbm_log *log = (void *)mpb + + __le32_to_cpu(mpb->mpb_size) - bbm_log_size; + __u32 i; + + for (i = 0; i < log->entry_count; i++) { + struct bbm_log_entry *entry = + &log->marked_block_entries[i]; + + __u8 count = entry->marked_count + 1; + unsigned long long sector = + __le48_to_cpu(&entry->defective_block_start); + + entry->defective_block_start = + __cpu_to_le48(sector*IMSM_4K_DIV); + entry->marked_count = count*IMSM_4K_DIV - 1; + } + } + + mpb->check_sum = __gen_imsm_checksum(mpb); +} + /******************************************************************************* * function: imsm_check_attributes * Description: Function checks if features represented by attributes flags @@ -1386,7 +1840,7 @@ static int imsm_check_attributes(__u32 attributes) } if (not_supported) - dprintf(Name "(IMSM): Unknown attributes : %x\n", not_supported); + dprintf("(IMSM): Unknown attributes : %x\n", not_supported); ret_val = 0; } @@ -1394,7 +1848,6 @@ static int imsm_check_attributes(__u32 attributes) return ret_val; } -#ifndef MDASSEMBLE static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map); static void examine_super_imsm(struct supertype *st, char *homehost) @@ -1409,7 +1862,8 @@ static void examine_super_imsm(struct supertype *st, char *homehost) __u32 reserved = imsm_reserved_sectors(super, super->disks); struct dl *dl; - snprintf(str, MPB_SIG_LEN, "%s", mpb->sig); + strncpy(str, (char *)mpb->sig, MPB_SIG_LEN); + str[MPB_SIG_LEN-1] = '\0'; printf(" Magic : %s\n", str); snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb)); printf(" Version : %s\n", get_imsm_version(mpb)); @@ -1427,11 +1881,12 @@ static void examine_super_imsm(struct supertype *st, char *homehost) sum = __le32_to_cpu(mpb->check_sum); printf(" Checksum : %08x %s\n", sum, __gen_imsm_checksum(mpb) == sum ? 
"correct" : "incorrect"); - printf(" MPB Sectors : %d\n", mpb_sectors(mpb)); + printf(" MPB Sectors : %d\n", mpb_sectors(mpb, super->sector_size)); printf(" Disks : %d\n", mpb->num_disks); printf(" RAID Devices : %d\n", mpb->num_raid_devs); - print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), super->disks->index, reserved); - if (super->bbm_log) { + print_imsm_disk(__get_imsm_disk(mpb, super->disks->index), + super->disks->index, reserved, super->sector_size); + if (get_imsm_bbm_log_size(super->bbm_log)) { struct bbm_log *log = super->bbm_log; printf("\n"); @@ -1439,9 +1894,6 @@ static void examine_super_imsm(struct supertype *st, char *homehost) printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size)); printf(" Signature : %x\n", __le32_to_cpu(log->signature)); printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count)); - printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count)); - printf(" First Spare : %llx\n", - (unsigned long long) __le64_to_cpu(log->first_spare_lba)); } for (i = 0; i < mpb->num_raid_devs; i++) { struct mdinfo info; @@ -1455,12 +1907,14 @@ static void examine_super_imsm(struct supertype *st, char *homehost) for (i = 0; i < mpb->num_disks; i++) { if (i == super->disks->index) continue; - print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved); + print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved, + super->sector_size); } for (dl = super->disks; dl; dl = dl->next) if (dl->index == -1) - print_imsm_disk(&dl->disk, -1, reserved); + print_imsm_disk(&dl->disk, -1, reserved, + super->sector_size); examine_migr_rec_imsm(super); } @@ -1524,7 +1978,7 @@ static void export_examine_super_imsm(struct supertype *st) static int copy_metadata_imsm(struct supertype *st, int from, int to) { - /* The second last 512byte sector of the device contains + /* The second last sector of the device contains * the "struct imsm_super" metadata. * This contains mpb_size which is the size in bytes of the * extended metadata. 
This is located immediately before @@ -1537,29 +1991,31 @@ static int copy_metadata_imsm(struct supertype *st, int from, int to) unsigned long long dsize, offset; int sectors; struct imsm_super *sb; - int written = 0; + struct intel_super *super = st->sb; + unsigned int sector_size = super->sector_size; + unsigned int written = 0; - if (posix_memalign(&buf, 4096, 4096) != 0) + if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0) return 1; if (!get_dev_size(from, NULL, &dsize)) goto err; - if (lseek64(from, dsize-1024, 0) < 0) + if (lseek64(from, dsize-(2*sector_size), 0) < 0) goto err; - if (read(from, buf, 512) != 512) + if ((unsigned int)read(from, buf, sector_size) != sector_size) goto err; sb = buf; if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) goto err; - sectors = mpb_sectors(sb) + 2; - offset = dsize - sectors * 512; + sectors = mpb_sectors(sb, sector_size) + 2; + offset = dsize - sectors * sector_size; if (lseek64(from, offset, 0) < 0 || lseek64(to, offset, 0) < 0) goto err; - while (written < sectors * 512) { - int n = sectors*512 - written; + while (written < sectors * sector_size) { + int n = sectors*sector_size - written; if (n > 4096) n = 4096; if (read(from, buf, n) != n) @@ -1582,7 +2038,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost) getinfo_super_imsm(st, &info, NULL); fname_from_uuid(st, &info, nbuf, ':'); - printf("\n UUID : %s\n", nbuf + 5); + printf("\n UUID : %s\n", nbuf + 5); } static void brief_detail_super_imsm(struct supertype *st) @@ -1619,7 +2075,10 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b * this hba */ dir = opendir("/sys/dev/block"); - for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) { + if (!dir) + return 1; + + for (ent = readdir(dir); ent; ent = readdir(dir)) { int fd; char model[64]; char vendor[64]; @@ -1649,7 +2108,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b break; } sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor); - if (load_sys(device, buf) != 0) { + if (load_sys(device, buf, sizeof(buf)) != 0) { if (verbose > 0) pr_err("failed to read device type for %s\n", path); @@ -1664,7 +2123,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b vendor[0] = '\0'; model[0] = '\0'; sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor); - if (load_sys(device, buf) == 0) { + if (load_sys(device, buf, sizeof(buf)) == 0) { strncpy(vendor, buf, sizeof(vendor)); vendor[sizeof(vendor) - 1] = '\0'; c = (char *) &vendor[sizeof(vendor) - 1]; @@ -1673,7 +2132,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b } sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor); - if (load_sys(device, buf) == 0) { + if (load_sys(device, buf, sizeof(buf)) == 0) { strncpy(model, buf, sizeof(model)); model[sizeof(model) - 1] = '\0'; c = (char *) &model[sizeof(model) - 1]; @@ -1709,7 +2168,8 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b break; } *c = '\0'; - if (sscanf(&path[hba_len], "host%d", &port) == 1) + if ((sscanf(&path[hba_len], "ata%d", &port) == 1) || + ((sscanf(&path[hba_len], "host%d", &port) == 1))) port -= host_base; else { if (verbose > 0) { @@ -1760,6 +2220,58 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b return err; } +static int print_vmd_attached_devs(struct sys_dev *hba) +{ + struct dirent *ent; + DIR *dir; + char path[292]; + char 
link[256]; + char *c, *rp; + + if (hba->type != SYS_DEV_VMD) + return 1; + + /* scroll through /sys/dev/block looking for devices attached to + * this hba + */ + dir = opendir("/sys/bus/pci/drivers/nvme"); + if (!dir) + return 1; + + for (ent = readdir(dir); ent; ent = readdir(dir)) { + int n; + + /* is 'ent' a device? check that the 'subsystem' link exists and + * that its target matches 'bus' + */ + sprintf(path, "/sys/bus/pci/drivers/nvme/%s/subsystem", + ent->d_name); + n = readlink(path, link, sizeof(link)); + if (n < 0 || n >= (int)sizeof(link)) + continue; + link[n] = '\0'; + c = strrchr(link, '/'); + if (!c) + continue; + if (strncmp("pci", c+1, strlen("pci")) != 0) + continue; + + sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name); + + rp = realpath(path, NULL); + if (!rp) + continue; + + if (path_attached_to_hba(rp, hba->path)) { + printf(" NVMe under VMD : %s\n", rp); + } + free(rp); + } + + closedir(dir); + return 0; +} + static void print_found_intel_controllers(struct sys_dev *elem) { for (; elem; elem = elem->next) { @@ -1768,7 +2280,14 @@ static void print_found_intel_controllers(struct sys_dev *elem) fprintf(stderr, "SATA "); else if (elem->type == SYS_DEV_SAS) fprintf(stderr, "SAS "); - fprintf(stderr, "RAID controller"); + else if (elem->type == SYS_DEV_NVME) + fprintf(stderr, "NVMe "); + + if (elem->type == SYS_DEV_VMD) + fprintf(stderr, "VMD domain"); + else + fprintf(stderr, "RAID controller"); + if (elem->pci_id) fprintf(stderr, " at %s", elem->pci_id); fprintf(stderr, ".\n"); @@ -1789,7 +2308,8 @@ static int ahci_get_port_count(const char *hba_path, int *port_count) for (ent = readdir(dir); ent; ent = readdir(dir)) { int host; - if (sscanf(ent->d_name, "host%d", &host) != 1) + if ((sscanf(ent->d_name, "ata%d", &host) != 1) && + ((sscanf(ent->d_name, "host%d", &host) != 1))) continue; if (*port_count == 0) host_base = host; @@ -1805,9 +2325,15 @@ static int ahci_get_port_count(const char *hba_path, int *port_count) static void print_imsm_capability(const struct imsm_orom *orom) { - printf(" Platform : Intel(R) Matrix Storage Manager\n"); - printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver, - orom->hotfix_ver, orom->build); + printf(" Platform : Intel(R) "); + if (orom->capabilities == 0 && orom->driver_features == 0) + printf("Matrix Storage Manager\n"); + else + printf("Rapid Storage Technology%s\n", + imsm_orom_is_enterprise(orom) ? " enterprise" : ""); + if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build) + printf(" Version : %d.%d.%d.%d\n", orom->major_ver, + orom->minor_ver, orom->hotfix_ver, orom->build); printf(" RAID Levels :%s%s%s%s%s\n", imsm_orom_has_raid0(orom) ? " raid0" : "", imsm_orom_has_raid1(orom) ? " raid1" : "", @@ -1836,16 +2362,18 @@ static void print_imsm_capability(const struct imsm_orom *orom) printf(" 2TB disks :%s supported\n", (orom->attr & IMSM_OROM_ATTR_2TB_DISK)?"":" not"); printf(" Max Disks : %d\n", orom->tds); - printf(" Max Volumes : %d per array, %d per controller\n", - orom->vpa, orom->vphba); + printf(" Max Volumes : %d per array, %d per %s\n", + orom->vpa, orom->vphba, + imsm_orom_is_nvme(orom) ? 
"platform" : "controller"); return; } static void print_imsm_capability_export(const struct imsm_orom *orom) { printf("MD_FIRMWARE_TYPE=imsm\n"); - printf("IMSM_VERSION=%d.%d.%d.%d\n",orom->major_ver, orom->minor_ver, - orom->hotfix_ver, orom->build); + if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build) + printf("IMSM_VERSION=%d.%d.%d.%d\n", orom->major_ver, orom->minor_ver, + orom->hotfix_ver, orom->build); printf("IMSM_SUPPORTED_RAID_LEVELS=%s%s%s%s%s\n", imsm_orom_has_raid0(orom) ? "raid0 " : "", imsm_orom_has_raid1(orom) ? "raid1 " : "", @@ -1889,7 +2417,6 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle * platform capabilities. If raid support is disabled in the BIOS the * option-rom capability structure will not be available. */ - const struct imsm_orom *orom; struct sys_dev *list, *hba; int host_base = 0; int port_count = 0; @@ -1915,46 +2442,88 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle list = find_intel_devices(); if (!list) { if (verbose > 0) - pr_err("no active Intel(R) RAID " - "controller found.\n"); + pr_err("no active Intel(R) RAID controller found.\n"); return 2; } else if (verbose > 0) print_found_intel_controllers(list); for (hba = list; hba; hba = hba->next) { - if (controller_path && (compare_paths(hba->path,controller_path) != 0)) + if (controller_path && (compare_paths(hba->path, controller_path) != 0)) continue; - orom = find_imsm_capability(hba); - if (!orom) + if (!find_imsm_capability(hba)) { + char buf[PATH_MAX]; pr_err("imsm capabilities not found for controller: %s (type %s)\n", - hba->path, get_sys_dev_type(hba->type)); - else { - result = 0; - print_imsm_capability(orom); + hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path, + get_sys_dev_type(hba->type)); + continue; + } + result = 0; + } + + if (controller_path && result == 1) { + pr_err("no active Intel(R) RAID controller found under %s\n", + controller_path); + return result; + } + + const struct orom_entry *entry; + + for (entry = orom_entries; entry; entry = entry->next) { + if (entry->type == SYS_DEV_VMD) { + print_imsm_capability(&entry->orom); + printf(" 3rd party NVMe :%s supported\n", + imsm_orom_has_tpv_support(&entry->orom)?"":" not"); + for (hba = list; hba; hba = hba->next) { + if (hba->type == SYS_DEV_VMD) { + char buf[PATH_MAX]; + printf(" I/O Controller : %s (%s)\n", + vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type)); + if (print_vmd_attached_devs(hba)) { + if (verbose > 0) + pr_err("failed to get devices attached to VMD domain.\n"); + result |= 2; + } + } + } + printf("\n"); + continue; + } + + print_imsm_capability(&entry->orom); + if (entry->type == SYS_DEV_NVME) { + for (hba = list; hba; hba = hba->next) { + if (hba->type == SYS_DEV_NVME) + printf(" NVMe Device : %s\n", hba->path); + } + printf("\n"); + continue; + } + + struct devid_list *devid; + for (devid = entry->devid_list; devid; devid = devid->next) { + hba = device_by_id(devid->devid); + if (!hba) + continue; + printf(" I/O Controller : %s (%s)\n", hba->path, get_sys_dev_type(hba->type)); if (hba->type == SYS_DEV_SATA) { host_base = ahci_get_port_count(hba->path, &port_count); if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) { if (verbose > 0) - pr_err("failed to enumerate " - "ports on SATA controller at %s.\n", hba->pci_id); + pr_err("failed to enumerate ports on SATA controller at %s.\n", hba->pci_id); result |= 2; } } } + printf("\n"); } - if (controller_path && result 
== 1) - pr_err("no active Intel(R) RAID " - "controller found under %s\n",controller_path); - return result; } static int export_detail_platform_imsm(int verbose, char *controller_path) { - const struct imsm_orom *orom; struct sys_dev *list, *hba; int result=1; @@ -1969,22 +2538,29 @@ static int export_detail_platform_imsm(int verbose, char *controller_path) for (hba = list; hba; hba = hba->next) { if (controller_path && (compare_paths(hba->path,controller_path) != 0)) continue; - orom = find_imsm_capability(hba); - if (!orom) { - if (verbose > 0) - pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n",hba->path); + if (!find_imsm_capability(hba) && verbose > 0) { + char buf[PATH_MAX]; + pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n", + hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path); } - else { - print_imsm_capability_export(orom); + else result = 0; + } + + const struct orom_entry *entry; + + for (entry = orom_entries; entry; entry = entry->next) { + if (entry->type == SYS_DEV_VMD) { + for (hba = list; hba; hba = hba->next) + print_imsm_capability_export(&entry->orom); + continue; } + print_imsm_capability_export(&entry->orom); } return result; } -#endif - static int match_home_imsm(struct supertype *st, char *homehost) { /* the imsm metadata format does not specify any host @@ -2157,7 +2733,7 @@ static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) case 5: return map->num_members - 1; default: - dprintf("%s: unsupported raid level\n", __func__); + dprintf("unsupported raid level\n"); return 0; } } @@ -2286,21 +2862,26 @@ static int imsm_level_to_layout(int level) static int read_imsm_migr_rec(int fd, struct intel_super *super) { int ret_val = -1; + unsigned int sector_size = super->sector_size; unsigned long long dsize; get_dev_size(fd, NULL, &dsize); - if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) { + if (lseek64(fd, dsize - (sector_size*MIGR_REC_SECTOR_POSITION), + SEEK_SET) < 0) { pr_err("Cannot seek to anchor block: %s\n", strerror(errno)); goto out; } - if (read(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) != - MIGR_REC_BUF_SIZE) { + if ((unsigned int)read(fd, super->migr_rec_buf, + MIGR_REC_BUF_SECTORS*sector_size) != + MIGR_REC_BUF_SECTORS*sector_size) { pr_err("Cannot read migr record block: %s\n", strerror(errno)); goto out; } ret_val = 0; + if (sector_size == 4096) + convert_from_4k_imsm_migr_rec(super); out: return ret_val; @@ -2334,12 +2915,12 @@ static struct imsm_dev *imsm_get_device_during_migration( static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info) { struct mdinfo *sd; - struct dl *dl = NULL; + struct dl *dl; char nm[30]; int retval = -1; int fd = -1; struct imsm_dev *dev; - struct imsm_map *map = NULL; + struct imsm_map *map; int slot = -1; /* find map under migration */ @@ -2348,19 +2929,12 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info) */ if (dev == NULL) return -2; - map = get_imsm_map(dev, MAP_0); if (info) { for (sd = info->devs ; sd ; sd = sd->next) { - /* skip spare and failed disks - */ - if (sd->disk.raid_disk < 0) - continue; /* read only from one of the first two slots */ - if (map) - slot = get_imsm_disk_slot(map, - sd->disk.raid_disk); - if ((map == NULL) || (slot > 1) || (slot < 0)) + if ((sd->disk.raid_disk < 0) || + (sd->disk.raid_disk > 1)) continue; sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); @@ -2370,6 +2944,7 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo 
*info) } } if (fd < 0) { + map = get_imsm_map(dev, MAP_0); for (dl = super->disks; dl; dl = dl->next) { /* skip spare and failed disks */ @@ -2378,7 +2953,7 @@ static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info) /* read only from one of the first two slots */ if (map) slot = get_imsm_disk_slot(map, dl->index); - if ((map == NULL) || (slot > 1) || (slot < 0)) + if (map == NULL || slot > 1 || slot < 0) continue; sprintf(nm, "%d:%d", dl->major, dl->minor); fd = dev_open(nm, O_RDONLY); @@ -2396,7 +2971,6 @@ out: return retval; } -#ifndef MDASSEMBLE /******************************************************************************* * function: imsm_create_metadata_checkpoint_update * Description: It creates update for checkpoint change. @@ -2414,7 +2988,7 @@ static int imsm_create_metadata_checkpoint_update( int update_memory_size = 0; - dprintf("imsm_create_metadata_checkpoint_update(enter)\n"); + dprintf("(enter)\n"); if (u == NULL) return 0; @@ -2426,14 +3000,12 @@ static int imsm_create_metadata_checkpoint_update( *u = xcalloc(1, update_memory_size); if (*u == NULL) { - dprintf("error: cannot get memory for " - "imsm_create_metadata_checkpoint_update update\n"); + dprintf("error: cannot get memory\n"); return 0; } (*u)->type = update_general_migration_checkpoint; (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit); - dprintf("imsm_create_metadata_checkpoint_update: prepared for %u\n", - (*u)->curr_migr_unit); + dprintf("prepared for %u\n", (*u)->curr_migr_unit); return update_memory_size; } @@ -2454,6 +3026,7 @@ static void imsm_update_metadata_locally(struct supertype *st, static int write_imsm_migr_rec(struct supertype *st) { struct intel_super *super = st->sb; + unsigned int sector_size = super->sector_size; unsigned long long dsize; char nm[30]; int fd = -1; @@ -2462,7 +3035,7 @@ static int write_imsm_migr_rec(struct supertype *st) int len; struct imsm_update_general_migration_checkpoint *u; struct imsm_dev *dev; - struct imsm_map *map = NULL; + struct imsm_map *map; /* find map under migration */ dev = imsm_get_device_during_migration(super); @@ -2475,6 +3048,8 @@ static int write_imsm_migr_rec(struct supertype *st) map = get_imsm_map(dev, MAP_0); + if (sector_size == 4096) + convert_to_4k_imsm_migr_rec(super); for (sd = super->disks ; sd ; sd = sd->next) { int slot = -1; @@ -2484,7 +3059,7 @@ static int write_imsm_migr_rec(struct supertype *st) /* write to 2 first slots only */ if (map) slot = get_imsm_disk_slot(map, sd->index); - if ((map == NULL) || (slot > 1) || (slot < 0)) + if (map == NULL || slot > 1 || slot < 0) continue; sprintf(nm, "%d:%d", sd->major, sd->minor); @@ -2492,13 +3067,15 @@ static int write_imsm_migr_rec(struct supertype *st) if (fd < 0) continue; get_dev_size(fd, NULL, &dsize); - if (lseek64(fd, dsize - MIGR_REC_POSITION, SEEK_SET) < 0) { + if (lseek64(fd, dsize - (MIGR_REC_SECTOR_POSITION*sector_size), + SEEK_SET) < 0) { pr_err("Cannot seek to anchor block: %s\n", strerror(errno)); goto out; } - if (write(fd, super->migr_rec_buf, MIGR_REC_BUF_SIZE) != - MIGR_REC_BUF_SIZE) { + if ((unsigned int)write(fd, super->migr_rec_buf, + MIGR_REC_BUF_SECTORS*sector_size) != + MIGR_REC_BUF_SECTORS*sector_size) { pr_err("Cannot write migr record block: %s\n", strerror(errno)); goto out; @@ -2506,9 +3083,10 @@ static int write_imsm_migr_rec(struct supertype *st) close(fd); fd = -1; } + if (sector_size == 4096) + convert_from_4k_imsm_migr_rec(super); /* update checkpoint information in metadata */ len = 
imsm_create_metadata_checkpoint_update(super, &u); - if (len <= 0) { dprintf("imsm: Cannot prepare update\n"); goto out; @@ -2533,7 +3111,6 @@ static int write_imsm_migr_rec(struct supertype *st) close(fd); return retval; } -#endif /* MDASSEMBLE */ /* spare/missing disks activations are not allowe when * array/container performs reshape operation, because @@ -2560,17 +3137,16 @@ int imsm_reshape_blocks_arrays_changes(struct intel_super *super) } static unsigned long long imsm_component_size_aligment_check(int level, int chunk_size, + unsigned int sector_size, unsigned long long component_size) { unsigned int component_size_alligment; /* check component size aligment */ - component_size_alligment = component_size % (chunk_size/512); + component_size_alligment = component_size % (chunk_size/sector_size); - dprintf("imsm_component_size_aligment_check(Level: %i, " - "chunk_size = %i, component_size = %llu), " - "component_size_alligment = %u\n", + dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alligment = %u\n", level, chunk_size, component_size, component_size_alligment); @@ -2578,13 +3154,22 @@ static unsigned long long imsm_component_size_aligment_check(int level, dprintf("imsm: reported component size alligned from %llu ", component_size); component_size -= component_size_alligment; - dprintf("to %llu (%i).\n", + dprintf_cont("to %llu (%i).\n", component_size, component_size_alligment); } return component_size; } +static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx) +{ + struct imsm_dev *dev = get_imsm_dev(super, dev_idx); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + + return pba_of_lba0(map) + + (num_data_stripes(map) * map->blocks_per_strip); +} + static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap) { struct intel_super *super = st->sb; @@ -2611,7 +3196,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->array.utime = 0; info->array.chunk_size = __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9; - info->array.state = !dev->vol.dirty; + info->array.state = !(dev->vol.dirty & RAIDVOL_DIRTY); info->custom_array_size = __le32_to_cpu(dev->size_high); info->custom_array_size <<= 32; info->custom_array_size |= __le32_to_cpu(dev->size_low); @@ -2674,20 +3259,38 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, } info->data_offset = pba_of_lba0(map_to_analyse); - info->component_size = blocks_per_member(map_to_analyse); + + if (info->array.level == 5) { + info->component_size = num_data_stripes(map_to_analyse) * + map_to_analyse->blocks_per_strip; + } else { + info->component_size = blocks_per_member(map_to_analyse); + } info->component_size = imsm_component_size_aligment_check( info->array.level, info->array.chunk_size, + super->sector_size, info->component_size); + info->bb.supported = 1; memset(info->uuid, 0, sizeof(info->uuid)); info->recovery_start = MaxSector; + if (info->array.level == 5 && dev->rwh_policy == RWH_DISTRIBUTED) { + info->consistency_policy = CONSISTENCY_POLICY_PPL; + info->ppl_sector = get_ppl_sector(super, super->current_vol); + info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9; + } else if (info->array.level <= 0) { + info->consistency_policy = CONSISTENCY_POLICY_NONE; + } else { + info->consistency_policy = CONSISTENCY_POLICY_RESYNC; + } + info->reshape_progress = 0; info->resync_start = MaxSector; if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED || - dev->vol.dirty) && + 
!(info->array.state & 1)) && imsm_reshape_blocks_arrays_changes(super) == 0) { info->resync_start = 0; } @@ -2718,8 +3321,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, info->reshape_progress = blocks_per_unit * units; - dprintf("IMSM: General Migration checkpoint : %llu " - "(%llu) -> read reshape progress : %llu\n", + dprintf("IMSM: General Migration checkpoint : %llu (%llu) -> read reshape progress : %llu\n", (unsigned long long)units, (unsigned long long)blocks_per_unit, info->reshape_progress); @@ -2728,11 +3330,10 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, if (used_disks > 0) { array_blocks = blocks_per_member(map) * used_disks; - /* round array size down to closest MB - */ - info->custom_array_size = (array_blocks - >> SECT_PER_MB_SHIFT) - << SECT_PER_MB_SHIFT; + info->custom_array_size = + round_size_to_mb(array_blocks, + used_disks); + } } case MIGR_VERIFY: @@ -2780,7 +3381,6 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, int look_in_map); -#ifndef MDASSEMBLE static void manage_second_map(struct intel_super *super, struct imsm_dev *dev) { if (is_gen_migration(dev)) { @@ -2796,7 +3396,6 @@ static void manage_second_map(struct intel_super *super, struct imsm_dev *dev) } } } -#endif static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index) { @@ -2847,9 +3446,11 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * info->name[0] = 0; info->recovery_start = MaxSector; info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb); + info->bb.supported = 1; /* do we have the all the insync disks that we expect? */ mpb = super->anchor; + info->events = __le32_to_cpu(mpb->generation_num); for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); @@ -2898,7 +3499,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * */ max_enough = max(max_enough, enough); } - dprintf("%s: enough: %d\n", __func__, max_enough); + dprintf("enough: %d\n", max_enough); info->container_enough = max_enough; if (super->disks) { @@ -2913,7 +3514,8 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * * found the 'most fresh' version of the metadata */ info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0; - info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC); + info->disk.state |= (is_spare(disk) || is_journal(disk)) ? 
+ 0 : (1 << MD_DISK_SYNC); } /* only call uuid_from_super_imsm when this disk is part of a populated container, @@ -2937,7 +3539,7 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * * for each disk in array */ struct mdinfo *getinfo_super_disks_imsm(struct supertype *st) { - struct mdinfo *mddev = NULL; + struct mdinfo *mddev; struct intel_super *super = st->sb; struct imsm_disk *disk; int count = 0; @@ -3051,6 +3653,8 @@ static size_t disks_to_mpb_size(int disks) size += (4 - 2) * sizeof(struct imsm_map); /* 4 possible disk_ord_tbl's */ size += 4 * (disks - 1) * sizeof(__u32); + /* maximum bbm log */ + size += sizeof(struct bbm_log); return size; } @@ -3207,23 +3811,40 @@ static void fd2devname(int fd, char *name) } } +static int nvme_get_serial(int fd, void *buf, size_t buf_len) +{ + char path[60]; + char *name = fd2kname(fd); + + if (!name) + return 1; + + if (strncmp(name, "nvme", 4) != 0) + return 1; + + snprintf(path, sizeof(path) - 1, "/sys/block/%s/device/serial", name); + + return load_sys(path, buf, buf_len); +} + extern int scsi_get_serial(int fd, void *buf, size_t buf_len); static int imsm_read_serial(int fd, char *devname, __u8 serial[MAX_RAID_SERIAL_LEN]) { - unsigned char scsi_serial[255]; + char buf[50]; int rv; - int rsp_len; int len; char *dest; char *src; - char *rsp_buf; - int i; + unsigned int i; + + memset(buf, 0, sizeof(buf)); - memset(scsi_serial, 0, sizeof(scsi_serial)); + rv = nvme_get_serial(fd, buf, sizeof(buf)); - rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial)); + if (rv) + rv = scsi_get_serial(fd, buf, sizeof(buf)); if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) { memset(serial, 0, MAX_RAID_SERIAL_LEN); @@ -3238,20 +3859,11 @@ static int imsm_read_serial(int fd, char *devname, return rv; } - rsp_len = scsi_serial[3]; - if (!rsp_len) { - if (devname) - pr_err("Failed to retrieve serial for %s\n", - devname); - return 2; - } - rsp_buf = (char *) &scsi_serial[4]; - /* trim all whitespace and non-printable characters and convert * ':' to ';' */ - for (i = 0, dest = rsp_buf; i < rsp_len; i++) { - src = &rsp_buf[i]; + for (i = 0, dest = buf; i < sizeof(buf) && buf[i]; i++) { + src = &buf[i]; if (*src > 0x20) { /* ':' is reserved for use in placeholder serial * numbers for missing disks @@ -3262,8 +3874,8 @@ static int imsm_read_serial(int fd, char *devname, *dest++ = *src; } } - len = dest - rsp_buf; - dest = rsp_buf; + len = dest - buf; + dest = buf; /* truncate leading characters */ if (len > MAX_RAID_SERIAL_LEN) { @@ -3358,14 +3970,13 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) */ if (is_failed(&dl->disk)) dl->index = -2; - else if (is_spare(&dl->disk)) + else if (is_spare(&dl->disk) || is_journal(&dl->disk)) dl->index = -1; } return 0; } -#ifndef MDASSEMBLE /* When migrating map0 contains the 'destination' state while map1 * contains the current state. When not migrating map0 contains the * current state. 
This routine assumes that map[0].map_state is set to @@ -3395,8 +4006,7 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super, /* duplicate and then set the target end state in map[0] */ memcpy(dest, src, sizeof_imsm_map(src)); - if ((migr_type == MIGR_REBUILD) || - (migr_type == MIGR_GEN_MIGR)) { + if (migr_type == MIGR_REBUILD || migr_type == MIGR_GEN_MIGR) { __u32 ord; int i; @@ -3426,8 +4036,8 @@ static void end_migration(struct imsm_dev *dev, struct intel_super *super, * * FIXME add support for raid-level-migration */ - if ((map_state != map->map_state) && (is_gen_migration(dev) == 0) && - (prev->map_state != IMSM_T_STATE_UNINITIALIZED)) { + if (map_state != map->map_state && (is_gen_migration(dev) == 0) && + prev->map_state != IMSM_T_STATE_UNINITIALIZED) { /* when final map state is other than expected * merge maps (not for migration) */ @@ -3454,7 +4064,6 @@ static void end_migration(struct imsm_dev *dev, struct intel_super *super, dev->vol.curr_migr_unit = 0; map->map_state = map_state; } -#endif static int parse_raid_devices(struct intel_super *super) { @@ -3493,8 +4102,9 @@ static int parse_raid_devices(struct intel_super *super) if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) { void *buf; - len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512); - if (posix_memalign(&buf, 512, len) != 0) + len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, + super->sector_size); + if (posix_memalign(&buf, MAX_SECTOR_SIZE, len) != 0) return 1; memcpy(buf, super->buf, super->len); @@ -3504,20 +4114,9 @@ static int parse_raid_devices(struct intel_super *super) super->len = len; } - return 0; -} - -/* retrieve a pointer to the bbm log which starts after all raid devices */ -struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb) -{ - void *ptr = NULL; + super->extra_space += space_needed; - if (__le32_to_cpu(mpb->bbm_log_size)) { - ptr = mpb; - ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size); - } - - return ptr; + return 0; } /******************************************************************************* @@ -3567,32 +4166,32 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) { unsigned long long dsize; unsigned long long sectors; + unsigned int sector_size = super->sector_size; struct stat; struct imsm_super *anchor; __u32 check_sum; get_dev_size(fd, NULL, &dsize); - if (dsize < 1024) { + if (dsize < 2*sector_size) { if (devname) pr_err("%s: device to small for imsm\n", devname); return 1; } - if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) { + if (lseek64(fd, dsize - (sector_size * 2), SEEK_SET) < 0) { if (devname) pr_err("Cannot seek to anchor block on %s: %s\n", devname, strerror(errno)); return 1; } - if (posix_memalign((void**)&anchor, 512, 512) != 0) { + if (posix_memalign((void **)&anchor, sector_size, sector_size) != 0) { if (devname) - pr_err("Failed to allocate imsm anchor buffer" - " on %s\n", devname); + pr_err("Failed to allocate imsm anchor buffer on %s\n", devname); return 1; } - if (read(fd, anchor, 512) != 512) { + if ((unsigned int)read(fd, anchor, sector_size) != sector_size) { if (devname) pr_err("Cannot read anchor block on %s: %s\n", devname, strerror(errno)); @@ -3612,21 +4211,22 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) /* capability and hba must be updated with new super allocation */ find_intel_hba_capability(fd, super, devname); - super->len = ROUND_UP(anchor->mpb_size, 512); - if (posix_memalign(&super->buf, 512, super->len) != 0) { + super->len = 
ROUND_UP(anchor->mpb_size, sector_size); + if (posix_memalign(&super->buf, MAX_SECTOR_SIZE, super->len) != 0) { if (devname) pr_err("unable to allocate %zu byte mpb buffer\n", super->len); free(anchor); return 2; } - memcpy(super->buf, anchor, 512); + memcpy(super->buf, anchor, sector_size); - sectors = mpb_sectors(anchor) - 1; + sectors = mpb_sectors(anchor, sector_size) - 1; free(anchor); - if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) { - pr_err("%s could not allocate migr_rec buffer\n", __func__); + if (posix_memalign(&super->migr_rec_buf, sector_size, + MIGR_REC_BUF_SECTORS*sector_size) != 0) { + pr_err("could not allocate migr_rec buffer\n"); free(super->buf); return 2; } @@ -3647,14 +4247,15 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) } /* read the extended mpb */ - if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) { + if (lseek64(fd, dsize - (sector_size * (2 + sectors)), SEEK_SET) < 0) { if (devname) pr_err("Cannot seek to extended mpb on %s: %s\n", devname, strerror(errno)); return 1; } - if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) { + if ((unsigned int)read(fd, super->buf + sector_size, + super->len - sector_size) != super->len - sector_size) { if (devname) pr_err("Cannot read extended mpb on %s: %s\n", devname, strerror(errno)); @@ -3670,12 +4271,6 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 3; } - /* FIXME the BBM log is disk specific so we cannot use this global - * buffer for all disks. Ok for now since we only look at the global - * bbm_log_size parameter to gate assembly - */ - super->bbm_log = __get_imsm_bbm_log(super->anchor); - return 0; } @@ -3715,10 +4310,15 @@ load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd err = load_imsm_mpb(fd, super, devname); if (err) return err; + if (super->sector_size == 4096) + convert_from_4k(super); err = load_imsm_disk(fd, super, devname, keep_fd); if (err) return err; err = parse_raid_devices(super); + if (err) + return err; + err = load_bbm_log(super); clear_hi(super); return err; } @@ -3783,12 +4383,15 @@ static void __free_imsm(struct intel_super *super, int free_disks) free(elem); elem = next; } + if (super->bbm_log) + free(super->bbm_log); super->hba = NULL; } static void free_imsm(struct intel_super *super) { __free_imsm(super, 1); + free(super->bb.entries); free(super); } @@ -3809,6 +4412,14 @@ static struct intel_super *alloc_super(void) super->current_vol = -1; super->create_offset = ~((unsigned long long) 0); + + super->bb.entries = xmalloc(BBM_LOG_MAX_ENTRIES * + sizeof(struct md_bb_entry)); + if (!super->bb.entries) { + free(super); + return NULL; + } + return super; } @@ -3820,7 +4431,7 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de struct sys_dev *hba_name; int rv = 0; - if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) { + if (fd < 0 || check_env("IMSM_NO_PLATFORM")) { super->orom = NULL; super->hba = NULL; return 0; @@ -3837,14 +4448,14 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de if (devname) { struct intel_hba *hba = super->hba; - pr_err("%s is attached to Intel(R) %s RAID " - "controller (%s),\n" - " but the container is assigned to Intel(R) " - "%s RAID controller (", + pr_err("%s is attached to Intel(R) %s %s (%s),\n" + " but the container is assigned to Intel(R) %s %s (", devname, get_sys_dev_type(hba_name->type), + hba_name->type == SYS_DEV_VMD ? 
"domain" : "RAID controller", hba_name->pci_id ? : "Err!", - get_sys_dev_type(super->hba->type)); + get_sys_dev_type(super->hba->type), + hba->type == SYS_DEV_VMD ? "domain" : "RAID controller"); while (hba) { fprintf(stderr, "%s", hba->pci_id ? : "Err!"); @@ -3853,8 +4464,8 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de hba = hba->next; } fprintf(stderr, ").\n" - " Mixing devices attached to different controllers " - "is not allowed.\n"); + " Mixing devices attached to different %s is not allowed.\n", + hba_name->type == SYS_DEV_VMD ? "VMD domains" : "controllers"); } return 2; } @@ -3898,7 +4509,6 @@ static int find_missing(struct intel_super *super) return 0; } -#ifndef MDASSEMBLE static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list) { struct intel_disk *idisk = disk_list; @@ -3926,8 +4536,8 @@ static int __prep_thunderdome(struct intel_super **table, int tbl_size, if (tbl_mpb->family_num == mpb->family_num) { if (tbl_mpb->check_sum == mpb->check_sum) { - dprintf("%s: mpb from %d:%d matches %d:%d\n", - __func__, super->disks->major, + dprintf("mpb from %d:%d matches %d:%d\n", + super->disks->major, super->disks->minor, table[i]->disks->major, table[i]->disks->minor); @@ -3944,8 +4554,8 @@ static int __prep_thunderdome(struct intel_super **table, int tbl_size, */ struct intel_disk *idisk; - dprintf("%s: mpb from %d:%d replaces %d:%d\n", - __func__, super->disks->major, + dprintf("mpb from %d:%d replaces %d:%d\n", + super->disks->major, super->disks->minor, table[i]->disks->major, table[i]->disks->minor); @@ -3973,8 +4583,8 @@ static int __prep_thunderdome(struct intel_super **table, int tbl_size, idisk->disk.status |= CONFIGURED_DISK; } - dprintf("%s: mpb from %d:%d prefer %d:%d\n", - __func__, super->disks->major, + dprintf("mpb from %d:%d prefer %d:%d\n", + super->disks->major, super->disks->minor, table[i]->disks->major, table[i]->disks->minor); @@ -4033,12 +4643,12 @@ validate_members(struct intel_super *super, struct intel_disk *disk_list, idisk->owner == IMSM_UNKNOWN_OWNER) ok_count++; else - dprintf("%s: '%.16s' owner %d != %d\n", - __func__, disk->serial, idisk->owner, + dprintf("'%.16s' owner %d != %d\n", + disk->serial, idisk->owner, owner); } else { - dprintf("%s: unknown disk %x [%d]: %.16s\n", - __func__, __le32_to_cpu(mpb->family_num), i, + dprintf("unknown disk %x [%d]: %.16s\n", + __le32_to_cpu(mpb->family_num), i, disk->serial); break; } @@ -4094,8 +4704,8 @@ imsm_thunderdome(struct intel_super **super_list, int len) s = NULL; if (!s) - dprintf("%s: marking family: %#x from %d:%d offline\n", - __func__, mpb->family_num, + dprintf("marking family: %#x from %d:%d offline\n", + mpb->family_num, super_table[i]->disks->major, super_table[i]->disks->minor); super_table[i] = s; @@ -4142,8 +4752,7 @@ imsm_thunderdome(struct intel_super **super_list, int len) champion = s; if (conflict) - pr_err("Chose family %#x on '%s', " - "assemble conflicts to new container with '--update=uuid'\n", + pr_err("Chose family %#x on '%s', assemble conflicts to new container with '--update=uuid'\n", __le32_to_cpu(s->anchor->family_num), s->disks->devname); /* collect all dl's onto 'champion', and update them to @@ -4264,7 +4873,7 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, } /* Check migration compatibility */ - if ((err == 0) && (check_mpb_migr_compatibility(super) != 0)) { + if (err == 0 && check_mpb_migr_compatibility(super) != 0) { pr_err("Unsupported migration detected"); if (devname) 
fprintf(stderr, " on %s\n", devname); @@ -4353,7 +4962,7 @@ get_devlist_super_block(struct md_list *devlist, struct intel_super **super_list static int get_super_block(struct intel_super **super_list, char *devnm, char *devname, int major, int minor, int keep_fd) { - struct intel_super*s = NULL; + struct intel_super *s; char nm[32]; int dfd = -1; int err = 0; @@ -4372,6 +4981,7 @@ static int get_super_block(struct intel_super **super_list, char *devnm, char *d goto error; } + get_dev_sector_size(dfd, NULL, &s->sector_size); find_intel_hba_capability(dfd, s, devname); err = load_and_parse_mpb(dfd, s, NULL, keep_fd); @@ -4389,11 +4999,11 @@ static int get_super_block(struct intel_super **super_list, char *devnm, char *d *super_list = s; } else { if (s) - free(s); + free_imsm(s); if (dfd >= 0) close(dfd); } - if ((dfd >= 0) && (!keep_fd)) + if (dfd >= 0 && !keep_fd) close(dfd); return err; @@ -4436,7 +5046,6 @@ static int load_container_imsm(struct supertype *st, int fd, char *devname) { return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1); } -#endif static int load_super_imsm(struct supertype *st, int fd, char *devname) { @@ -4451,13 +5060,16 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) free_super_imsm(st); super = alloc_super(); + get_dev_sector_size(fd, NULL, &super->sector_size); + if (!super) + return 1; /* Load hba and capabilities if they exist. * But do not preclude loading metadata in case capabilities or hba are * non-compliant and ignore_hw_compat is set. */ rv = find_intel_hba_capability(fd, super, devname); /* no orom/efi or non-intel hba of the disk */ - if ((rv != 0) && (st->ignore_hw_compat == 0)) { + if (rv != 0 && st->ignore_hw_compat == 0) { if (devname) pr_err("No OROM/EFI properties for %s\n", devname); free_imsm(super); @@ -4467,7 +5079,11 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) /* retry the load if we might have raced against mdmon */ if (rv == 3) { - struct mdstat_ent *mdstat = mdstat_by_component(fd2devnm(fd)); + struct mdstat_ent *mdstat = NULL; + char *name = fd2kname(fd); + + if (name) + mdstat = mdstat_by_component(name); if (mdstat && mdmon_running(mdstat->devnm) && getpid() != mdmon_pid(mdstat->devnm)) { for (retry = 0; retry < 3; retry++) { @@ -4483,8 +5099,7 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) if (rv) { if (devname) - pr_err("Failed to load all information " - "sections on %s\n", devname); + pr_err("Failed to load all information sections on %s\n", devname); free_imsm(super); return rv; } @@ -4600,7 +5215,7 @@ static int check_name(struct intel_super *super, char *name, int quiet) } static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, - unsigned long long size, char *name, + struct shape *s, char *name, char *homehost, int *uuid, long long data_offset) { @@ -4608,6 +5223,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, * so st->sb is already set. 
*/ struct intel_super *super = st->sb; + unsigned int sector_size = super->sector_size; struct imsm_super *mpb = super->anchor; struct intel_dev *dv; struct imsm_dev *dev; @@ -4618,10 +5234,11 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, unsigned long long array_blocks; size_t size_old, size_new; unsigned long long num_data_stripes; + unsigned int data_disks; + unsigned long long size_per_member; if (super->orom && mpb->num_raid_devs >= super->orom->vpa) { - pr_err("This imsm-container already has the " - "maximum of %d volumes\n", super->orom->vpa); + pr_err("This imsm-container already has the maximum of %d volumes\n", super->orom->vpa); return 0; } @@ -4630,16 +5247,15 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, size_new = disks_to_mpb_size(info->nr_disks); if (size_new > size_old) { void *mpb_new; - size_t size_round = ROUND_UP(size_new, 512); + size_t size_round = ROUND_UP(size_new, sector_size); - if (posix_memalign(&mpb_new, 512, size_round) != 0) { + if (posix_memalign(&mpb_new, sector_size, size_round) != 0) { pr_err("could not allocate new mpb\n"); return 0; } - if (posix_memalign(&super->migr_rec_buf, 512, - MIGR_REC_BUF_SIZE) != 0) { - pr_err("%s could not allocate migr_rec buffer\n", - __func__); + if (posix_memalign(&super->migr_rec_buf, sector_size, + MIGR_REC_BUF_SECTORS*sector_size) != 0) { + pr_err("could not allocate migr_rec buffer\n"); free(super->buf); free(super); free(mpb_new); @@ -4651,6 +5267,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, super->anchor = mpb_new; mpb->mpb_size = __cpu_to_le32(size_new); memset(mpb_new + size_old, 0, size_round - size_old); + super->len = size_round; } super->current_vol = idx; @@ -4673,7 +5290,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, disk->status = CONFIGURED_DISK | FAILED_DISK; disk->scsi_id = __cpu_to_le32(~(__u32)0); snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN, - "missing:%d", i); + "missing:%d", (__u8)i); } find_missing(super); } else { @@ -4695,9 +5312,11 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN); array_blocks = calc_array_size(info->level, info->raid_disks, info->layout, info->chunk_size, - size * 2); - /* round array size down to closest MB */ - array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; + s->size * BLOCKS_PER_KB); + data_disks = get_data_disks(info->level, info->layout, + info->raid_disks); + array_blocks = round_size_to_mb(array_blocks, data_disks); + size_per_member = array_blocks / data_disks; dev->size_low = __cpu_to_le32((__u32) array_blocks); dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32)); @@ -4709,7 +5328,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, vol->curr_migr_unit = 0; map = get_imsm_map(dev, MAP_0); set_pba_of_lba0(map, super->create_offset); - set_blocks_per_member(map, info_to_blocks_per_member(info, size)); + set_blocks_per_member(map, info_to_blocks_per_member(info, + size_per_member / + BLOCKS_PER_KB)); map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info)); map->failed_disk_num = ~0; if (info->level > 0) @@ -4723,8 +5344,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, if (info->level == 1 && info->raid_disks > 2) { free(dev); free(dv); - pr_err("imsm does not support more than 2 disks" - "in a raid1 volume\n"); 
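For context on the size arithmetic just above: the requested volume size is spread over the data-carrying members only, and the result is rounded so that each member's share is a whole number of megabytes (2048 sectors). The sketch below illustrates that relationship under assumed helper names; it is not the code called here.

/* Hypothetical illustration of data-disk counting and per-member MB rounding. */
static int example_data_disks(int level, int raid_disks)
{
	switch (level) {
	case 0:  return raid_disks;		/* pure striping */
	case 1:  return 1;			/* a mirror holds one copy */
	case 5:  return raid_disks - 1;		/* one disk's worth of parity */
	case 10: return raid_disks / 2;		/* striped mirrors */
	default: return 0;
	}
}

static unsigned long long example_round_to_mb(unsigned long long array_sectors,
					      int data_disks)
{
	unsigned long long per_member = array_sectors / data_disks;

	per_member = (per_member >> 11) << 11;	/* 2048 sectors = 1 MiB */
	return per_member * data_disks;
}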
+ pr_err("imsm does not support more than 2 disksin a raid1 volume\n"); return 0; } @@ -4738,7 +5358,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, map->num_domains = 1; /* info->size is only int so use the 'size' parameter instead */ - num_data_stripes = (size * 2) / info_to_blocks_per_strip(info); + num_data_stripes = size_per_member / info_to_blocks_per_strip(info); num_data_stripes /= map->num_domains; set_num_data_stripes(map, num_data_stripes); @@ -4749,6 +5369,20 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, } mpb->num_raid_devs++; + if (s->consistency_policy == UnSet || + s->consistency_policy == CONSISTENCY_POLICY_RESYNC || + s->consistency_policy == CONSISTENCY_POLICY_NONE) { + dev->rwh_policy = RWH_OFF; + } else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) { + dev->rwh_policy = RWH_DISTRIBUTED; + } else { + free(dev); + free(dv); + pr_err("imsm does not support consistency policy %s\n", + map_num(consistency_policies, s->consistency_policy)); + return 0; + } + dv->dev = dev; dv->index = super->current_vol; dv->next = super->devlist; @@ -4760,7 +5394,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, } static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, - unsigned long long size, char *name, + struct shape *s, char *name, char *homehost, int *uuid, unsigned long long data_offset) { @@ -4783,27 +5417,29 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, } if (st->sb) - return init_super_imsm_volume(st, info, size, name, homehost, uuid, + return init_super_imsm_volume(st, info, s, name, homehost, uuid, data_offset); if (info) mpb_size = disks_to_mpb_size(info->nr_disks); else - mpb_size = 512; + mpb_size = MAX_SECTOR_SIZE; super = alloc_super(); - if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) { - free(super); + if (super && + posix_memalign(&super->buf, MAX_SECTOR_SIZE, mpb_size) != 0) { + free_imsm(super); super = NULL; } if (!super) { - pr_err("%s could not allocate superblock\n", __func__); + pr_err("could not allocate superblock\n"); return 0; } - if (posix_memalign(&super->migr_rec_buf, 512, MIGR_REC_BUF_SIZE) != 0) { - pr_err("%s could not allocate migr_rec buffer\n", __func__); + if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE, + MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) { + pr_err("could not allocate migr_rec buffer\n"); free(super->buf); - free(super); + free_imsm(super); return 0; } memset(super->buf, 0, mpb_size); @@ -4826,7 +5462,6 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info, return 1; } -#ifndef MDASSEMBLE static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, int fd, char *devname) { @@ -4907,8 +5542,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk, struct imsm_map *map2 = get_imsm_map(dev, MAP_1); int slot2 = get_imsm_disk_slot(map2, df->index); - if ((slot2 < map2->num_members) && - (slot2 >= 0)) { + if (slot2 < map2->num_members && slot2 >= 0) { __u32 ord2 = get_imsm_ord_tbl_ent(dev, slot2, MAP_1); @@ -4984,6 +5618,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, struct intel_super *super = st->sb; struct dl *dd; unsigned long long size; + unsigned int member_sector_size; __u32 id; int rv; struct stat stb; @@ -5015,11 +5650,85 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk, rv = imsm_read_serial(fd, devname, dd->serial); if (rv) { 
pr_err("failed to retrieve scsi serial, aborting\n"); + if (dd->devname) + free(dd->devname); free(dd); abort(); } + if (super->hba && ((super->hba->type == SYS_DEV_NVME) || + (super->hba->type == SYS_DEV_VMD))) { + int i; + char *devpath = diskfd_to_devpath(fd); + char controller_path[PATH_MAX]; + + if (!devpath) { + pr_err("failed to get devpath, aborting\n"); + if (dd->devname) + free(dd->devname); + free(dd); + return 1; + } + + snprintf(controller_path, PATH_MAX-1, "%s/device", devpath); + free(devpath); + + if (devpath_to_vendor(controller_path) == 0x8086) { + /* + * If Intel's NVMe drive has serial ended with + * "-A","-B","-1" or "-2" it means that this is "x8" + * device (double drive on single PCIe card). + * User should be warned about potential data loss. + */ + for (i = MAX_RAID_SERIAL_LEN-1; i > 0; i--) { + /* Skip empty character at the end */ + if (dd->serial[i] == 0) + continue; + + if (((dd->serial[i] == 'A') || + (dd->serial[i] == 'B') || + (dd->serial[i] == '1') || + (dd->serial[i] == '2')) && + (dd->serial[i-1] == '-')) + pr_err("\tThe action you are about to take may put your data at risk.\n" + "\tPlease note that x8 devices may consist of two separate x4 devices " + "located on a single PCIe port.\n" + "\tRAID 0 is the only supported configuration for this type of x8 device.\n"); + break; + } + } else if (super->hba->type == SYS_DEV_VMD && super->orom && + !imsm_orom_has_tpv_support(super->orom)) { + pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n" + "\tPlease refer to Intel(R) RSTe user guide.\n"); + free(dd->devname); + free(dd); + return 1; + } + } get_dev_size(fd, NULL, &size); + get_dev_sector_size(fd, NULL, &member_sector_size); + + if (super->sector_size == 0) { + /* this a first device, so sector_size is not set yet */ + super->sector_size = member_sector_size; + } else if (member_sector_size != super->sector_size) { + pr_err("Mixing between different sector size is forbidden, aborting...\n"); + if (dd->devname) + free(dd->devname); + free(dd); + return 1; + } + + /* clear migr_rec when adding disk to container */ + memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*super->sector_size); + if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*super->sector_size, + SEEK_SET) >= 0) { + if ((unsigned int)write(fd, super->migr_rec_buf, + MIGR_REC_BUF_SECTORS*super->sector_size) != + MIGR_REC_BUF_SECTORS*super->sector_size) + perror("Write migr_rec failed"); + } + size /= 512; serialcpy(dd->disk.serial, dd->serial); set_total_blocks(&dd->disk, size); @@ -5055,8 +5764,7 @@ static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk) * is prepared. 
*/ if (!st->update_tail) { - pr_err("%s shall be used in mdmon context only" - "(line %d).\n", __func__, __LINE__); + pr_err("shall be used in mdmon context only\n"); return 1; } dd = xcalloc(1, sizeof(*dd)); @@ -5075,9 +5783,9 @@ static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk) static int store_imsm_mpb(int fd, struct imsm_super *mpb); static union { - char buf[512]; + char buf[MAX_SECTOR_SIZE]; struct imsm_super anchor; -} spare_record __attribute__ ((aligned(512))); +} spare_record __attribute__ ((aligned(MAX_SECTOR_SIZE))); /* spare records have their own family number and do not have any defined raid * devices @@ -5089,13 +5797,13 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) __u32 sum; struct dl *d; - spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)), - spare->generation_num = __cpu_to_le32(1UL), + spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)); + spare->generation_num = __cpu_to_le32(1UL); spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY; - spare->num_disks = 1, - spare->num_raid_devs = 0, - spare->cache_size = mpb->cache_size, - spare->pwr_cycle_count = __cpu_to_le32(1), + spare->num_disks = 1; + spare->num_raid_devs = 0; + spare->cache_size = mpb->cache_size; + spare->pwr_cycle_count = __cpu_to_le32(1); snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH, MPB_SIGNATURE MPB_VERSION_RAID0); @@ -5108,6 +5816,9 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) if (__le32_to_cpu(d->disk.total_blocks_hi) > 0) spare->attributes |= MPB_ATTRIB_2TB_DISK; + if (super->sector_size == 4096) + convert_to_4k_imsm_disk(&spare->disk[0]); + sum = __gen_imsm_checksum(spare); spare->family_num = __cpu_to_le32(sum); spare->orig_family_num = 0; @@ -5115,8 +5826,8 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) spare->check_sum = __cpu_to_le32(sum); if (store_imsm_mpb(d->fd, spare)) { - pr_err("%s: failed for device %d:%d %s\n", - __func__, d->major, d->minor, strerror(errno)); + pr_err("failed for device %d:%d %s\n", + d->major, d->minor, strerror(errno)); return 1; } if (doclose) { @@ -5131,6 +5842,7 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose) static int write_super_imsm(struct supertype *st, int doclose) { struct intel_super *super = st->sb; + unsigned int sector_size = super->sector_size; struct imsm_super *mpb = super->anchor; struct dl *d; __u32 generation; @@ -5140,6 +5852,7 @@ static int write_super_imsm(struct supertype *st, int doclose) __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk); int num_disks = 0; int clear_migration_record = 1; + __u32 bbm_log_size; /* 'generation' is incremented everytime the metadata is written */ generation = __le32_to_cpu(mpb->generation_num); @@ -5177,9 +5890,23 @@ static int write_super_imsm(struct supertype *st, int doclose) if (is_gen_migration(dev2)) clear_migration_record = 0; } - mpb_size += __le32_to_cpu(mpb->bbm_log_size); + + bbm_log_size = get_imsm_bbm_log_size(super->bbm_log); + + if (bbm_log_size) { + memcpy((void *)mpb + mpb_size, super->bbm_log, bbm_log_size); + mpb->attributes |= MPB_ATTRIB_BBM; + } else + mpb->attributes &= ~MPB_ATTRIB_BBM; + + super->anchor->bbm_log_size = __cpu_to_le32(bbm_log_size); + mpb_size += bbm_log_size; mpb->mpb_size = __cpu_to_le32(mpb_size); +#ifdef DEBUG + assert(super->len == 0 || mpb_size <= super->len); +#endif + /* recalculate checksum */ sum = __gen_imsm_checksum(mpb); mpb->check_sum = __cpu_to_le32(sum); @@ -5189,7 +5916,11 
@@ static int write_super_imsm(struct supertype *st, int doclose) super->clean_migration_record_by_mdmon = 0; } if (clear_migration_record) - memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SIZE); + memset(super->migr_rec_buf, 0, + MIGR_REC_BUF_SECTORS*sector_size); + + if (sector_size == 4096) + convert_to_4k(super); /* write the mpb for disks that compose raid devices */ for (d = super->disks; d ; d = d->next) { @@ -5200,17 +5931,20 @@ static int write_super_imsm(struct supertype *st, int doclose) unsigned long long dsize; get_dev_size(d->fd, NULL, &dsize); - if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) { - if (write(d->fd, super->migr_rec_buf, - MIGR_REC_BUF_SIZE) != MIGR_REC_BUF_SIZE) + if (lseek64(d->fd, dsize - sector_size, + SEEK_SET) >= 0) { + if ((unsigned int)write(d->fd, + super->migr_rec_buf, + MIGR_REC_BUF_SECTORS*sector_size) != + MIGR_REC_BUF_SECTORS*sector_size) perror("Write migr_rec failed"); } } if (store_imsm_mpb(d->fd, mpb)) fprintf(stderr, - "%s: failed for device %d:%d (fd: %d)%s\n", - __func__, d->major, d->minor, + "failed for device %d:%d (fd: %d)%s\n", + d->major, d->minor, d->fd, strerror(errno)); if (doclose) { @@ -5273,10 +6007,142 @@ static int mgmt_disk(struct supertype *st) return 0; } +__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len); + +static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd) +{ + struct intel_super *super = st->sb; + void *buf; + struct ppl_header *ppl_hdr; + int ret; + + ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE); + if (ret) { + pr_err("Failed to allocate PPL header buffer\n"); + return ret; + } + + memset(buf, 0, PPL_HEADER_SIZE); + ppl_hdr = buf; + memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED); + ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num); + ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE)); + + if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) { + ret = errno; + perror("Failed to seek to PPL header location"); + } + + if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) { + ret = errno; + perror("Write PPL header failed"); + } + + if (!ret) + fsync(fd); + + free(buf); + return ret; +} + +static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info, + struct mdinfo *disk) +{ + struct intel_super *super = st->sb; + struct dl *d; + void *buf; + int ret = 0; + struct ppl_header *ppl_hdr; + __u32 crc; + struct imsm_dev *dev; + struct imsm_map *map; + __u32 idx; + + if (disk->disk.raid_disk < 0) + return 0; + + if (posix_memalign(&buf, 4096, PPL_HEADER_SIZE)) { + pr_err("Failed to allocate PPL header buffer\n"); + return -1; + } + + dev = get_imsm_dev(super, info->container_member); + map = get_imsm_map(dev, MAP_X); + idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_X); + d = get_imsm_dl_disk(super, idx); + + if (!d || d->index < 0 || is_failed(&d->disk)) + goto out; + + if (lseek64(d->fd, info->ppl_sector * 512, SEEK_SET) < 0) { + perror("Failed to seek to PPL header location"); + ret = -1; + goto out; + } + + if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) { + perror("Read PPL header failed"); + ret = -1; + goto out; + } + + ppl_hdr = buf; + + crc = __le32_to_cpu(ppl_hdr->checksum); + ppl_hdr->checksum = 0; + + if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) { + dprintf("Wrong PPL header checksum on %s\n", + d->devname); + ret = 1; + } + + if (!ret && (__le32_to_cpu(ppl_hdr->signature) != + super->anchor->orig_family_num)) { + dprintf("Wrong PPL header signature on %s\n", + d->devname); + ret = 1; + } 
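write_init_ppl_imsm() and validate_ppl_imsm() below agree on one convention: the CRC is computed with crc32c over the whole header while its checksum field is zero, then bit-inverted and stored little-endian. A minimal sketch of the verification half, reusing the crc32c_le() declaration and the file's endian helpers, under the simplifying assumption that the caller passes the stored checksum and a pointer to the field inside the buffer:

/* Sketch only: verify a header checksum stored as ~crc32c(~0, hdr, len). */
static int example_ppl_crc_ok(unsigned char *hdr, size_t len,
			      __u32 stored_checksum_le, __u32 *checksum_field)
{
	__u32 expected;

	*checksum_field = 0;			/* the CRC covers a zeroed field */
	expected = ~crc32c_le(~0, hdr, len);
	return __le32_to_cpu(stored_checksum_le) == expected;
}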
+ +out: + free(buf); + + if (ret == 1 && map->map_state == IMSM_T_STATE_UNINITIALIZED) + return st->ss->write_init_ppl(st, info, d->fd); + + return ret; +} + +static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info) +{ + struct intel_super *super = st->sb; + struct dl *d; + int ret = 0; + + if (info->consistency_policy != CONSISTENCY_POLICY_PPL || + info->array.level != 5) + return 0; + + for (d = super->disks; d ; d = d->next) { + if (d->index < 0 || is_failed(&d->disk)) + continue; + + ret = st->ss->write_init_ppl(st, info, d->fd); + if (ret) + break; + } + + return ret; +} + static int write_init_super_imsm(struct supertype *st) { struct intel_super *super = st->sb; int current_vol = super->current_vol; + int rv = 0; + struct mdinfo info; + + getinfo_super_imsm(st, &info, NULL); /* we are done with current_vol reset it to point st at the container */ super->current_vol = -1; @@ -5284,26 +6150,30 @@ static int write_init_super_imsm(struct supertype *st) if (st->update_tail) { /* queue the recently created array / added disk * as a metadata update */ - int rv; /* determine if we are creating a volume or adding a disk */ if (current_vol < 0) { /* in the mgmt (add/remove) disk case we are running * in mdmon context, so don't close fd's */ - return mgmt_disk(st); - } else - rv = create_array(st, current_vol); - - return rv; + rv = mgmt_disk(st); + } else { + rv = write_init_ppl_imsm_all(st, &info); + if (!rv) + rv = create_array(st, current_vol); + } } else { struct dl *d; for (d = super->disks; d; d = d->next) Kill(d->devname, NULL, 0, -1, 1); - return write_super_imsm(st, 1); + if (current_vol >= 0) + rv = write_init_ppl_imsm_all(st, &info); + if (!rv) + rv = write_super_imsm(st, 1); } + + return rv; } -#endif static int store_super_imsm(struct supertype *st, int fd) { @@ -5313,19 +6183,11 @@ static int store_super_imsm(struct supertype *st, int fd) if (!mpb) return 1; -#ifndef MDASSEMBLE + if (super->sector_size == 4096) + convert_to_4k(super); return store_imsm_mpb(fd, mpb); -#else - return 1; -#endif -} - -static int imsm_bbm_log_size(struct imsm_super *mpb) -{ - return __le32_to_cpu(mpb->bbm_log_size); } -#ifndef MDASSEMBLE static int validate_geometry_imsm_container(struct supertype *st, int level, int layout, int raiddisks, int chunk, unsigned long long size, @@ -5336,7 +6198,7 @@ static int validate_geometry_imsm_container(struct supertype *st, int level, { int fd; unsigned long long ldsize; - struct intel_super *super=NULL; + struct intel_super *super; int rv = 0; if (level != LEVEL_CONTAINER) @@ -5360,12 +6222,22 @@ static int validate_geometry_imsm_container(struct supertype *st, int level, * note that there is no fd for the disks in array. */ super = alloc_super(); + if (!super) { + close(fd); + return 0; + } + if (!get_dev_sector_size(fd, NULL, &super->sector_size)) { + close(fd); + free_imsm(super); + return 0; + } + rv = find_intel_hba_capability(fd, super, verbose > 0 ? 
dev : NULL); if (rv != 0) { #if DEBUG char str[256]; fd2devname(fd, str); - dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n", + dprintf("fd: %d %s orom: %p rv: %d raiddisk: %d\n", fd, str, super->orom, rv, raiddisks); #endif /* no orom/efi or non-intel hba of the disk */ @@ -5377,8 +6249,7 @@ static int validate_geometry_imsm_container(struct supertype *st, int level, if (super->orom) { if (raiddisks > super->orom->tds) { if (verbose) - pr_err("%d exceeds maximum number of" - " platform supported disks: %d\n", + pr_err("%d exceeds maximum number of platform supported disks: %d\n", raiddisks, super->orom->tds); free_imsm(super); return 0; @@ -5530,10 +6401,10 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist, int dpa, int verbose) { struct mdstat_ent *mdstat = mdstat_read(0, 0); - struct mdstat_ent *memb = NULL; + struct mdstat_ent *memb; int count = 0; int num = 0; - struct md_list *dv = NULL; + struct md_list *dv; int found; for (memb = mdstat ; memb ; memb = memb->next) { @@ -5549,28 +6420,28 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist, num = sprintf(path, "%s%s", "/dev/", dev->name); if (num > 0) fd = open(path, O_RDONLY, 0); - if ((num <= 0) || (fd < 0)) { - pr_vrb(": Cannot open %s: %s\n", + if (num <= 0 || fd < 0) { + pr_vrb("Cannot open %s: %s\n", dev->name, strerror(errno)); } free(path); dev = dev->next; } found = 0; - if ((fd >= 0) && disk_attached_to_hba(fd, hba)) { + if (fd >= 0 && disk_attached_to_hba(fd, hba)) { struct mdstat_ent *vol; for (vol = mdstat ; vol ; vol = vol->next) { - if ((vol->active > 0) && + if (vol->active > 0 && vol->metadata_version && - is_container_member(vol, memb->dev)) { + is_container_member(vol, memb->devnm)) { found++; count++; } } if (*devlist && (found < dpa)) { dv = xcalloc(1, sizeof(*dv)); - dv->devname = xmalloc(strlen(memb->dev) + strlen("/dev/") + 1); - sprintf(dv->devname, "%s%s", "/dev/", memb->dev); + dv->devname = xmalloc(strlen(memb->devnm) + strlen("/dev/") + 1); + sprintf(dv->devname, "%s%s", "/dev/", memb->devnm); dv->found = found; dv->used = 0; dv->next = *devlist; @@ -5591,7 +6462,7 @@ get_loop_devices(void) { int i; struct md_list *devlist = NULL; - struct md_list *dv = NULL; + struct md_list *dv; for(i = 0; i < 12; i++) { dv = xcalloc(1, sizeof(*dv)); @@ -5608,7 +6479,7 @@ static struct md_list* get_devices(const char *hba_path) { struct md_list *devlist = NULL; - struct md_list *dv = NULL; + struct md_list *dv; struct dirent *ent; DIR *dir; int err = 0; @@ -5671,7 +6542,7 @@ count_volumes_list(struct md_list *devlist, char *homehost, { struct md_list *tmpdev; int count = 0; - struct supertype *st = NULL; + struct supertype *st; /* first walk the list of devices to find a consistent set * that match the criterea, if that is possible. 
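get_loop_devices() and get_devices() below both build the same kind of throw-away singly linked candidate list that count_volumes_list() then walks. The pattern, with simplified field names standing in for struct md_list, is roughly:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct example_dev {
	char *devname;
	struct example_dev *next;
};

/* Sketch of the list-building loop; entry count and names are illustrative. */
static struct example_dev *example_loop_devices(void)
{
	struct example_dev *list = NULL;
	int i;

	for (i = 0; i < 12; i++) {
		struct example_dev *dv = calloc(1, sizeof(*dv));
		char name[32];

		if (!dv)
			break;
		snprintf(name, sizeof(name), "/dev/loop%d", i);
		dv->devname = strdup(name);
		dv->next = list;	/* prepend, order is not significant */
		list = dv;
	}
	return list;
}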
@@ -5680,7 +6551,7 @@ count_volumes_list(struct md_list *devlist, char *homehost, *found = 0; st = match_metadata_desc_imsm("imsm"); if (st == NULL) { - pr_vrb(": cannot allocate memory for imsm supertype\n"); + pr_vrb("cannot allocate memory for imsm supertype\n"); return 0; } @@ -5693,33 +6564,33 @@ count_volumes_list(struct md_list *devlist, char *homehost, continue; tst = dup_super(st); if (tst == NULL) { - pr_vrb(": cannot allocate memory for imsm supertype\n"); + pr_vrb("cannot allocate memory for imsm supertype\n"); goto err_1; } tmpdev->container = 0; dfd = dev_open(devname, O_RDONLY|O_EXCL); if (dfd < 0) { - dprintf(": cannot open device %s: %s\n", + dprintf("cannot open device %s: %s\n", devname, strerror(errno)); tmpdev->used = 2; } else if (fstat(dfd, &stb)< 0) { /* Impossible! */ - dprintf(": fstat failed for %s: %s\n", + dprintf("fstat failed for %s: %s\n", devname, strerror(errno)); tmpdev->used = 2; } else if ((stb.st_mode & S_IFMT) != S_IFBLK) { - dprintf(": %s is not a block device.\n", + dprintf("%s is not a block device.\n", devname); tmpdev->used = 2; } else if (must_be_container(dfd)) { struct supertype *cst; cst = super_by_fd(dfd, NULL); if (cst == NULL) { - dprintf(": cannot recognize container type %s\n", + dprintf("cannot recognize container type %s\n", devname); tmpdev->used = 2; } else if (tst->ss != st->ss) { - dprintf(": non-imsm container - ignore it: %s\n", + dprintf("non-imsm container - ignore it: %s\n", devname); tmpdev->used = 2; } else if (!tst->ss->load_container || @@ -5733,11 +6604,11 @@ count_volumes_list(struct md_list *devlist, char *homehost, } else { tmpdev->st_rdev = stb.st_rdev; if (tst->ss->load_super(tst,dfd, NULL)) { - dprintf(": no RAID superblock on %s\n", + dprintf("no RAID superblock on %s\n", devname); tmpdev->used = 2; } else if (tst->ss->compare_super == NULL) { - dprintf(": Cannot assemble %s metadata on %s\n", + dprintf("Cannot assemble %s metadata on %s\n", tst->ss->name, devname); tmpdev->used = 2; } @@ -5774,7 +6645,7 @@ count_volumes_list(struct md_list *devlist, char *homehost, * Or, if we are auto assembling, we just ignore the second * for now. 
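Before any metadata is read, each candidate in the loop below has to survive a few cheap checks: it must open, it must stat, and it must actually be a block device. Stripped of the mdadm plumbing, those checks amount to something like this (helper name assumed):

#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/* Sketch of the per-device sanity checks applied before loading a superblock. */
static int example_usable_blockdev(const char *devname)
{
	struct stat stb;
	int fd = open(devname, O_RDONLY | O_EXCL);

	if (fd < 0)
		return 0;		/* busy or missing */
	if (fstat(fd, &stb) < 0 || (stb.st_mode & S_IFMT) != S_IFBLK) {
		close(fd);
		return 0;		/* not a block device */
	}
	close(fd);
	return 1;
}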
*/ - dprintf(": superblock on %s doesn't match others - assembly aborted\n", + dprintf("superblock on %s doesn't match others - assembly aborted\n", devname); goto loop; } @@ -5796,7 +6667,7 @@ count_volumes_list(struct md_list *devlist, char *homehost, if (iter->array.state & (1<text_version); } else count++; @@ -5804,15 +6675,15 @@ count_volumes_list(struct md_list *devlist, char *homehost, sysfs_free(head); } else { - dprintf(" no valid super block on device list: err: %d %p\n", + dprintf("No valid super block on device list: err: %d %p\n", err, st->sb); } } else { - dprintf(" no more devices to examin\n"); + dprintf("no more devices to examine\n"); } for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) { - if ((tmpdev->used == 1) && (tmpdev->found)) { + if (tmpdev->used == 1 && tmpdev->found) { if (count) { if (count < tmpdev->found) count = 0; @@ -5829,42 +6700,96 @@ count_volumes_list(struct md_list *devlist, char *homehost, return count; } -static int -count_volumes(char *hba, int dpa, int verbose) +static int __count_volumes(char *hba_path, int dpa, int verbose, + int cmp_hba_path) { - struct md_list *devlist = NULL; + struct sys_dev *idev, *intel_devices = find_intel_devices(); int count = 0; - int found = 0;; + const struct orom_entry *entry; + struct devid_list *dv, *devid_list; + + if (!hba_path) + return 0; - devlist = get_devices(hba); - /* if no intel devices return zero volumes */ - if (devlist == NULL) + for (idev = intel_devices; idev; idev = idev->next) { + if (strstr(idev->path, hba_path)) + break; + } + + if (!idev || !idev->dev_id) return 0; - count = active_arrays_by_format("imsm", hba, &devlist, dpa, verbose); - dprintf(" path: %s active arrays: %d\n", hba, count); - if (devlist == NULL) + entry = get_orom_entry_by_device_id(idev->dev_id); + + if (!entry || !entry->devid_list) return 0; - do { - found = 0; - count += count_volumes_list(devlist, - NULL, - verbose, - &found); - dprintf("found %d count: %d\n", found, count); - } while (found); - - dprintf("path: %s total number of volumes: %d\n", hba, count); - - while(devlist) { - struct md_list *dv = devlist; - devlist = devlist->next; - free(dv->devname); - free(dv); + + devid_list = entry->devid_list; + for (dv = devid_list; dv; dv = dv->next) { + struct md_list *devlist; + struct sys_dev *device = NULL; + char *hpath; + int found = 0; + + if (cmp_hba_path) + device = device_by_id_and_path(dv->devid, hba_path); + else + device = device_by_id(dv->devid); + + if (device) + hpath = device->path; + else + return 0; + + devlist = get_devices(hpath); + /* if no intel devices return zero volumes */ + if (devlist == NULL) + return 0; + + count += active_arrays_by_format("imsm", hpath, &devlist, dpa, + verbose); + dprintf("path: %s active arrays: %d\n", hpath, count); + if (devlist == NULL) + return 0; + do { + found = 0; + count += count_volumes_list(devlist, + NULL, + verbose, + &found); + dprintf("found %d count: %d\n", found, count); + } while (found); + + dprintf("path: %s total number of volumes: %d\n", hpath, count); + + while (devlist) { + struct md_list *dv = devlist; + devlist = devlist->next; + free(dv->devname); + free(dv); + } } return count; } +static int count_volumes(struct intel_hba *hba, int dpa, int verbose) +{ + if (!hba) + return 0; + if (hba->type == SYS_DEV_VMD) { + struct sys_dev *dev; + int count = 0; + + for (dev = find_intel_devices(); dev; dev = dev->next) { + if (dev->type == SYS_DEV_VMD) + count += __count_volumes(dev->path, dpa, + verbose, 1); + } + return count; + } + return 
__count_volumes(hba->path, dpa, verbose, 0); +} + static int imsm_default_chunk(const struct imsm_orom *orom) { /* up to 512 if the plaform supports it, otherwise the platform max. @@ -5881,14 +6806,14 @@ validate_geometry_imsm_orom(struct intel_super *super, int level, int layout, { /* check/set platform and metadata limits/defaults */ if (super->orom && raiddisks > super->orom->dpa) { - pr_vrb(": platform supports a maximum of %d disks per array\n", + pr_vrb("platform supports a maximum of %d disks per array\n", super->orom->dpa); return 0; } /* capabilities of OROM tested - copied from validate_geometry_imsm_volume */ if (!is_raid_level_supported(super->orom, level, raiddisks)) { - pr_vrb(": platform does not support raid%d with %d disk%s\n", + pr_vrb("platform does not support raid%d with %d disk%s\n", level, raiddisks, raiddisks > 1 ? "s" : ""); return 0; } @@ -5897,25 +6822,24 @@ validate_geometry_imsm_orom(struct intel_super *super, int level, int layout, *chunk = imsm_default_chunk(super->orom); if (super->orom && !imsm_orom_has_chunk(super->orom, *chunk)) { - pr_vrb(": platform does not support a chunk size of: " - "%d\n", *chunk); + pr_vrb("platform does not support a chunk size of: %d\n", *chunk); return 0; } if (layout != imsm_level_to_layout(level)) { if (level == 5) - pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n"); + pr_vrb("imsm raid 5 only supports the left-asymmetric layout\n"); else if (level == 10) - pr_vrb(": imsm raid 10 only supports the n2 layout\n"); + pr_vrb("imsm raid 10 only supports the n2 layout\n"); else - pr_vrb(": imsm unknown layout %#x for this raid level %d\n", + pr_vrb("imsm unknown layout %#x for this raid level %d\n", layout, level); return 0; } if (super->orom && (super->orom->attr & IMSM_OROM_ATTR_2TB) == 0 && (calc_array_size(level, raiddisks, layout, *chunk, size) >> 32) > 0) { - pr_vrb(": platform does not support a volume size over 2TB\n"); + pr_vrb("platform does not support a volume size over 2TB\n"); return 0; } @@ -5949,8 +6873,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, mpb = super->anchor; if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, size, verbose)) { - pr_err("RAID gemetry validation failed. " - "Cannot proceed with the action(s).\n"); + pr_err("RAID gemetry validation failed. Cannot proceed with the action(s).\n"); return 0; } if (!dev) { @@ -5991,9 +6914,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, } if (dcnt < raiddisks) { if (verbose) - pr_err("imsm: Not enough " - "devices with space for this array " - "(%d < %d)\n", + pr_err("imsm: Not enough devices with space for this array (%d < %d)\n", dcnt, raiddisks); return 0; } @@ -6012,8 +6933,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, } if (!dl) { if (verbose) - pr_err("%s is not in the " - "same imsm set\n", dev); + pr_err("%s is not in the same imsm set\n", dev); return 0; } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) { /* If a volume is present then the current creation attempt @@ -6021,15 +6941,12 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, * understand this configuration (all member disks must be * members of each array in the container). 
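One of the checks in validate_geometry_imsm_orom() above is purely arithmetic: the legacy size fields carry a 32-bit sector count, so any volume whose size needs more than 32 bits is rejected unless the platform advertises 2 TB support. A sketch of that gate, with a placeholder attribute flag rather than the real IMSM_OROM_ATTR_2TB bit value:

/* Sketch: allow sizes above 2 TB only when the platform attribute is set. */
static int example_size_allowed(unsigned long long array_sectors,
				unsigned int orom_attrs, unsigned int attr_2tb)
{
	if ((array_sectors >> 32) == 0)
		return 1;			/* fits in 32 bits, always fine */
	return (orom_attrs & attr_2tb) != 0;	/* needs the 2 TB capability */
}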
*/ - pr_err("%s is a spare and a volume" - " is already defined for this container\n", dev); - pr_err("The option-rom requires all member" - " disks to be a member of all volumes\n"); + pr_err("%s is a spare and a volume is already defined for this container\n", dev); + pr_err("The option-rom requires all member disks to be a member of all volumes\n"); return 0; } else if (super->orom && mpb->num_raid_devs > 0 && mpb->num_disks != raiddisks) { - pr_err("The option-rom requires all member" - " disks to be a member of all volumes\n"); + pr_err("The option-rom requires all member disks to be a member of all volumes\n"); return 0; } @@ -6072,21 +6989,16 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, if (!check_env("IMSM_NO_PLATFORM") && mpb->num_raid_devs > 0 && size && size != maxsize) { - pr_err("attempting to create a second " - "volume with size less then remaining space. " - "Aborting...\n"); + pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n"); return 0; } if (maxsize < size || maxsize == 0) { if (verbose) { if (maxsize == 0) - pr_err("no free space" - " left on device. Aborting...\n"); + pr_err("no free space left on device. Aborting...\n"); else - pr_err("not enough space" - " to create volume of given size" - " (%llu < %llu). Aborting...\n", + pr_err("not enough space to create volume of given size (%llu < %llu). Aborting...\n", maxsize, size); } return 0; @@ -6095,10 +7007,10 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, *freesize = maxsize; if (super->orom) { - int count = count_volumes(super->hba->path, + int count = count_volumes(super->hba, super->orom->dpa, verbose); if (super->orom->vphba <= count) { - pr_vrb(": platform does not support more than %d raid volumes.\n", + pr_vrb("platform does not support more than %d raid volumes.\n", super->orom->vphba); return 0; } @@ -6172,9 +7084,7 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks, } if (!check_env("IMSM_NO_PLATFORM") && mpb->num_raid_devs > 0 && size && size != maxsize) { - pr_err("attempting to create a second " - "volume with size less then remaining space. " - "Aborting...\n"); + pr_err("attempting to create a second volume with size less then remaining space. 
Aborting...\n"); return 0; } cnt = 0; @@ -6214,7 +7124,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, int raiddisks, int *chunk, unsigned long long size, unsigned long long data_offset, char *dev, unsigned long long *freesize, - int verbose) + int consistency_policy, int verbose) { int fd, cfd; struct mdinfo *sra; @@ -6253,11 +7163,10 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, created */ if (super->orom && freesize) { int count; - count = count_volumes(super->hba->path, + count = count_volumes(super->hba, super->orom->dpa, verbose); if (super->orom->vphba <= count) { - pr_vrb(": platform does not support more" - " than %d raid volumes.\n", + pr_vrb("platform does not support more than %d raid volumes.\n", super->orom->vphba); return 0; } @@ -6444,26 +7353,55 @@ static int update_subarray_imsm(struct supertype *st, char *subarray, u->type = update_rename_array; u->dev_idx = vol; - snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name); + strncpy((char *) u->name, name, MAX_RAID_SERIAL_LEN); + u->name[MAX_RAID_SERIAL_LEN-1] = '\0'; append_metadata_update(st, u, sizeof(*u)); } else { struct imsm_dev *dev; int i; dev = get_imsm_dev(super, vol); - snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name); + strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN); + dev->volume[MAX_RAID_SERIAL_LEN-1] = '\0'; for (i = 0; i < mpb->num_raid_devs; i++) { dev = get_imsm_dev(super, i); handle_missing(super, dev); } super->updates_pending++; } + } else if (strcmp(update, "ppl") == 0 || + strcmp(update, "no-ppl") == 0) { + int new_policy; + char *ep; + int vol = strtoul(subarray, &ep, 10); + + if (*ep != '\0' || vol >= super->anchor->num_raid_devs) + return 2; + + if (strcmp(update, "ppl") == 0) + new_policy = RWH_DISTRIBUTED; + else + new_policy = RWH_OFF; + + if (st->update_tail) { + struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u)); + + u->type = update_rwh_policy; + u->dev_idx = vol; + u->new_policy = new_policy; + append_metadata_update(st, u, sizeof(*u)); + } else { + struct imsm_dev *dev; + + dev = get_imsm_dev(super, vol); + dev->rwh_policy = new_policy; + super->updates_pending++; + } } else return 2; return 0; } -#endif /* MDASSEMBLE */ static int is_gen_migration(struct imsm_dev *dev) { @@ -6497,7 +7435,6 @@ static int is_rebuilding(struct imsm_dev *dev) return 0; } -#ifndef MDASSEMBLE static int is_initializing(struct imsm_dev *dev) { struct imsm_map *migr_map; @@ -6515,7 +7452,6 @@ static int is_initializing(struct imsm_dev *dev) return 0; } -#endif static void update_recovery_start(struct intel_super *super, struct imsm_dev *dev, @@ -6541,7 +7477,7 @@ static void update_recovery_start(struct intel_super *super, * IMSM_ORD_REBUILD, so assume they are missing and the * disk_ord_tbl was not correctly updated */ - dprintf("%s: failed to locate out-of-sync disk\n", __func__); + dprintf("failed to locate out-of-sync disk\n"); return; } @@ -6549,9 +7485,7 @@ static void update_recovery_start(struct intel_super *super, rebuild->recovery_start = units * blocks_per_migr_unit(super, dev); } -#ifndef MDASSEMBLE static int recover_backup_imsm(struct supertype *st, struct mdinfo *info); -#endif static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray) { @@ -6575,15 +7509,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra /* do not assemble arrays when not all attributes are supported */ if (imsm_check_attributes(mpb->attributes) == 0) { sb_errors = 1; - 
pr_err("Unsupported attributes in IMSM metadata." - "Arrays activation is blocked.\n"); - } - - /* check for bad blocks */ - if (imsm_bbm_log_size(super->anchor)) { - pr_err("BBM log found in IMSM metadata." - "Arrays activation is blocked.\n"); - sb_errors = 1; + pr_err("Unsupported attributes in IMSM metadata.Arrays activation is blocked.\n"); } /* count spare devices, not used in maps @@ -6598,9 +7524,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra struct imsm_map *map2; struct mdinfo *this; int slot; -#ifndef MDASSEMBLE int chunk; -#endif char *ep; if (subarray && @@ -6616,8 +7540,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra */ if (dev->vol.migr_state && (migr_type(dev) == MIGR_STATE_CHANGE)) { - pr_err("cannot assemble volume '%.16s':" - " unsupported migration in progress\n", + pr_err("cannot assemble volume '%.16s': unsupported migration in progress\n", dev->volume); continue; } @@ -6630,7 +7553,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra super->current_vol = i; getinfo_super_imsm_volume(st, this, NULL); this->next = rest; -#ifndef MDASSEMBLE chunk = __le16_to_cpu(map->blocks_per_strip) >> 1; /* mdadm does not support all metadata features- set the bit in all arrays state */ if (!validate_geometry_imsm_orom(super, @@ -6639,14 +7561,12 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra map->num_members, /* raid disks */ &chunk, join_u32(dev->size_low, dev->size_high), 1 /* verbose */)) { - pr_err("IMSM RAID geometry validation" - " failed. Array %s activation is blocked.\n", + pr_err("IMSM RAID geometry validation failed. Array %s activation is blocked.\n", dev->volume); this->array.state |= (1<vol.dirty) + if ((skip || recovery_start == 0) && + !(dev->vol.dirty & RAIDVOL_DIRTY)) this->resync_start = MaxSector; if (skip) continue; @@ -6714,17 +7635,31 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra info_d->events = __le32_to_cpu(mpb->generation_num); info_d->data_offset = pba_of_lba0(map); - info_d->component_size = blocks_per_member(map); + + if (map->raid_level == 5) { + info_d->component_size = + num_data_stripes(map) * + map->blocks_per_strip; + info_d->ppl_sector = this->ppl_sector; + info_d->ppl_size = this->ppl_size; + } else { + info_d->component_size = blocks_per_member(map); + } + info_d->consistency_policy = this->consistency_policy; + + info_d->bb.supported = 1; + get_volume_badblocks(super->bbm_log, ord_to_idx(ord), + info_d->data_offset, + info_d->component_size, + &info_d->bb); } /* now that the disk list is up-to-date fixup recovery_start */ update_recovery_start(super, dev, this); this->array.spare_disks += spare_disks; -#ifndef MDASSEMBLE /* check for reshape */ if (this->reshape_active == 1) recover_backup_imsm(st, this); -#endif rest = this; } @@ -6828,8 +7763,8 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, /* when MAP_X is passed both maps failures are counted */ if (prev && - ((look_in_map == MAP_1) || (look_in_map == MAP_X)) && - (i < prev->num_members)) { + (look_in_map == MAP_1 || look_in_map == MAP_X) && + i < prev->num_members) { ord = __le32_to_cpu(prev->disk_ord_tbl[i]); idx_1 = ord_to_idx(ord); @@ -6837,8 +7772,8 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD) failed++; } - if (((look_in_map == MAP_0) || (look_in_map == MAP_X)) && - (i < 
map->num_members)) { + if ((look_in_map == MAP_0 || look_in_map == MAP_X) && + i < map->num_members) { ord = __le32_to_cpu(map->disk_ord_tbl[i]); idx = ord_to_idx(ord); @@ -6854,21 +7789,24 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev, return failed; } -#ifndef MDASSEMBLE static int imsm_open_new(struct supertype *c, struct active_array *a, char *inst) { struct intel_super *super = c->sb; struct imsm_super *mpb = super->anchor; + struct imsm_update_prealloc_bb_mem u; if (atoi(inst) >= mpb->num_raid_devs) { - pr_err("%s: subarry index %d, out of range\n", - __func__, atoi(inst)); + pr_err("subarry index %d, out of range\n", atoi(inst)); return -ENODEV; } dprintf("imsm: open_new %s\n", inst); a->info.container_member = atoi(inst); + + u.type = update_prealloc_badblocks_mem; + imsm_update_metadata_locally(c, &u, sizeof(u)); + return 0; } @@ -6888,15 +7826,16 @@ static int is_resyncing(struct imsm_dev *dev) migr_map = get_imsm_map(dev, MAP_1); - if ((migr_map->map_state == IMSM_T_STATE_NORMAL) && - (dev->vol.migr_type != MIGR_GEN_MIGR)) + if (migr_map->map_state == IMSM_T_STATE_NORMAL && + dev->vol.migr_type != MIGR_GEN_MIGR) return 1; else return 0; } /* return true if we recorded new information */ -static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) +static int mark_failure(struct intel_super *super, + struct imsm_dev *dev, struct imsm_disk *disk, int idx) { __u32 ord; int slot; @@ -6932,19 +7871,22 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx) struct imsm_map *map2 = get_imsm_map(dev, MAP_1); int slot2 = get_imsm_disk_slot(map2, idx); - if ((slot2 < map2->num_members) && - (slot2 >= 0)) + if (slot2 < map2->num_members && slot2 >= 0) set_imsm_ord_tbl_ent(map2, slot2, idx | IMSM_ORD_REBUILD); } if (map->failed_disk_num == 0xff) map->failed_disk_num = slot; + + clear_disk_badblocks(super->bbm_log, ord_to_idx(ord)); + return 1; } -static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx) +static void mark_missing(struct intel_super *super, + struct imsm_dev *dev, struct imsm_disk *disk, int idx) { - mark_failure(dev, disk, idx); + mark_failure(super, dev, disk, idx); if (disk->scsi_id == __cpu_to_le32(~(__u32)0)) return; @@ -6980,7 +7922,7 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev) end_migration(dev, super, map_state); } for (dl = super->missing; dl; dl = dl->next) - mark_missing(dev, &dl->disk, dl->index); + mark_missing(super, dev, &dl->disk, dl->index); super->updates_pending++; } @@ -7016,9 +7958,7 @@ static unsigned long long imsm_set_array_size(struct imsm_dev *dev, array_blocks = new_size; } - /* round array size down to closest MB - */ - array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; + array_blocks = round_size_to_mb(array_blocks, used_disks); dev->size_low = __cpu_to_le32((__u32)array_blocks); dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32)); @@ -7131,11 +8071,9 @@ static int imsm_set_array_state(struct active_array *a, int consistent) array_blocks = blocks_per_member(map) * used_disks; - /* round array size down to closest MB - */ - array_blocks = (array_blocks - >> SECT_PER_MB_SHIFT) - << SECT_PER_MB_SHIFT; + array_blocks = + round_size_to_mb(array_blocks, + used_disks); a->info.custom_array_size = array_blocks; /* encourage manager to update array * size @@ -7216,18 +8154,41 @@ mark_checkpoint: skip_mark_checkpoint: /* mark dirty / clean */ - if (dev->vol.dirty != !consistent) { + if 
(((dev->vol.dirty & RAIDVOL_DIRTY) && consistent) || + (!(dev->vol.dirty & RAIDVOL_DIRTY) && !consistent)) { dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty"); - if (consistent) - dev->vol.dirty = 0; - else - dev->vol.dirty = 1; + if (consistent) { + dev->vol.dirty = RAIDVOL_CLEAN; + } else { + dev->vol.dirty = RAIDVOL_DIRTY; + if (dev->rwh_policy == RWH_DISTRIBUTED) + dev->vol.dirty |= RAIDVOL_DSRECORD_VALID; + } super->updates_pending++; } return consistent; } +static int imsm_disk_slot_to_ord(struct active_array *a, int slot) +{ + int inst = a->info.container_member; + struct intel_super *super = a->container->sb; + struct imsm_dev *dev = get_imsm_dev(super, inst); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + + if (slot > map->num_members) { + pr_err("imsm: imsm_disk_slot_to_ord %d out of range 0..%d\n", + slot, map->num_members - 1); + return -1; + } + + if (slot < 0) + return -1; + + return get_imsm_ord_tbl_ent(dev, slot, MAP_0); +} + static void imsm_set_disk(struct active_array *a, int n, int state) { int inst = a->info.container_member; @@ -7238,24 +8199,19 @@ static void imsm_set_disk(struct active_array *a, int n, int state) struct mdinfo *mdi; int recovery_not_finished = 0; int failed; - __u32 ord; + int ord; __u8 map_state; - if (n > map->num_members) - pr_err("imsm: set_disk %d out of range 0..%d\n", - n, map->num_members - 1); - - if (n < 0) + ord = imsm_disk_slot_to_ord(a, n); + if (ord < 0) return; dprintf("imsm: set_disk %d:%x\n", n, state); - - ord = get_imsm_ord_tbl_ent(dev, n, MAP_0); disk = get_imsm_disk(super, ord_to_idx(ord)); /* check for new failures */ if (state & DS_FAULTY) { - if (mark_failure(dev, disk, ord_to_idx(ord))) + if (mark_failure(super, dev, disk, ord_to_idx(ord))) super->updates_pending++; } @@ -7276,7 +8232,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) case IMSM_T_STATE_NORMAL: /* transition to normal state */ dprintf("normal: "); if (is_rebuilding(dev)) { - dprintf("while rebuilding"); + dprintf_cont("while rebuilding"); /* check if recovery is really finished */ for (mdi = a->info.devs; mdi ; mdi = mdi->next) if (mdi->recovery_start != MaxSector) { @@ -7284,8 +8240,8 @@ static void imsm_set_disk(struct active_array *a, int n, int state) break; } if (recovery_not_finished) { - dprintf("\nimsm: Rebuild has not finished yet, " - "state not changed"); + dprintf_cont("\n"); + dprintf("Rebuild has not finished yet, state not changed"); if (a->last_checkpoint < mdi->recovery_start) { a->last_checkpoint = mdi->recovery_start; super->updates_pending++; @@ -7300,7 +8256,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) break; } if (is_gen_migration(dev)) { - dprintf("while general migration"); + dprintf_cont("while general migration"); if (a->last_checkpoint >= a->info.component_size) end_migration(dev, super, map_state); else @@ -7312,26 +8268,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state) } break; case IMSM_T_STATE_DEGRADED: /* transition to degraded state */ - dprintf("degraded: "); - if ((map->map_state != map_state) && - !dev->vol.migr_state) { - dprintf("mark degraded"); + dprintf_cont("degraded: "); + if (map->map_state != map_state && !dev->vol.migr_state) { + dprintf_cont("mark degraded"); map->map_state = map_state; super->updates_pending++; a->last_checkpoint = 0; break; } if (is_rebuilding(dev)) { - dprintf("while rebuilding."); + dprintf_cont("while rebuilding."); if (map->map_state != map_state) { - dprintf(" Map state change"); + dprintf_cont(" Map 
state change"); end_migration(dev, super, map_state); super->updates_pending++; } break; } if (is_gen_migration(dev)) { - dprintf("while general migration"); + dprintf_cont("while general migration"); if (a->last_checkpoint >= a->info.component_size) end_migration(dev, super, map_state); else { @@ -7342,22 +8297,22 @@ static void imsm_set_disk(struct active_array *a, int n, int state) break; } if (is_initializing(dev)) { - dprintf("while initialization."); + dprintf_cont("while initialization."); map->map_state = map_state; super->updates_pending++; break; } break; case IMSM_T_STATE_FAILED: /* transition to failed state */ - dprintf("failed: "); + dprintf_cont("failed: "); if (is_gen_migration(dev)) { - dprintf("while general migration"); + dprintf_cont("while general migration"); map->map_state = map_state; super->updates_pending++; break; } if (map->map_state != map_state) { - dprintf("mark failed"); + dprintf_cont("mark failed"); end_migration(dev, super, map_state); super->updates_pending++; a->last_checkpoint = 0; @@ -7365,10 +8320,9 @@ static void imsm_set_disk(struct active_array *a, int n, int state) } break; default: - dprintf("state %i\n", map_state); + dprintf_cont("state %i\n", map_state); } - dprintf("\n"); - + dprintf_cont("\n"); } static int store_imsm_mpb(int fd, struct imsm_super *mpb) @@ -7377,27 +8331,30 @@ static int store_imsm_mpb(int fd, struct imsm_super *mpb) __u32 mpb_size = __le32_to_cpu(mpb->mpb_size); unsigned long long dsize; unsigned long long sectors; + unsigned int sector_size; + get_dev_sector_size(fd, NULL, §or_size); get_dev_size(fd, NULL, &dsize); - if (mpb_size > 512) { + if (mpb_size > sector_size) { /* -1 to account for anchor */ - sectors = mpb_sectors(mpb) - 1; + sectors = mpb_sectors(mpb, sector_size) - 1; /* write the extended mpb to the sectors preceeding the anchor */ - if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) + if (lseek64(fd, dsize - (sector_size * (2 + sectors)), + SEEK_SET) < 0) return 1; - if ((unsigned long long)write(fd, buf + 512, 512 * sectors) - != 512 * sectors) + if ((unsigned long long)write(fd, buf + sector_size, + sector_size * sectors) != sector_size * sectors) return 1; } /* first block is stored on second to last sector of the disk */ - if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) + if (lseek64(fd, dsize - (sector_size * 2), SEEK_SET) < 0) return 1; - if (write(fd, buf, 512) != 512) + if ((unsigned int)write(fd, buf, sector_size) != sector_size) return 1; return 0; @@ -7430,7 +8387,7 @@ static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_a dl = NULL; if (dl) - dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor); + dprintf("found %x:%x\n", dl->major, dl->minor); return dl; } @@ -7631,8 +8588,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, /* Cannot activate another spare if rebuild is in progress already */ if (is_rebuilding(dev)) { - dprintf("imsm: No spare activation allowed. " - "Rebuild in progress already.\n"); + dprintf("imsm: No spare activation allowed. Rebuild in progress already.\n"); return NULL; } @@ -7646,14 +8602,18 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, IMSM_T_STATE_DEGRADED) return NULL; + if (get_imsm_map(dev, MAP_0)->map_state == IMSM_T_STATE_UNINITIALIZED) { + dprintf("imsm: No spare activation allowed. Volume is not initialized.\n"); + return NULL; + } + /* * If there are any failed disks check state of the other volume. 
* Block rebuild if the another one is failed until failed disks * are removed from container. */ if (failed) { - dprintf("found failed disks in %.*s, check if there another" - "failed sub-array.\n", + dprintf("found failed disks in %.*s, check if there anotherfailed sub-array.\n", MAX_RAID_SERIAL_LEN, dev->volume); /* check if states of the other volumes allow for rebuild */ for (i = 0; i < super->anchor->num_raid_devs; i++) { @@ -7714,6 +8674,12 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a, di->data_offset = pba_of_lba0(map); di->component_size = a->info.component_size; di->container_member = inst; + di->bb.supported = 1; + if (dev->rwh_policy == RWH_DISTRIBUTED) { + di->consistency_policy = CONSISTENCY_POLICY_PPL; + di->ppl_sector = get_ppl_sector(super, inst); + di->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9; + } super->random = random32(); di->next = rv; rv = di; @@ -7775,21 +8741,22 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_ static struct dl *get_disk_super(struct intel_super *super, int major, int minor) { - struct dl *dl = NULL; + struct dl *dl; + for (dl = super->disks; dl; dl = dl->next) - if ((dl->major == major) && (dl->minor == minor)) + if (dl->major == major && dl->minor == minor) return dl; return NULL; } static int remove_disk_super(struct intel_super *super, int major, int minor) { - struct dl *prev = NULL; + struct dl *prev; struct dl *dl; prev = NULL; for (dl = super->disks; dl; dl = dl->next) { - if ((dl->major == major) && (dl->minor == minor)) { + if (dl->major == major && dl->minor == minor) { /* remove */ if (prev) prev->next = dl->next; @@ -7797,8 +8764,7 @@ static int remove_disk_super(struct intel_super *super, int major, int minor) super->disks = dl->next; dl->next = NULL; __free_imsm_disk(dl); - dprintf("%s: removed %x:%x\n", - __func__, major, minor); + dprintf("removed %x:%x\n", major, minor); break; } prev = dl; @@ -7811,7 +8777,8 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind static int add_remove_disk_update(struct intel_super *super) { int check_degraded = 0; - struct dl *disk = NULL; + struct dl *disk; + /* add/remove some spares to/from the metadata/contrainer */ while (super->disk_mgmt_list) { struct dl *disk_cfg; @@ -7824,9 +8791,8 @@ static int add_remove_disk_update(struct intel_super *super) disk_cfg->next = super->disks; super->disks = disk_cfg; check_degraded = 1; - dprintf("%s: added %x:%x\n", - __func__, disk_cfg->major, - disk_cfg->minor); + dprintf("added %x:%x\n", + disk_cfg->major, disk_cfg->minor); } else if (disk_cfg->action == DISK_REMOVE) { dprintf("Disk remove action processed: %x.%x\n", disk_cfg->major, disk_cfg->minor); @@ -7858,13 +8824,12 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * void **tofree = NULL; int ret_val = 0; - dprintf("apply_reshape_migration_update()\n"); - if ((u->subdev < 0) || - (u->subdev > 1)) { + dprintf("(enter)\n"); + if (u->subdev < 0 || u->subdev > 1) { dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev); return ret_val; } - if ((space_list == NULL) || (*space_list == NULL)) { + if (space_list == NULL || *space_list == NULL) { dprintf("imsm: Error: Memory is not allocated\n"); return ret_val; } @@ -7919,15 +8884,28 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * /* update chunk size */ - if (u->new_chunksize > 0) + if (u->new_chunksize > 0) { + unsigned long long num_data_stripes; + int used_disks = + 
imsm_num_data_members(dev, MAP_0); + + if (used_disks == 0) + return ret_val; + map->blocks_per_strip = __cpu_to_le16(u->new_chunksize * 2); + num_data_stripes = + (join_u32(dev->size_low, dev->size_high) + / used_disks); + num_data_stripes /= map->blocks_per_strip; + num_data_stripes /= map->num_domains; + set_num_data_stripes(map, num_data_stripes); + } /* add disk */ - if ((u->new_level != 5) || - (migr_map->raid_level != 0) || - (migr_map->raid_level == map->raid_level)) + if (u->new_level != 5 || migr_map->raid_level != 0 || + migr_map->raid_level == map->raid_level) goto skip_disk_add; if (u->new_disks[0] >= 0) { @@ -7936,8 +8914,7 @@ static int apply_reshape_migration_update(struct imsm_update_reshape_migration * new_disk = get_disk_super(super, major(u->new_disks[0]), minor(u->new_disks[0])); - dprintf("imsm: new disk for reshape is: %i:%i " - "(%p, index = %i)\n", + dprintf("imsm: new disk for reshape is: %i:%i (%p, index = %i)\n", major(u->new_disks[0]), minor(u->new_disks[0]), new_disk, new_disk->index); @@ -7978,9 +8955,8 @@ static int apply_size_change_update(struct imsm_update_size_change *u, struct intel_dev *id; int ret_val = 0; - dprintf("apply_size_change_update()\n"); - if ((u->subdev < 0) || - (u->subdev > 1)) { + dprintf("(enter)\n"); + if (u->subdev < 0 || u->subdev > 1) { dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev); return ret_val; } @@ -7991,14 +8967,19 @@ static int apply_size_change_update(struct imsm_update_size_change *u, struct imsm_map *map = get_imsm_map(dev, MAP_0); int used_disks = imsm_num_data_members(dev, MAP_0); unsigned long long blocks_per_member; + unsigned long long num_data_stripes; /* calculate new size */ blocks_per_member = u->new_size / used_disks; - dprintf("imsm: apply_size_change_update(size: %llu, " - "blocks per member: %llu)\n", - u->new_size, blocks_per_member); + num_data_stripes = blocks_per_member / + map->blocks_per_strip; + num_data_stripes /= map->num_domains; + dprintf("(size: %llu, blocks per member: %llu, num_data_stipes: %llu)\n", + u->new_size, blocks_per_member, + num_data_stripes); set_blocks_per_member(map, blocks_per_member); + set_num_data_stripes(map, num_data_stripes); imsm_set_array_size(dev, u->new_size); ret_val = 1; @@ -8038,8 +9019,7 @@ static int apply_update_activate_spare(struct imsm_update_activate_spare *u, break; if (!dl) { - pr_err("error: imsm_activate_spare passed " - "an unknown disk (index: %d)\n", + pr_err("error: imsm_activate_spare passed an unknown disk (index: %d)\n", u->dl->index); return 0; } @@ -8139,20 +9119,19 @@ static int apply_reshape_container_disks_update(struct imsm_update_reshape *u, int ret_val = 0; unsigned int dev_id; - dprintf("imsm: apply_reshape_container_disks_update()\n"); + dprintf("(enter)\n"); /* enable spares to use in array */ for (i = 0; i < delta_disks; i++) { new_disk = get_disk_super(super, major(u->new_disks[i]), minor(u->new_disks[i])); - dprintf("imsm: new disk for reshape is: %i:%i " - "(%p, index = %i)\n", + dprintf("imsm: new disk for reshape is: %i:%i (%p, index = %i)\n", major(u->new_disks[i]), minor(u->new_disks[i]), new_disk, new_disk->index); - if ((new_disk == NULL) || - ((new_disk->index >= 0) && - (new_disk->index < u->old_raid_disks))) + if (new_disk == NULL || + (new_disk->index >= 0 && + new_disk->index < u->old_raid_disks)) goto update_reshape_exit; new_disk->index = disk_count++; /* slot to fill in autolayout @@ -8251,6 +9230,14 @@ static int apply_takeover_update(struct imsm_update_takeover *u, map = get_imsm_map(dev, MAP_0); if 
(u->direction == R10_TO_R0) { + unsigned long long num_data_stripes; + + map->num_domains = 1; + num_data_stripes = blocks_per_member(map); + num_data_stripes /= map->blocks_per_strip; + num_data_stripes /= map->num_domains; + set_num_data_stripes(map, num_data_stripes); + /* Number of failed disks must be half of initial disk number */ if (imsm_count_failed(super, dev, MAP_0) != (map->num_members / 2)) @@ -8327,7 +9314,7 @@ static int apply_takeover_update(struct imsm_update_takeover *u, for (du = super->missing; du; du = du->next) if (du->index >= 0) { set_imsm_ord_tbl_ent(map, du->index, du->index); - mark_missing(dv->dev, &du->disk, du->index); + mark_missing(super, dv->dev, &du->disk, du->index); } return 1; @@ -8380,8 +9367,7 @@ static void imsm_process_update(struct supertype *st, struct imsm_update_general_migration_checkpoint *u = (void *)update->buf; - dprintf("imsm: process_update() " - "for update_general_migration_checkpoint called\n"); + dprintf("called for update_general_migration_checkpoint\n"); /* find device under general migration */ for (id = super->devlist ; id; id = id->next) { @@ -8450,15 +9436,14 @@ static void imsm_process_update(struct supertype *st, /* handle racing creates: first come first serve */ if (u->dev_idx < mpb->num_raid_devs) { - dprintf("%s: subarray %d already defined\n", - __func__, u->dev_idx); + dprintf("subarray %d already defined\n", u->dev_idx); goto create_error; } /* check update is next in sequence */ if (u->dev_idx != mpb->num_raid_devs) { - dprintf("%s: can not create array %d expected index %d\n", - __func__, u->dev_idx, mpb->num_raid_devs); + dprintf("can not create array %d expected index %d\n", + u->dev_idx, mpb->num_raid_devs); goto create_error; } @@ -8483,14 +9468,14 @@ static void imsm_process_update(struct supertype *st, continue; if (disks_overlap(super, i, u)) { - dprintf("%s: arrays overlap\n", __func__); + dprintf("arrays overlap\n"); goto create_error; } } /* check that prepare update was successful */ if (!update->space) { - dprintf("%s: prepare update failed\n", __func__); + dprintf("prepare update failed\n"); goto create_error; } @@ -8502,7 +9487,7 @@ static void imsm_process_update(struct supertype *st, for (i = 0; i < new_map->num_members; i++) { dl = serial_to_dl(inf[i].serial, super); if (!dl) { - dprintf("%s: disk disappeared\n", __func__); + dprintf("disk disappeared\n"); goto create_error; } } @@ -8620,9 +9605,25 @@ static void imsm_process_update(struct supertype *st, } break; } + case update_prealloc_badblocks_mem: + break; + case update_rwh_policy: { + struct imsm_update_rwh_policy *u = (void *)update->buf; + int target = u->dev_idx; + struct imsm_dev *dev = get_imsm_dev(super, target); + if (!dev) { + dprintf("could not find subarray-%d\n", target); + break; + } + + if (dev->rwh_policy != u->new_policy) { + dev->rwh_policy = u->new_policy; + super->updates_pending++; + } + break; + } default: - pr_err("error: unsuported process update type:" - "(type: %d)\n", type); + pr_err("error: unsuported process update type:(type: %d)\n", type); } } @@ -8640,6 +9641,7 @@ static int imsm_prepare_update(struct supertype *st, */ enum imsm_update_type type; struct intel_super *super = st->sb; + unsigned int sector_size = super->sector_size; struct imsm_super *mpb = super->anchor; size_t buf_len; size_t len = 0; @@ -8653,8 +9655,7 @@ static int imsm_prepare_update(struct supertype *st, case update_general_migration_checkpoint: if (update->len < (int)sizeof(struct imsm_update_general_migration_checkpoint)) return 0; - 
dprintf("imsm: prepare_update() " - "for update_general_migration_checkpoint called\n"); + dprintf("called for update_general_migration_checkpoint\n"); break; case update_takeover: { struct imsm_update_takeover *u = (void *)update->buf; @@ -8703,7 +9704,7 @@ static int imsm_prepare_update(struct supertype *st, if (update->len < (int)sizeof(*u)) return 0; - dprintf("imsm: imsm_prepare_update() for update_reshape\n"); + dprintf("for update_reshape\n"); for (dl = super->devlist; dl; dl = dl->next) { int size = sizeof_imsm_dev(dl->dev, 1); @@ -8738,7 +9739,7 @@ static int imsm_prepare_update(struct supertype *st, if (update->len < (int)sizeof(*u)) return 0; - dprintf("imsm: imsm_prepare_update() for update_reshape\n"); + dprintf("for update_reshape\n"); /* add space for bigger array in update */ @@ -8778,7 +9779,7 @@ static int imsm_prepare_update(struct supertype *st, current_level = map->raid_level; break; } - if ((u->new_level == 5) && (u->new_level != current_level)) { + if (u->new_level == 5 && u->new_level != current_level) { struct mdinfo *spares; spares = get_spares_for_grow(st); @@ -8862,6 +9863,15 @@ static int imsm_prepare_update(struct supertype *st, case update_add_remove_disk: /* no update->len needed */ break; + case update_prealloc_badblocks_mem: + super->extra_space += sizeof(struct bbm_log) - + get_imsm_bbm_log_size(super->bbm_log); + break; + case update_rwh_policy: { + if (update->len < (int)sizeof(struct imsm_update_rwh_policy)) + return 0; + break; + } default: return 0; } @@ -8872,17 +9882,18 @@ static int imsm_prepare_update(struct supertype *st, else buf_len = super->len; - if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) { + if (__le32_to_cpu(mpb->mpb_size) + super->extra_space + len > buf_len) { /* ok we need a larger buf than what is currently allocated * if this allocation fails process_update will notice that * ->next_len is set and ->next_buf is NULL */ - buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512); + buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + + super->extra_space + len, sector_size); if (super->next_buf) free(super->next_buf); super->next_len = buf_len; - if (posix_memalign(&super->next_buf, 512, buf_len) == 0) + if (posix_memalign(&super->next_buf, sector_size, buf_len) == 0) memset(super->next_buf, 0, buf_len); else super->next_buf = NULL; @@ -8897,11 +9908,11 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind struct dl *iter; struct imsm_dev *dev; struct imsm_map *map; - int i, j, num_members; + unsigned int i, j, num_members; __u32 ord; + struct bbm_log *log = super->bbm_log; - dprintf("%s: deleting device[%d] from imsm_super\n", - __func__, index); + dprintf("deleting device[%d] from imsm_super\n", index); /* shift all indexes down one */ for (iter = super->disks; iter; iter = iter->next) @@ -8932,6 +9943,14 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind } } + for (i = 0; i < log->entry_count; i++) { + struct bbm_log_entry *entry = &log->marked_block_entries[i]; + + if (entry->disk_ordinal <= index) + continue; + entry->disk_ordinal--; + } + mpb->num_disks--; super->updates_pending++; if (*dlp) { @@ -8941,7 +9960,6 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind __free_imsm_disk(dl); } } -#endif /* MDASSEMBLE */ static void close_targets(int *targets, int new_disks) { @@ -9043,8 +10061,7 @@ int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds, continue; } - if ((sd->disk.raid_disk >= raid_disks) || - 
(sd->disk.raid_disk < 0)) + if (sd->disk.raid_disk >= raid_disks || sd->disk.raid_disk < 0) continue; dn = map_dev(sd->disk.major, @@ -9059,9 +10076,8 @@ int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds, /* check if maximum array degradation level is not exceeded */ if ((raid_disks - opened) > - imsm_get_allowed_degradation(info->new_level, - raid_disks, - super, dev)) { + imsm_get_allowed_degradation(info->new_level, raid_disks, + super, dev)) { pr_err("Not enough disks can be opened.\n"); close_targets(raid_fds, raid_disks); return -2; @@ -9146,7 +10162,151 @@ int validate_container_imsm(struct mdinfo *info) return 0; } -#ifndef MDASSEMBLE + +/******************************************************************************* +* Function: imsm_record_badblock +* Description: This routine stores new bad block record in BBM log +* +* Parameters: +* a : array containing a bad block +* slot : disk number containing a bad block +* sector : bad block sector +* length : bad block sectors range +* Returns: +* 1 : Success +* 0 : Error +******************************************************************************/ +static int imsm_record_badblock(struct active_array *a, int slot, + unsigned long long sector, int length) +{ + struct intel_super *super = a->container->sb; + int ord; + int ret; + + ord = imsm_disk_slot_to_ord(a, slot); + if (ord < 0) + return 0; + + ret = record_new_badblock(super->bbm_log, ord_to_idx(ord), sector, + length); + if (ret) + super->updates_pending++; + + return ret; +} +/******************************************************************************* +* Function: imsm_clear_badblock +* Description: This routine clears bad block record from BBM log +* +* Parameters: +* a : array containing a bad block +* slot : disk number containing a bad block +* sector : bad block sector +* length : bad block sectors range +* Returns: +* 1 : Success +* 0 : Error +******************************************************************************/ +static int imsm_clear_badblock(struct active_array *a, int slot, + unsigned long long sector, int length) +{ + struct intel_super *super = a->container->sb; + int ord; + int ret; + + ord = imsm_disk_slot_to_ord(a, slot); + if (ord < 0) + return 0; + + ret = clear_badblock(super->bbm_log, ord_to_idx(ord), sector, length); + if (ret) + super->updates_pending++; + + return ret; +} +/******************************************************************************* +* Function: imsm_get_badblocks +* Description: This routine get list of bad blocks for an array +* +* Parameters: +* a : array +* slot : disk number +* Returns: +* bb : structure containing bad blocks +* NULL : error +******************************************************************************/ +static struct md_bb *imsm_get_badblocks(struct active_array *a, int slot) +{ + int inst = a->info.container_member; + struct intel_super *super = a->container->sb; + struct imsm_dev *dev = get_imsm_dev(super, inst); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + int ord; + + ord = imsm_disk_slot_to_ord(a, slot); + if (ord < 0) + return NULL; + + get_volume_badblocks(super->bbm_log, ord_to_idx(ord), pba_of_lba0(map), + blocks_per_member(map), &super->bb); + + return &super->bb; +} +/******************************************************************************* +* Function: examine_badblocks_imsm +* Description: Prints list of bad blocks on a disk to the standard output +* +* Parameters: +* st : metadata handler +* fd : open file descriptor for device +* devname : device 
name +* Returns: +* 0 : Success +* 1 : Error +******************************************************************************/ +static int examine_badblocks_imsm(struct supertype *st, int fd, char *devname) +{ + struct intel_super *super = st->sb; + struct bbm_log *log = super->bbm_log; + struct dl *d = NULL; + int any = 0; + + for (d = super->disks; d ; d = d->next) { + if (strcmp(d->devname, devname) == 0) + break; + } + + if ((d == NULL) || (d->index < 0)) { /* serial mismatch probably */ + pr_err("%s doesn't appear to be part of a raid array\n", + devname); + return 1; + } + + if (log != NULL) { + unsigned int i; + struct bbm_log_entry *entry = &log->marked_block_entries[0]; + + for (i = 0; i < log->entry_count; i++) { + if (entry[i].disk_ordinal == d->index) { + unsigned long long sector = __le48_to_cpu( + &entry[i].defective_block_start); + int cnt = entry[i].marked_count + 1; + + if (!any) { + printf("Bad-blocks on %s:\n", devname); + any = 1; + } + + printf("%20llu for %d sectors\n", sector, cnt); + } + } + } + + if (!any) + printf("No bad-blocks list configured on %s\n", devname); + + return 0; +} /******************************************************************************* * Function: init_migr_record_imsm * Description: Function inits imsm migration record @@ -9243,8 +10403,8 @@ int save_backup_imsm(struct supertype *st, { int rv = -1; struct intel_super *super = st->sb; - unsigned long long *target_offsets = NULL; - int *targets = NULL; + unsigned long long *target_offsets; + int *targets; int i; struct imsm_map *map_dest = get_imsm_map(dev, MAP_0); int new_disks = map_dest->num_members; @@ -9325,8 +10485,7 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state) unsigned long long curr_migr_unit; if (load_imsm_migr_rec(super, info) != 0) { - dprintf("imsm: ERROR: Cannot read migration record " - "for checkpoint save.\n"); + dprintf("imsm: ERROR: Cannot read migration record for checkpoint save.\n"); return 1; } @@ -9350,8 +10509,7 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state) __cpu_to_le32(curr_migr_unit * __le32_to_cpu(super->migr_rec->dest_depth_per_unit)); if (write_imsm_migr_rec(st) < 0) { - dprintf("imsm: Cannot write migration record " - "outside backup area\n"); + dprintf("imsm: Cannot write migration record outside backup area\n"); return 1; } @@ -9373,7 +10531,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) { struct intel_super *super = st->sb; struct migr_record *migr_rec = super->migr_rec; - struct imsm_map *map_dest = NULL; + struct imsm_map *map_dest; struct intel_dev *id = NULL; unsigned long long read_offset; unsigned long long write_offset; @@ -9382,6 +10540,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) int new_disks, i, err; char *buf = NULL; int retval = 1; + unsigned int sector_size = super->sector_size; unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit); unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units); char buffer[20]; @@ -9418,7 +10577,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) pba_of_lba0(map_dest)) * 512; unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; - if (posix_memalign((void **)&buf, 512, unit_len) != 0) + if (posix_memalign((void **)&buf, sector_size, unit_len) != 0) goto abort; targets = xcalloc(new_disks, sizeof(int)); @@ -9462,16 +10621,14 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info) new_disks, super, id->dev)) { - 
pr_err("Cannot restore data from backup." - " Too many failed disks\n"); + pr_err("Cannot restore data from backup. Too many failed disks\n"); goto abort; } if (save_checkpoint_imsm(st, info, UNIT_SRC_NORMAL)) { /* ignore error == 2, this can mean end of reshape here */ - dprintf("imsm: Cannot write checkpoint to " - "migration record (UNIT_SRC_NORMAL) during restart\n"); + dprintf("imsm: Cannot write checkpoint to migration record (UNIT_SRC_NORMAL) during restart\n"); } else retval = 0; @@ -9498,7 +10655,7 @@ static const char *imsm_get_disk_controller_domain(const char *path) strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1); if (stat(disk_path, &st) == 0) { struct sys_dev* hba; - char *path=NULL; + char *path; path = devt_to_devpath(st.st_rdev); if (path == NULL) @@ -9546,22 +10703,19 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, struct mdinfo *info, *member; int devices_that_can_grow = 0; - dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): " - "st->devnm = (%s)\n", st->devnm); + dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): st->devnm = (%s)\n", st->devnm); if (geo->size > 0 || geo->level != UnSet || geo->layout != UnSet || geo->chunksize != 0 || geo->raid_disks == UnSet) { - dprintf("imsm: Container operation is allowed for " - "raid disks number change only.\n"); + dprintf("imsm: Container operation is allowed for raid disks number change only.\n"); return ret_val; } if (direction == ROLLBACK_METADATA_CHANGES) { - dprintf("imsm: Metadata changes rollback is not supported for " - "container operation.\n"); + dprintf("imsm: Metadata changes rollback is not supported for container operation.\n"); return ret_val; } @@ -9576,17 +10730,14 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, /* we work on container for Online Capacity Expansion * only so raid_disks has to grow */ - dprintf("imsm: for container operation raid disks " - "increase is required\n"); + dprintf("imsm: for container operation raid disks increase is required\n"); break; } - if ((info->array.level != 0) && - (info->array.level != 5)) { + if (info->array.level != 0 && info->array.level != 5) { /* we cannot use this container with other raid level */ - dprintf("imsm: for container operation wrong" - " raid level (%i) detected\n", + dprintf("imsm: for container operation wrong raid level (%i) detected\n", info->array.level); break; } else { @@ -9597,8 +10748,7 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, if (!is_raid_level_supported(super->orom, member->array.level, geo->raid_disks)) { - dprintf("platform does not support raid%d with" - " %d disk%s\n", + dprintf("platform does not support raid%d with %d disk%s\n", info->array.level, geo->raid_disks, geo->raid_disks > 1 ? 
"s" : ""); @@ -9608,8 +10758,7 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, */ if (info->component_size % (info->array.chunk_size/512)) { - dprintf("Component size is not aligned to " - "chunk size\n"); + dprintf("Component size is not aligned to chunk size\n"); break; } } @@ -9637,9 +10786,9 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, ret_val = 1; if (ret_val) - dprintf("\tContainer operation allowed\n"); + dprintf("Container operation allowed\n"); else - dprintf("\tError: %i\n", ret_val); + dprintf("Error: %i\n", ret_val); return ret_val; } @@ -9670,15 +10819,14 @@ static int imsm_create_metadata_update_for_reshape( { struct intel_super *super = st->sb; struct imsm_super *mpb = super->anchor; - int update_memory_size = 0; - struct imsm_update_reshape *u = NULL; - struct mdinfo *spares = NULL; + int update_memory_size; + struct imsm_update_reshape *u; + struct mdinfo *spares; int i; - int delta_disks = 0; + int delta_disks; struct mdinfo *dev; - dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n", - geo->raid_disks); + dprintf("(enter) raid_disks = %i\n", geo->raid_disks); delta_disks = geo->raid_disks - old_raid_disks; @@ -9699,8 +10847,7 @@ static int imsm_create_metadata_update_for_reshape( if (spares == NULL || delta_disks > spares->array.spare_disks) { - pr_err("imsm: ERROR: Cannot get spare devices " - "for %s.\n", geo->dev_name); + pr_err("imsm: ERROR: Cannot get spare devices for %s.\n", geo->dev_name); i = -1; goto abort; } @@ -9732,12 +10879,12 @@ abort: dprintf("imsm: reshape update preparation :"); if (i == delta_disks) { - dprintf(" OK\n"); + dprintf_cont(" OK\n"); *updatep = u; return update_memory_size; } free(u); - dprintf(" Error\n"); + dprintf_cont(" Error\n"); return 0; } @@ -9753,11 +10900,10 @@ static int imsm_create_metadata_update_for_size_change( struct imsm_update_size_change **updatep) { struct intel_super *super = st->sb; - int update_memory_size = 0; - struct imsm_update_size_change *u = NULL; + int update_memory_size; + struct imsm_update_size_change *u; - dprintf("imsm_create_metadata_update_for_size_change(enter)" - " New size = %llu\n", geo->size); + dprintf("(enter) New size = %llu\n", geo->size); /* size of all update data without anchor */ update_memory_size = sizeof(struct imsm_update_size_change); @@ -9784,13 +10930,12 @@ static int imsm_create_metadata_update_for_migration( struct imsm_update_reshape_migration **updatep) { struct intel_super *super = st->sb; - int update_memory_size = 0; - struct imsm_update_reshape_migration *u = NULL; + int update_memory_size; + struct imsm_update_reshape_migration *u; struct imsm_dev *dev; int previous_level = -1; - dprintf("imsm_create_metadata_update_for_migration(enter)" - " New Level = %i\n", geo->level); + dprintf("(enter) New Level = %i\n", geo->level); /* size of all update data without anchor */ update_memory_size = sizeof(struct imsm_update_reshape_migration); @@ -9815,24 +10960,22 @@ static int imsm_create_metadata_update_for_migration( if (geo->chunksize != current_chunk_size) { u->new_chunksize = geo->chunksize / 1024; - dprintf("imsm: " - "chunk size change from %i to %i\n", + dprintf("imsm: chunk size change from %i to %i\n", current_chunk_size, u->new_chunksize); } previous_level = map->raid_level; } } - if ((geo->level == 5) && (previous_level == 0)) { + if (geo->level == 5 && previous_level == 0) { struct mdinfo *spares = NULL; u->new_raid_disks++; spares = get_spares_for_grow(st); - if ((spares == NULL) || 
(spares->array.spare_disks < 1)) { + if (spares == NULL || spares->array.spare_disks < 1) { free(u); sysfs_free(spares); update_memory_size = 0; - dprintf("error: cannot get spare device " - "for requested migration"); + pr_err("cannot get spare device for requested migration\n"); return 0; } sysfs_free(spares); @@ -9892,17 +11035,14 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, int rv; getinfo_super_imsm_volume(st, &info, NULL); - if ((geo->level != info.array.level) && - (geo->level >= 0) && - (geo->level != UnSet)) { + if (geo->level != info.array.level && geo->level >= 0 && + geo->level != UnSet) { switch (info.array.level) { case 0: if (geo->level == 5) { change = CH_MIGRATION; if (geo->layout != ALGORITHM_LEFT_ASYMMETRIC) { - pr_err("Error. Requested Layout " - "not supported (left-asymmetric layout " - "is supported only)!\n"); + pr_err("Error. Requested Layout not supported (left-asymmetric layout is supported only)!\n"); change = -1; goto analyse_change_exit; } @@ -9927,30 +11067,26 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, break; } if (change == -1) { - pr_err("Error. Level Migration from %d to %d " - "not supported!\n", + pr_err("Error. Level Migration from %d to %d not supported!\n", info.array.level, geo->level); goto analyse_change_exit; } } else geo->level = info.array.level; - if ((geo->layout != info.array.layout) - && ((geo->layout != UnSet) && (geo->layout != -1))) { + if (geo->layout != info.array.layout && + (geo->layout != UnSet && geo->layout != -1)) { change = CH_MIGRATION; - if ((info.array.layout == 0) - && (info.array.level == 5) - && (geo->layout == 5)) { + if (info.array.layout == 0 && info.array.level == 5 && + geo->layout == 5) { /* reshape 5 -> 4 */ - } else if ((info.array.layout == 5) - && (info.array.level == 5) - && (geo->layout == 0)) { + } else if (info.array.layout == 5 && info.array.level == 5 && + geo->layout == 0) { /* reshape 4 -> 5 */ geo->layout = 0; geo->level = 5; } else { - pr_err("Error. Layout Migration from %d to %d " - "not supported!\n", + pr_err("Error. Layout Migration from %d to %d not supported!\n", info.array.layout, geo->layout); change = -1; goto analyse_change_exit; @@ -9961,11 +11097,22 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, imsm_layout = info.array.layout; } - if ((geo->chunksize > 0) && (geo->chunksize != UnSet) - && (geo->chunksize != info.array.chunk_size)) + if (geo->chunksize > 0 && geo->chunksize != UnSet && + geo->chunksize != info.array.chunk_size) { + if (info.array.level == 10) { + pr_err("Error. Chunk size change for RAID 10 is not supported.\n"); + change = -1; + goto analyse_change_exit; + } else if (info.component_size % (geo->chunksize/512)) { + pr_err("New chunk size (%dK) does not evenly divide device size (%lluk). Aborting...\n", + geo->chunksize/1024, info.component_size/2); + change = -1; + goto analyse_change_exit; + } change = CH_MIGRATION; - else + } else { geo->chunksize = info.array.chunk_size; + } chunk = geo->chunksize / 1024; @@ -9976,32 +11123,28 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, */ current_size = info.custom_array_size / data_disks; - if ((geo->size > 0) && (geo->size != MAX_SIZE)) { + if (geo->size > 0 && geo->size != MAX_SIZE) { /* align component size */ geo->size = imsm_component_size_aligment_check( get_imsm_raid_level(dev->vol.map), - chunk * 1024, + chunk * 1024, super->sector_size, geo->size * 2); if (geo->size == 0) { - pr_err("Error. 
Size expansion is " \ - "supported only (current size is %llu, " \ - "requested size /rounded/ is 0).\n", + pr_err("Error. Size expansion is supported only (current size is %llu, requested size /rounded/ is 0).\n", current_size); goto analyse_change_exit; } } - if ((current_size != geo->size) && (geo->size > 0)) { + if (current_size != geo->size && geo->size > 0) { if (change != -1) { - pr_err("Error. Size change should be the only " - "one at a time.\n"); + pr_err("Error. Size change should be the only one at a time.\n"); change = -1; goto analyse_change_exit; } if ((super->current_vol + 1) != super->anchor->num_raid_devs) { - pr_err("Error. The last volume in container " - "can be expanded only (%i/%s).\n", + pr_err("Error. The last volume in container can be expanded only (%i/%s).\n", super->current_vol, st->devnm); goto analyse_change_exit; } @@ -10019,22 +11162,21 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, */ max_size = imsm_component_size_aligment_check( get_imsm_raid_level(dev->vol.map), - chunk * 1024, + chunk * 1024, super->sector_size, max_size); } if (geo->size == MAX_SIZE) { /* requested size change to the maximum available size */ if (max_size == 0) { - pr_err("Error. Cannot find " - "maximum available space.\n"); + pr_err("Error. Cannot find maximum available space.\n"); change = -1; goto analyse_change_exit; } else geo->size = max_size; } - if ((direction == ROLLBACK_METADATA_CHANGES)) { + if (direction == ROLLBACK_METADATA_CHANGES) { /* accept size for rollback only */ } else { @@ -10045,17 +11187,12 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, dprintf("Prepare update for size change to %llu\n", geo->size ); if (current_size >= geo->size) { - pr_err("Error. Size expansion is " - "supported only (current size is %llu, " - "requested size /rounded/ is %llu).\n", + pr_err("Error. Size expansion is supported only (current size is %llu, requested size /rounded/ is %llu).\n", current_size, geo->size); goto analyse_change_exit; } if (max_size && geo->size > max_size) { - pr_err("Error. Requested size is larger " - "than maximum available size (maximum " - "available size is %llu, " - "requested size /rounded/ is %llu).\n", + pr_err("Error. Requested size is larger than maximum available size (maximum available size is %llu, requested size /rounded/ is %llu).\n", max_size, geo->size); goto analyse_change_exit; } @@ -10070,7 +11207,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, geo->raid_disks + devNumChange, &chunk, geo->size, INVALID_SECTORS, - 0, 0, 1)) + 0, 0, info.consistency_policy, 1)) change = -1; if (check_devs) { @@ -10078,19 +11215,16 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, struct imsm_super *mpb = super->anchor; if (mpb->num_raid_devs > 1) { - pr_err("Error. Cannot perform operation on %s" - "- for this operation it MUST be single " - "array in container\n", + pr_err("Error. 
Cannot perform operation on %s- for this operation it MUST be single array in container\n", geo->dev_name); change = -1; } } analyse_change_exit: - if ((direction == ROLLBACK_METADATA_CHANGES) && - ((change == CH_MIGRATION) || (change == CH_TAKEOVER))) { - dprintf("imsm: Metadata changes rollback is not supported for " - "migration and takeover operations.\n"); + if (direction == ROLLBACK_METADATA_CHANGES && + (change == CH_MIGRATION || change == CH_TAKEOVER)) { + dprintf("imsm: Metadata changes rollback is not supported for migration and takeover operations.\n"); change = -1; } return change; @@ -10136,7 +11270,7 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, int ret_val = 1; struct geo_params geo; - dprintf("imsm: reshape_super called.\n"); + dprintf("(enter)\n"); memset(&geo, 0, sizeof(struct geo_params)); @@ -10150,8 +11284,8 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, if (delta_disks != UnSet) geo.raid_disks += delta_disks; - dprintf("\tfor level : %i\n", geo.level); - dprintf("\tfor raid_disks : %i\n", geo.raid_disks); + dprintf("for level : %i\n", geo.level); + dprintf("for raid_disks : %i\n", geo.raid_disks); if (experimental() == 0) return ret_val; @@ -10184,8 +11318,7 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, free(u); } else { - pr_err("(imsm) Operation " - "is not allowed on this container\n"); + pr_err("(imsm) Operation is not allowed on this container\n"); } } else { /* On volume level we support following operations @@ -10223,8 +11356,7 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, imsm_create_metadata_update_for_migration( st, &geo, &u); if (len < 1) { - dprintf("imsm: " - "Cannot prepare update\n"); + dprintf("imsm: Cannot prepare update\n"); break; } ret_val = 0; @@ -10243,8 +11375,7 @@ static int imsm_reshape_super(struct supertype *st, unsigned long long size, imsm_create_metadata_update_for_size_change( st, &geo, &u); if (len < 1) { - dprintf("imsm: " - "Cannot prepare update\n"); + dprintf("imsm: Cannot prepare update\n"); break; } ret_val = 0; @@ -10267,6 +11398,33 @@ exit_imsm_reshape_super: return ret_val; } +#define COMPLETED_OK 0 +#define COMPLETED_NONE 1 +#define COMPLETED_DELAYED 2 + +static int read_completed(int fd, unsigned long long *val) +{ + int ret; + char buf[50]; + + ret = sysfs_fd_get_str(fd, buf, 50); + if (ret < 0) + return ret; + + ret = COMPLETED_OK; + if (strncmp(buf, "none", 4) == 0) { + ret = COMPLETED_NONE; + } else if (strncmp(buf, "delayed", 7) == 0) { + ret = COMPLETED_DELAYED; + } else { + char *ep; + *val = strtoull(buf, &ep, 0); + if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' ')) + ret = -1; + } + return ret; +} + /******************************************************************************* * Function: wait_for_reshape_imsm * Description: Function writes new sync_max value and waits until @@ -10282,27 +11440,31 @@ exit_imsm_reshape_super: int wait_for_reshape_imsm(struct mdinfo *sra, int ndata) { int fd = sysfs_get_fd(sra, NULL, "sync_completed"); + int retry = 3; unsigned long long completed; /* to_complete : new sync_max position */ unsigned long long to_complete = sra->reshape_progress; unsigned long long position_to_set = to_complete / ndata; if (fd < 0) { - dprintf("imsm: wait_for_reshape_imsm() " - "cannot open reshape_position\n"); + dprintf("cannot open reshape_position\n"); return 1; } - if (sysfs_fd_get_ll(fd, &completed) < 0) { - dprintf("imsm: wait_for_reshape_imsm() " - 
"cannot read reshape_position (no reshape in progres)\n"); - close(fd); - return 0; - } + do { + if (sysfs_fd_get_ll(fd, &completed) < 0) { + if (!retry) { + dprintf("cannot read reshape_position (no reshape in progres)\n"); + close(fd); + return 1; + } + usleep(30000); + } else + break; + } while (retry--); if (completed > position_to_set) { - dprintf("imsm: wait_for_reshape_imsm() " - "wrong next position to set %llu (%llu)\n", + dprintf("wrong next position to set %llu (%llu)\n", to_complete, position_to_set); close(fd); return -1; @@ -10310,30 +11472,38 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata) dprintf("Position set: %llu\n", position_to_set); if (sysfs_set_num(sra, NULL, "sync_max", position_to_set) != 0) { - dprintf("imsm: wait_for_reshape_imsm() " - "cannot set reshape position to %llu\n", + dprintf("cannot set reshape position to %llu\n", position_to_set); close(fd); return -1; } do { + int rc; char action[20]; - sysfs_wait(fd, NULL); + int timeout = 3000; + + sysfs_wait(fd, &timeout); if (sysfs_get_str(sra, NULL, "sync_action", action, 20) > 0 && - strncmp(action, "reshape", 7) != 0) - break; - if (sysfs_fd_get_ll(fd, &completed) < 0) { - dprintf("imsm: wait_for_reshape_imsm() " - "cannot read reshape_position (in loop)\n"); + strncmp(action, "reshape", 7) != 0) { + if (strncmp(action, "idle", 4) == 0) + break; close(fd); - return 1; + return -1; } + + rc = read_completed(fd, &completed); + if (rc < 0) { + dprintf("cannot read reshape_position (in loop)\n"); + close(fd); + return 1; + } else if (rc == COMPLETED_NONE) + break; } while (completed < position_to_set); + close(fd); return 0; - } /******************************************************************************* @@ -10354,7 +11524,7 @@ int check_degradation_change(struct mdinfo *info, int rv; rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded); - if ((rv == -1) || (new_degraded != (unsigned long long)degraded)) { + if (rv == -1 || (new_degraded != (unsigned long long)degraded)) { /* check each device to ensure it is still working */ struct mdinfo *sd; new_degraded = 0; @@ -10362,9 +11532,10 @@ int check_degradation_change(struct mdinfo *info, if (sd->disk.state & (1<disk.state & (1<sb; - struct intel_dev *dv = NULL; + struct intel_dev *dv; + unsigned int sector_size = super->sector_size; struct imsm_dev *dev = NULL; struct imsm_map *map_src; int migr_vol_qan = 0; @@ -10431,7 +11603,10 @@ static int imsm_manage_reshape( int degraded = 0; int source_layout = 0; - if (!fds || !offsets || !sra) + if (!sra) + return ret_val; + + if (!fds || !offsets) goto abort; /* Find volume during the reshape */ @@ -10444,7 +11619,7 @@ static int imsm_manage_reshape( } /* Only one volume can migrate at the same time */ if (migr_vol_qan != 1) { - pr_err(": %s", migr_vol_qan ? + pr_err("%s", migr_vol_qan ? 
"Number of migrating volumes greater than 1\n" : "There is no volume during migrationg\n"); goto abort; @@ -10467,8 +11642,7 @@ static int imsm_manage_reshape( init_migr_record_imsm(st, dev, sra); else { if (__le32_to_cpu(migr_rec->rec_status) != UNIT_SRC_NORMAL) { - dprintf("imsm: cannot restart migration when data " - "are present in copy area.\n"); + dprintf("imsm: cannot restart migration when data are present in copy area.\n"); goto abort; } /* Save checkpoint to update migration record for current @@ -10478,9 +11652,7 @@ static int imsm_manage_reshape( if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) { /* ignore error == 2, this can mean end of reshape here */ - dprintf("imsm: Cannot write checkpoint to " - "migration record (UNIT_SRC_NORMAL, " - "initial save)\n"); + dprintf("imsm: Cannot write checkpoint to migration record (UNIT_SRC_NORMAL, initial save)\n"); goto abort; } } @@ -10491,8 +11663,8 @@ static int imsm_manage_reshape( buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512; /* add space for stripe aligment */ buf_size += old_data_stripe_length; - if (posix_memalign((void **)&buf, 4096, buf_size)) { - dprintf("imsm: Cannot allocate checpoint buffer\n"); + if (posix_memalign((void **)&buf, MAX_SECTOR_SIZE, buf_size)) { + dprintf("imsm: Cannot allocate checkpoint buffer\n"); goto abort; } @@ -10511,8 +11683,7 @@ static int imsm_manage_reshape( */ degraded = check_degradation_change(sra, fds, degraded); if (degraded > 1) { - dprintf("imsm: Abort reshape due to degradation" - " level (%i)\n", degraded); + dprintf("imsm: Abort reshape due to degradation level (%i)\n", degraded); goto abort; } @@ -10523,7 +11694,7 @@ static int imsm_manage_reshape( start = current_position * 512; - /* allign reading start to old geometry */ + /* align reading start to old geometry */ start_buf_shift = start % old_data_stripe_length; start_src = start - start_buf_shift; @@ -10537,7 +11708,7 @@ static int imsm_manage_reshape( * to backup alligned to source array * [bytes] */ - unsigned long long next_step_filler = 0; + unsigned long long next_step_filler; unsigned long long copy_length = next_step * 512; /* allign copy area length to stripe in old geometry */ @@ -10546,10 +11717,7 @@ static int imsm_manage_reshape( if (next_step_filler) next_step_filler = (old_data_stripe_length - next_step_filler); - dprintf("save_stripes() parameters: start = %llu," - "\tstart_src = %llu,\tnext_step*512 = %llu," - "\tstart_in_buf_shift = %llu," - "\tnext_step_filler = %llu\n", + dprintf("save_stripes() parameters: start = %llu,\tstart_src = %llu,\tnext_step*512 = %llu,\tstart_in_buf_shift = %llu,\tnext_step_filler = %llu\n", start, start_src, copy_length, start_buf_shift, next_step_filler); @@ -10559,8 +11727,7 @@ static int imsm_manage_reshape( copy_length + next_step_filler + start_buf_shift, buf)) { - dprintf("imsm: Cannot save stripes" - " to buffer\n"); + dprintf("imsm: Cannot save stripes to buffer\n"); goto abort; } /* Convert data to destination format and store it @@ -10568,14 +11735,12 @@ static int imsm_manage_reshape( */ if (save_backup_imsm(st, dev, sra, buf + start_buf_shift, copy_length)) { - dprintf("imsm: Cannot save stripes to " - "target devices\n"); + dprintf("imsm: Cannot save stripes to target devices\n"); goto abort; } if (save_checkpoint_imsm(st, sra, UNIT_SRC_IN_CP_AREA)) { - dprintf("imsm: Cannot write checkpoint to " - "migration record (UNIT_SRC_IN_CP_AREA)\n"); + dprintf("imsm: Cannot write checkpoint to migration record (UNIT_SRC_IN_CP_AREA)\n"); goto abort; } } 
else { @@ -10596,7 +11761,7 @@ static int imsm_manage_reshape( sra->reshape_progress = next_step; /* wait until reshape finish */ - if (wait_for_reshape_imsm(sra, ndata) < 0) { + if (wait_for_reshape_imsm(sra, ndata)) { dprintf("wait_for_reshape_imsm returned error!\n"); goto abort; } @@ -10606,26 +11771,44 @@ static int imsm_manage_reshape( if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL) == 1) { /* ignore error == 2, this can mean end of reshape here */ - dprintf("imsm: Cannot write checkpoint to " - "migration record (UNIT_SRC_NORMAL)\n"); + dprintf("imsm: Cannot write checkpoint to migration record (UNIT_SRC_NORMAL)\n"); goto abort; } } + /* clear migr_rec on disks after successful migration */ + struct dl *d; + + memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*sector_size); + for (d = super->disks; d; d = d->next) { + if (d->index < 0 || is_failed(&d->disk)) + continue; + unsigned long long dsize; + + get_dev_size(d->fd, NULL, &dsize); + if (lseek64(d->fd, dsize - MIGR_REC_SECTOR_POSITION*sector_size, + SEEK_SET) >= 0) { + if ((unsigned int)write(d->fd, super->migr_rec_buf, + MIGR_REC_BUF_SECTORS*sector_size) != + MIGR_REC_BUF_SECTORS*sector_size) + perror("Write migr_rec failed"); + } + } + /* return '1' if done */ ret_val = 1; abort: free(buf); - abort_reshape(sra); + /* See Grow.c: abort_reshape() for further explanation */ + sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL); + sysfs_set_num(sra, NULL, "suspend_hi", 0); + sysfs_set_num(sra, NULL, "suspend_lo", 0); return ret_val; } -#endif /* MDASSEMBLE */ - struct superswitch super_imsm = { -#ifndef MDASSEMBLE .examine_super = examine_super_imsm, .brief_examine_super = brief_examine_super_imsm, .brief_examine_subarrays = brief_examine_subarrays_imsm, @@ -10647,7 +11830,7 @@ struct superswitch super_imsm = { .manage_reshape = imsm_manage_reshape, .recover_backup = recover_backup_imsm, .copy_metadata = copy_metadata_imsm, -#endif + .examine_badblocks = examine_badblocks_imsm, .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, .getinfo_super = getinfo_super_imsm, @@ -10667,10 +11850,12 @@ struct superswitch super_imsm = { .container_content = container_content_imsm, .validate_container = validate_container_imsm, + .write_init_ppl = write_init_ppl_imsm, + .validate_ppl = validate_ppl_imsm, + .external = 1, .name = "imsm", -#ifndef MDASSEMBLE /* for mdmon */ .open_new = imsm_open_new, .set_array_state= imsm_set_array_state, @@ -10679,5 +11864,7 @@ struct superswitch super_imsm = { .activate_spare = imsm_activate_spare, .process_update = imsm_process_update, .prepare_update = imsm_prepare_update, -#endif /* MDASSEMBLE */ + .record_bad_block = imsm_record_badblock, + .clear_bad_block = imsm_clear_badblock, + .get_bad_blocks = imsm_get_badblocks, };
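Note on the bad-block reporting added above: examine_badblocks_imsm prints each BBM log entry as a start LBA and a sector count, where marked_count stores the count minus one and the start LBA is read with __le48_to_cpu() from a 48-bit field. The stand-alone sketch below illustrates that decoding; the struct layout, the helper name le48_to_cpu_sketch and the sample values are assumptions for illustration only, not mdadm's actual definitions.

/* Hypothetical sketch: decode one bad-block log entry the way
 * examine_badblocks_imsm reports it ("<sector> for <cnt> sectors").
 * Assumes the start LBA is stored as a 48-bit little-endian value. */
#include <stdio.h>
#include <stdint.h>

struct bbm_entry_sketch {          /* illustrative layout only */
	uint8_t marked_count;      /* number of bad sectors - 1 */
	uint8_t disk_ordinal;      /* disk index within the container */
	uint8_t lba48[6];          /* 48-bit little-endian start LBA */
};

static unsigned long long le48_to_cpu_sketch(const uint8_t *p)
{
	unsigned long long v = 0;
	int i;

	for (i = 5; i >= 0; i--)   /* byte 0 is the least significant */
		v = (v << 8) | p[i];
	return v;
}

int main(void)
{
	struct bbm_entry_sketch e = { 3, 0, { 0x00, 0x10, 0x00, 0x00, 0x00, 0x00 } };
	unsigned long long sector = le48_to_cpu_sketch(e.lba48);
	int cnt = e.marked_count + 1;   /* count is stored as "blocks marked - 1" */

	printf("%20llu for %d sectors\n", sector, cnt);   /* prints: 4096 for 4 sectors */
	return 0;
}

With marked_count = 3 the entry covers four consecutive sectors starting at LBA 4096, matching the "marked_count + 1" arithmetic used when the bad-block list is printed.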