#define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE)
#define MPB_SECTOR_CNT 2210
-#define IMSM_RESERVED_SECTORS 4096
-#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
+#define IMSM_RESERVED_SECTORS 8192
+#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2048
#define SECT_PER_MB_SHIFT 11
#define MAX_SECTOR_SIZE 4096
+#define MULTIPLE_PPL_AREA_SIZE_IMSM (1024 * 1024) /* Size of the whole
+ * mutliple PPL area
+ */
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
#define RWH_OFF 0
#define RWH_DISTRIBUTED 1
#define RWH_JOURNALING_DRIVE 2
+#define RWH_MULTIPLE_DISTRIBUTED 3
+#define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4
+#define RWH_MULTIPLE_OFF 5
__u8 rwh_policy; /* Raid Write Hole Policy */
__u8 jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
__u8 filler1;
__u32 orig_family_num; /* 0x40 - 0x43 original family num */
__u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */
__u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
-#define IMSM_FILLERS 35
- __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
+ __u16 num_raid_devs_created; /* 0x4C - 0x4D Used for generating unique
+ * volume IDs for raid_dev created in this array
+ * (starts at 1)
+ */
+ __u16 filler1; /* 0x4E - 0x4F */
+#define IMSM_FILLERS 34
+ __u32 filler[IMSM_FILLERS]; /* 0x50 - 0xD7 RAID_MPB_FILLERS */
struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
/* here comes imsm_dev[num_raid_devs] */
/* here comes BBM logs */
struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
-#ifndef MDASSEMBLE
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
-#endif
+
+#define BLOCKS_PER_KB (1024/512)
#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
* already been migrated and must
* be recovered from checkpoint area */
-#define PPL_ENTRY_SPACE (128 * 1024) /* Size of the PPL, without the header */
+#define PPL_ENTRY_SPACE (128 * 1024) /* Size of single PPL, without the header */
struct migr_record {
__u32 rec_status; /* Status used to determine how to restart
update_general_migration_checkpoint,
update_size_change,
update_prealloc_badblocks_mem,
+ update_rwh_policy,
};
struct imsm_update_activate_spare {
enum imsm_update_type type;
};
+struct imsm_update_rwh_policy {
+ enum imsm_update_type type;
+ int new_policy;
+ int dev_idx;
+};
+
static const char *_sys_dev_type[] = {
[SYS_DEV_UNKNOWN] = "Unknown",
[SYS_DEV_SAS] = "SAS",
return st;
}
-#ifndef MDASSEMBLE
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
return &mpb->sig[MPB_SIG_LEN];
}
-#endif
/* retrieve a disk directly from the anchor when the anchor is known to be
* up-to-date, currently only at load time
return size;
}
-#ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
return inf;
}
-#endif
static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
{
return addr;
}
-#ifndef MDASSEMBLE
/* get size of the bbm log */
static __u32 get_imsm_bbm_log_size(struct bbm_log *log)
{
return 1;
}
-#endif /* MDASSEMBLE */
/* allocate and load BBM log from metadata */
static int load_bbm_log(struct intel_super *super)
return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
}
+/* round array size down to closest MB and ensure it splits evenly
+ * between members
+ */
+static unsigned long long round_size_to_mb(unsigned long long size, unsigned int
+ disk_count)
+{
+ size /= disk_count;
+ size = (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+ size *= disk_count;
+
+ return size;
+}
+
+static int able_to_resync(int raid_level, int missing_disks)
+{
+ int max_missing_disks = 0;
+
+ switch (raid_level) {
+ case 10:
+ max_missing_disks = 1;
+ break;
+ default:
+ max_missing_disks = 0;
+ }
+ return missing_disks <= max_missing_disks;
+}
+
/* try to determine how much space is reserved for metadata from
* the last get_extents() entry on the smallest active disk,
* otherwise fallback to the default
return (remainder < rv) ? remainder : rv;
}
-/* Return minimum size of a spare that can be used in this array*/
-static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
+/*
+ * Return minimum size of a spare and sector size
+ * that can be used in this array
+ */
+int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
{
struct intel_super *super = st->sb;
struct dl *dl;
struct extent *e;
int i;
- unsigned long long rv = 0;
+ unsigned long long size = 0;
+
+ c->min_size = 0;
+ c->sector_size = 0;
if (!super)
- return rv;
+ return -EINVAL;
/* find first active disk in array */
dl = super->disks;
while (dl && (is_failed(&dl->disk) || dl->index == -1))
dl = dl->next;
if (!dl)
- return rv;
+ return -EINVAL;
/* find last lba used by subarrays */
e = get_extents(super, dl);
if (!e)
- return rv;
+ return -EINVAL;
for (i = 0; e[i].size; i++)
continue;
if (i > 0)
- rv = e[i-1].start + e[i-1].size;
+ size = e[i-1].start + e[i-1].size;
free(e);
/* add the amount of space needed for metadata */
- rv = rv + imsm_min_reserved_sectors(super);
+ size += imsm_min_reserved_sectors(super);
+
+ c->min_size = size * 512;
+ c->sector_size = super->sector_size;
- return rv * 512;
+ return 0;
}
static int is_gen_migration(struct imsm_dev *dev);
#define IMSM_4K_DIV 8
-#ifndef MDASSEMBLE
static __u64 blocks_per_migr_unit(struct intel_super *super,
struct imsm_dev *dev);
ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
} else
printf(" This Slot : ?\n");
+ printf(" Sector Size : %u\n", super->sector_size);
sz = __le32_to_cpu(dev->size_high);
sz <<= 32;
sz += __le32_to_cpu(dev->size_low);
- printf(" Array Size : %llu%s\n", (unsigned long long)sz,
+ printf(" Array Size : %llu%s\n",
+ (unsigned long long)sz * 512 / super->sector_size,
human_size(sz * 512));
sz = blocks_per_member(map);
- printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
+ printf(" Per Dev Size : %llu%s\n",
+ (unsigned long long)sz * 512 / super->sector_size,
human_size(sz * 512));
printf(" Sector Offset : %llu\n",
pba_of_lba0(map));
printf(" Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
"dirty" : "clean");
printf(" RWH Policy : ");
- if (dev->rwh_policy == RWH_OFF)
+ if (dev->rwh_policy == RWH_OFF || dev->rwh_policy == RWH_MULTIPLE_OFF)
printf("off\n");
else if (dev->rwh_policy == RWH_DISTRIBUTED)
printf("PPL distributed\n");
else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
printf("PPL journaling drive\n");
+ else if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+ printf("Multiple distributed PPLs\n");
+ else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE)
+ printf("Multiple PPLs on journaling drive\n");
else
printf("<unknown:%d>\n", dev->rwh_policy);
}
break;
}
}
-#endif /* MDASSEMBLE */
void convert_from_4k_imsm_migr_rec(struct intel_super *super)
{
return ret_val;
}
-#ifndef MDASSEMBLE
static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
static void examine_super_imsm(struct supertype *st, char *homehost)
printf(" Platform : Intel(R) ");
if (orom->capabilities == 0 && orom->driver_features == 0)
printf("Matrix Storage Manager\n");
+ else if (imsm_orom_is_enterprise(orom) && orom->major_ver >= 6)
+ printf("Virtual RAID on CPU\n");
else
printf("Rapid Storage Technology%s\n",
imsm_orom_is_enterprise(orom) ? " enterprise" : "");
return result;
}
-#endif
-
static int match_home_imsm(struct supertype *st, char *homehost)
{
/* the imsm metadata format does not specify any host
return retval;
}
-#ifndef MDASSEMBLE
/*******************************************************************************
* function: imsm_create_metadata_checkpoint_update
* Description: It creates update for checkpoint change.
close(fd);
return retval;
}
-#endif /* MDASSEMBLE */
/* spare/missing disks activations are not allowe when
* array/container performs reshape operation, because
memset(info->uuid, 0, sizeof(info->uuid));
info->recovery_start = MaxSector;
- if (info->array.level == 5 && dev->rwh_policy == RWH_DISTRIBUTED) {
+ if (info->array.level == 5 &&
+ (dev->rwh_policy == RWH_DISTRIBUTED ||
+ dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)) {
info->consistency_policy = CONSISTENCY_POLICY_PPL;
info->ppl_sector = get_ppl_sector(super, super->current_vol);
- info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+ if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+ info->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
+ else
+ info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE)
+ >> 9;
} else if (info->array.level <= 0) {
info->consistency_policy = CONSISTENCY_POLICY_NONE;
} else {
if (used_disks > 0) {
array_blocks = blocks_per_member(map) *
used_disks;
- /* round array size down to closest MB
- */
- info->custom_array_size = (array_blocks
- >> SECT_PER_MB_SHIFT)
- << SECT_PER_MB_SHIFT;
+ info->custom_array_size =
+ round_size_to_mb(array_blocks,
+ used_disks);
+
}
}
case MIGR_VERIFY:
static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
int look_in_map);
-#ifndef MDASSEMBLE
static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
{
if (is_gen_migration(dev)) {
}
}
}
-#endif
static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
{
__u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
__u32 idx = ord_to_idx(ord);
+ if (super->disks && super->disks->index == (int)idx)
+ info->disk.raid_disk = j;
+
if (!(ord & IMSM_ORD_REBUILD) &&
get_imsm_missing(super, idx)) {
missing = 1;
return 0;
}
-#ifndef MDASSEMBLE
/* When migrating map0 contains the 'destination' state while map1
* contains the current state. When not migrating map0 contains the
* current state. This routine assumes that map[0].map_state is set to
/* duplicate and then set the target end state in map[0] */
memcpy(dest, src, sizeof_imsm_map(src));
- if (migr_type == MIGR_REBUILD || migr_type == MIGR_GEN_MIGR) {
+ if (migr_type == MIGR_GEN_MIGR) {
__u32 ord;
int i;
dev->vol.curr_migr_unit = 0;
map->map_state = map_state;
}
-#endif
static int parse_raid_devices(struct intel_super *super)
{
if (pba_of_lba0(map0) != pba_of_lba0(map1))
/* migration optimization area was used */
return -1;
- if (migr_rec->ascending_migr == 0
- && migr_rec->dest_depth_per_unit > 0)
+ if (migr_rec->ascending_migr == 0 &&
+ migr_rec->dest_depth_per_unit > 0)
/* descending reshape not supported yet */
return -1;
}
sectors = mpb_sectors(anchor, sector_size) - 1;
free(anchor);
- if (posix_memalign(&super->migr_rec_buf, sector_size,
- MIGR_REC_BUF_SECTORS*sector_size) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
+ MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) {
pr_err("could not allocate migr_rec buffer\n");
free(super->buf);
return 2;
hba = hba->next;
}
fprintf(stderr, ").\n"
- " Mixing devices attached to different %s is not allowed.\n",
- hba_name->type == SYS_DEV_VMD ? "VMD domains" : "controllers");
+ " Mixing devices attached to different controllers is not allowed.\n");
}
return 2;
}
return 0;
}
-#ifndef MDASSEMBLE
static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
{
struct intel_disk *idisk = disk_list;
{
return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1);
}
-#endif
static int load_super_imsm(struct supertype *st, int fd, char *devname)
{
unsigned long long array_blocks;
size_t size_old, size_new;
unsigned long long num_data_stripes;
+ unsigned int data_disks;
+ unsigned long long size_per_member;
if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
pr_err("This imsm-container already has the maximum of %d volumes\n", super->orom->vpa);
pr_err("could not allocate new mpb\n");
return 0;
}
- if (posix_memalign(&super->migr_rec_buf, sector_size,
- MIGR_REC_BUF_SECTORS*sector_size) != 0) {
+ if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
+ MIGR_REC_BUF_SECTORS*
+ MAX_SECTOR_SIZE) != 0) {
pr_err("could not allocate migr_rec buffer\n");
free(super->buf);
free(super);
strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
array_blocks = calc_array_size(info->level, info->raid_disks,
info->layout, info->chunk_size,
- s->size * 2);
- /* round array size down to closest MB */
- array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+ s->size * BLOCKS_PER_KB);
+ data_disks = get_data_disks(info->level, info->layout,
+ info->raid_disks);
+ array_blocks = round_size_to_mb(array_blocks, data_disks);
+ size_per_member = array_blocks / data_disks;
dev->size_low = __cpu_to_le32((__u32) array_blocks);
dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
vol->curr_migr_unit = 0;
map = get_imsm_map(dev, MAP_0);
set_pba_of_lba0(map, super->create_offset);
- set_blocks_per_member(map, info_to_blocks_per_member(info, s->size));
+ set_blocks_per_member(map, info_to_blocks_per_member(info,
+ size_per_member /
+ BLOCKS_PER_KB));
map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
map->failed_disk_num = ~0;
if (info->level > 0)
map->num_domains = 1;
/* info->size is only int so use the 'size' parameter instead */
- num_data_stripes = (s->size * 2) / info_to_blocks_per_strip(info);
+ num_data_stripes = size_per_member / info_to_blocks_per_strip(info);
num_data_stripes /= map->num_domains;
set_num_data_stripes(map, num_data_stripes);
set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
}
mpb->num_raid_devs++;
+ mpb->num_raid_devs_created++;
+ dev->my_vol_raid_dev_num = mpb->num_raid_devs_created;
- if (s->consistency_policy == UnSet ||
- s->consistency_policy == CONSISTENCY_POLICY_RESYNC ||
- s->consistency_policy == CONSISTENCY_POLICY_NONE) {
- dev->rwh_policy = RWH_OFF;
+ if (s->consistency_policy <= CONSISTENCY_POLICY_RESYNC) {
+ dev->rwh_policy = RWH_MULTIPLE_OFF;
} else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
- dev->rwh_policy = RWH_DISTRIBUTED;
+ dev->rwh_policy = RWH_MULTIPLE_DISTRIBUTED;
} else {
free(dev);
free(dv);
return 1;
}
-#ifndef MDASSEMBLE
+static int drive_validate_sector_size(struct intel_super *super, struct dl *dl)
+{
+ unsigned int member_sector_size;
+
+ if (dl->fd < 0) {
+ pr_err("Invalid file descriptor for %s\n", dl->devname);
+ return 0;
+ }
+
+ if (!get_dev_sector_size(dl->fd, dl->devname, &member_sector_size))
+ return 0;
+ if (member_sector_size != super->sector_size)
+ return 0;
+ return 1;
+}
+
static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
int fd, char *devname)
{
return 1;
}
+ if (!drive_validate_sector_size(super, dl)) {
+ pr_err("Combining drives of different sector size in one volume is not allowed\n");
+ return 1;
+ }
+
/* add a pristine spare to the metadata */
if (dl->index < 0) {
dl->index = super->anchor->num_disks;
} else if (super->hba->type == SYS_DEV_VMD && super->orom &&
!imsm_orom_has_tpv_support(super->orom)) {
pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n"
- "\tPlease refer to Intel(R) RSTe user guide.\n");
+ "\tPlease refer to Intel(R) RSTe/VROC user guide.\n");
free(dd->devname);
free(dd);
return 1;
if (super->sector_size == 0) {
/* this a first device, so sector_size is not set yet */
super->sector_size = member_sector_size;
- } else if (member_sector_size != super->sector_size) {
- pr_err("Mixing between different sector size is forbidden, aborting...\n");
- if (dd->devname)
- free(dd->devname);
- free(dd);
- return 1;
}
/* clear migr_rec when adding disk to container */
- memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*super->sector_size);
- if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*super->sector_size,
+ memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
+ if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*member_sector_size,
SEEK_SET) >= 0) {
if ((unsigned int)write(fd, super->migr_rec_buf,
- MIGR_REC_BUF_SECTORS*super->sector_size) !=
- MIGR_REC_BUF_SECTORS*super->sector_size)
+ MIGR_REC_BUF_SECTORS*member_sector_size) !=
+ MIGR_REC_BUF_SECTORS*member_sector_size)
perror("Write migr_rec failed");
}
}
if (clear_migration_record)
memset(super->migr_rec_buf, 0,
- MIGR_REC_BUF_SECTORS*sector_size);
+ MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
if (sector_size == 4096)
convert_to_4k(super);
return 0;
}
-#endif
__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
+static int write_ppl_header(unsigned long long ppl_sector, int fd, void *buf)
+{
+ struct ppl_header *ppl_hdr = buf;
+ int ret;
+
+ ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+ if (lseek64(fd, ppl_sector * 512, SEEK_SET) < 0) {
+ ret = -errno;
+ perror("Failed to seek to PPL header location");
+ return ret;
+ }
+
+ if (write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+ ret = -errno;
+ perror("Write PPL header failed");
+ return ret;
+ }
+
+ fsync(fd);
+
+ return 0;
+}
+
static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd)
{
struct intel_super *super = st->sb;
struct ppl_header *ppl_hdr;
int ret;
- ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+ /* first clear entire ppl space */
+ ret = zero_disk_range(fd, info->ppl_sector, info->ppl_size);
+ if (ret)
+ return ret;
+
+ ret = posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE);
if (ret) {
pr_err("Failed to allocate PPL header buffer\n");
- return ret;
+ return -ret;
}
memset(buf, 0, PPL_HEADER_SIZE);
ppl_hdr = buf;
memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num);
- ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
- if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
- ret = errno;
- perror("Failed to seek to PPL header location");
- }
-
- if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
- ret = errno;
- perror("Write PPL header failed");
+ if (info->mismatch_cnt) {
+ /*
+ * We are overwriting an invalid ppl. Make one entry with wrong
+ * checksum to prevent the kernel from skipping resync.
+ */
+ ppl_hdr->entries_count = __cpu_to_le32(1);
+ ppl_hdr->entries[0].checksum = ~0;
}
- if (!ret)
- fsync(fd);
+ ret = write_ppl_header(info->ppl_sector, fd, buf);
free(buf);
return ret;
}
+static int is_rebuilding(struct imsm_dev *dev);
+
static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
struct mdinfo *disk)
{
struct intel_super *super = st->sb;
struct dl *d;
- void *buf;
+ void *buf_orig, *buf, *buf_prev = NULL;
int ret = 0;
- struct ppl_header *ppl_hdr;
+ struct ppl_header *ppl_hdr = NULL;
__u32 crc;
struct imsm_dev *dev;
- struct imsm_map *map;
__u32 idx;
+ unsigned int i;
+ unsigned long long ppl_offset = 0;
+ unsigned long long prev_gen_num = 0;
if (disk->disk.raid_disk < 0)
return 0;
- if (posix_memalign(&buf, 4096, PPL_HEADER_SIZE)) {
+ dev = get_imsm_dev(super, info->container_member);
+ idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_0);
+ d = get_imsm_dl_disk(super, idx);
+
+ if (!d || d->index < 0 || is_failed(&d->disk))
+ return 0;
+
+ if (posix_memalign(&buf_orig, MAX_SECTOR_SIZE, PPL_HEADER_SIZE * 2)) {
pr_err("Failed to allocate PPL header buffer\n");
return -1;
}
+ buf = buf_orig;
- dev = get_imsm_dev(super, info->container_member);
- map = get_imsm_map(dev, MAP_X);
- idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_X);
- d = get_imsm_dl_disk(super, idx);
+ ret = 1;
+ while (ppl_offset < MULTIPLE_PPL_AREA_SIZE_IMSM) {
+ void *tmp;
- if (!d || d->index < 0 || is_failed(&d->disk))
- goto out;
+ dprintf("Checking potential PPL at offset: %llu\n", ppl_offset);
- if (lseek64(d->fd, info->ppl_sector * 512, SEEK_SET) < 0) {
- perror("Failed to seek to PPL header location");
- ret = -1;
- goto out;
+ if (lseek64(d->fd, info->ppl_sector * 512 + ppl_offset,
+ SEEK_SET) < 0) {
+ perror("Failed to seek to PPL header location");
+ ret = -1;
+ break;
+ }
+
+ if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+ perror("Read PPL header failed");
+ ret = -1;
+ break;
+ }
+
+ ppl_hdr = buf;
+
+ crc = __le32_to_cpu(ppl_hdr->checksum);
+ ppl_hdr->checksum = 0;
+
+ if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
+ dprintf("Wrong PPL header checksum on %s\n",
+ d->devname);
+ break;
+ }
+
+ if (prev_gen_num > __le64_to_cpu(ppl_hdr->generation)) {
+ /* previous was newest, it was already checked */
+ break;
+ }
+
+ if ((__le32_to_cpu(ppl_hdr->signature) !=
+ super->anchor->orig_family_num)) {
+ dprintf("Wrong PPL header signature on %s\n",
+ d->devname);
+ ret = 1;
+ break;
+ }
+
+ ret = 0;
+ prev_gen_num = __le64_to_cpu(ppl_hdr->generation);
+
+ ppl_offset += PPL_HEADER_SIZE;
+ for (i = 0; i < __le32_to_cpu(ppl_hdr->entries_count); i++)
+ ppl_offset +=
+ __le32_to_cpu(ppl_hdr->entries[i].pp_size);
+
+ if (!buf_prev)
+ buf_prev = buf + PPL_HEADER_SIZE;
+ tmp = buf_prev;
+ buf_prev = buf;
+ buf = tmp;
}
- if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
- perror("Read PPL header failed");
- ret = -1;
- goto out;
+ if (buf_prev) {
+ buf = buf_prev;
+ ppl_hdr = buf_prev;
}
- ppl_hdr = buf;
+ /*
+ * Update metadata to use mutliple PPLs area (1MB).
+ * This is done once for all RAID members
+ */
+ if (info->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ info->ppl_size != (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9)) {
+ char subarray[20];
+ struct mdinfo *member_dev;
- crc = __le32_to_cpu(ppl_hdr->checksum);
- ppl_hdr->checksum = 0;
+ sprintf(subarray, "%d", info->container_member);
- if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
- dprintf("Wrong PPL header checksum on %s\n",
- d->devname);
- ret = 1;
- }
+ if (mdmon_running(st->container_devnm))
+ st->update_tail = &st->updates;
- if (!ret && (__le32_to_cpu(ppl_hdr->signature) !=
- super->anchor->orig_family_num)) {
- dprintf("Wrong PPL header signature on %s\n",
- d->devname);
- ret = 1;
+ if (st->ss->update_subarray(st, subarray, "ppl", NULL)) {
+ pr_err("Failed to update subarray %s\n",
+ subarray);
+ } else {
+ if (st->update_tail)
+ flush_metadata_updates(st);
+ else
+ st->ss->sync_metadata(st);
+ info->ppl_size = (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
+ for (member_dev = info->devs; member_dev;
+ member_dev = member_dev->next)
+ member_dev->ppl_size =
+ (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
+ }
}
-out:
- free(buf);
+ if (ret == 1) {
+ struct imsm_map *map = get_imsm_map(dev, MAP_X);
- if (ret == 1 && map->map_state == IMSM_T_STATE_UNINITIALIZED)
- return st->ss->write_init_ppl(st, info, d->fd);
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED ||
+ (map->map_state == IMSM_T_STATE_NORMAL &&
+ !(dev->vol.dirty & RAIDVOL_DIRTY)) ||
+ (is_rebuilding(dev) &&
+ dev->vol.curr_migr_unit == 0 &&
+ get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_1) != idx))
+ ret = st->ss->write_init_ppl(st, info, d->fd);
+ else
+ info->mismatch_cnt++;
+ } else if (ret == 0 &&
+ ppl_hdr->entries_count == 0 &&
+ is_rebuilding(dev) &&
+ info->resync_start == 0) {
+ /*
+ * The header has no entries - add a single empty entry and
+ * rewrite the header to prevent the kernel from going into
+ * resync after an interrupted rebuild.
+ */
+ ppl_hdr->entries_count = __cpu_to_le32(1);
+ ret = write_ppl_header(info->ppl_sector, d->fd, buf);
+ }
+
+ free(buf_orig);
return ret;
}
-#ifndef MDASSEMBLE
-
static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info)
{
struct intel_super *super = st->sb;
return rv;
}
-#endif
static int store_super_imsm(struct supertype *st, int fd)
{
if (!mpb)
return 1;
-#ifndef MDASSEMBLE
if (super->sector_size == 4096)
convert_to_4k(super);
return store_imsm_mpb(fd, mpb);
-#else
- return 1;
-#endif
}
-#ifndef MDASSEMBLE
static int validate_geometry_imsm_container(struct supertype *st, int level,
int layout, int raiddisks, int chunk,
unsigned long long size,
for (memb = mdstat ; memb ; memb = memb->next) {
if (memb->metadata_version &&
- (strncmp(memb->metadata_version, "external:", 9) == 0) &&
+ (strncmp(memb->metadata_version, "external:", 9) == 0) &&
(strcmp(&memb->metadata_version[9], name) == 0) &&
!is_subarray(memb->metadata_version+9) &&
memb->members) {
for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
char *devname = tmpdev->devname;
- struct stat stb;
+ dev_t rdev;
struct supertype *tst;
int dfd;
if (tmpdev->used > 1)
dprintf("cannot open device %s: %s\n",
devname, strerror(errno));
tmpdev->used = 2;
- } else if (fstat(dfd, &stb)< 0) {
- /* Impossible! */
- dprintf("fstat failed for %s: %s\n",
- devname, strerror(errno));
- tmpdev->used = 2;
- } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
- dprintf("%s is not a block device.\n",
- devname);
+ } else if (!fstat_is_blkdev(dfd, devname, &rdev)) {
tmpdev->used = 2;
} else if (must_be_container(dfd)) {
struct supertype *cst;
if (cst)
cst->ss->free_super(cst);
} else {
- tmpdev->st_rdev = stb.st_rdev;
+ tmpdev->st_rdev = rdev;
if (tst->ss->load_super(tst,dfd, NULL)) {
dprintf("no RAID superblock on %s\n",
devname);
unsigned long long *freesize,
int verbose)
{
- struct stat stb;
+ dev_t rdev;
struct intel_super *super = st->sb;
struct imsm_super *mpb;
struct dl *dl;
}
/* This device must be a member of the set */
- if (stat(dev, &stb) < 0)
- return 0;
- if ((S_IFMT & stb.st_mode) != S_IFBLK)
+ if (!stat_is_blkdev(dev, &rdev))
return 0;
for (dl = super->disks ; dl ; dl = dl->next) {
- if (dl->major == (int)major(stb.st_rdev) &&
- dl->minor == (int)minor(stb.st_rdev))
+ if (dl->major == (int)major(rdev) &&
+ dl->minor == (int)minor(rdev))
break;
}
if (!dl) {
}
super->updates_pending++;
}
+ } else if (strcmp(update, "ppl") == 0 ||
+ strcmp(update, "no-ppl") == 0) {
+ int new_policy;
+ char *ep;
+ int vol = strtoul(subarray, &ep, 10);
+
+ if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
+ return 2;
+
+ if (strcmp(update, "ppl") == 0)
+ new_policy = RWH_MULTIPLE_DISTRIBUTED;
+ else
+ new_policy = RWH_MULTIPLE_OFF;
+
+ if (st->update_tail) {
+ struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u));
+
+ u->type = update_rwh_policy;
+ u->dev_idx = vol;
+ u->new_policy = new_policy;
+ append_metadata_update(st, u, sizeof(*u));
+ } else {
+ struct imsm_dev *dev;
+
+ dev = get_imsm_dev(super, vol);
+ dev->rwh_policy = new_policy;
+ super->updates_pending++;
+ }
} else
return 2;
return 0;
}
-#endif /* MDASSEMBLE */
static int is_gen_migration(struct imsm_dev *dev)
{
return 0;
}
-#ifndef MDASSEMBLE
static int is_initializing(struct imsm_dev *dev)
{
struct imsm_map *migr_map;
return 0;
}
-#endif
static void update_recovery_start(struct intel_super *super,
struct imsm_dev *dev,
rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
}
-#ifndef MDASSEMBLE
static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
-#endif
static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
{
struct imsm_map *map2;
struct mdinfo *this;
int slot;
-#ifndef MDASSEMBLE
int chunk;
-#endif
char *ep;
+ int level;
if (subarray &&
(i != strtoul(subarray, &ep, 10) || *ep != '\0'))
dev = get_imsm_dev(super, i);
map = get_imsm_map(dev, MAP_0);
map2 = get_imsm_map(dev, MAP_1);
+ level = get_imsm_raid_level(map);
/* do not publish arrays that are in the middle of an
* unsupported migration
super->current_vol = i;
getinfo_super_imsm_volume(st, this, NULL);
this->next = rest;
-#ifndef MDASSEMBLE
chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
/* mdadm does not support all metadata features- set the bit in all arrays state */
if (!validate_geometry_imsm_orom(super,
- get_imsm_raid_level(map), /* RAID level */
- imsm_level_to_layout(get_imsm_raid_level(map)),
+ level, /* RAID level */
+ imsm_level_to_layout(level),
map->num_members, /* raid disks */
&chunk, join_u32(dev->size_low, dev->size_high),
1 /* verbose */)) {
(1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
(1<<MD_SB_BLOCK_VOLUME);
}
-#endif
/* if array has bad blocks, set suitable bit in all arrays state */
if (sb_errors)
int idx;
int skip;
__u32 ord;
+ int missing = 0;
skip = 0;
idx = get_imsm_disk_idx(dev, slot, MAP_0);
skip = 1;
if (d && is_failed(&d->disk))
skip = 1;
- if (ord & IMSM_ORD_REBUILD)
+ if (!skip && (ord & IMSM_ORD_REBUILD))
recovery_start = 0;
/*
* if we skip some disks the array will be assmebled degraded;
* reset resync start to avoid a dirty-degraded
* situation when performing the intial sync
- *
- * FIXME handle dirty degraded
*/
- if ((skip || recovery_start == 0) &&
- !(dev->vol.dirty & RAIDVOL_DIRTY))
- this->resync_start = MaxSector;
+ if (skip)
+ missing++;
+
+ if (!(dev->vol.dirty & RAIDVOL_DIRTY)) {
+ if ((!able_to_resync(level, missing) ||
+ recovery_start == 0))
+ this->resync_start = MaxSector;
+ } else {
+ /*
+ * FIXME handle dirty degraded
+ */
+ }
+
if (skip)
continue;
map->blocks_per_strip;
info_d->ppl_sector = this->ppl_sector;
info_d->ppl_size = this->ppl_size;
+ if (this->consistency_policy == CONSISTENCY_POLICY_PPL &&
+ recovery_start == 0)
+ this->resync_start = 0;
} else {
info_d->component_size = blocks_per_member(map);
}
- info_d->consistency_policy = this->consistency_policy;
info_d->bb.supported = 1;
get_volume_badblocks(super->bbm_log, ord_to_idx(ord),
update_recovery_start(super, dev, this);
this->array.spare_disks += spare_disks;
-#ifndef MDASSEMBLE
/* check for reshape */
if (this->reshape_active == 1)
recover_backup_imsm(st, this);
-#endif
rest = this;
}
return failed;
}
-#ifndef MDASSEMBLE
static int imsm_open_new(struct supertype *c, struct active_array *a,
char *inst)
{
/* end process for initialization and rebuild only
*/
if (is_gen_migration(dev) == 0) {
- __u8 map_state;
- int failed;
+ int failed = imsm_count_failed(super, dev, MAP_0);
- failed = imsm_count_failed(super, dev, MAP_0);
- map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+ if (failed) {
+ __u8 map_state;
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
+ struct imsm_map *map1;
+ int i, ord, ord_map1;
+ int rebuilt = 1;
- if (failed)
- end_migration(dev, super, map_state);
+ for (i = 0; i < map->num_members; i++) {
+ ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+ if (!(ord & IMSM_ORD_REBUILD))
+ continue;
+
+ map1 = get_imsm_map(dev, MAP_1);
+ if (!map1)
+ continue;
+
+ ord_map1 = __le32_to_cpu(map1->disk_ord_tbl[i]);
+ if (ord_map1 & IMSM_ORD_REBUILD)
+ rebuilt = 0;
+ }
+
+ if (rebuilt) {
+ map_state = imsm_check_degraded(super, dev,
+ failed, MAP_0);
+ end_migration(dev, super, map_state);
+ }
+ }
}
for (dl = super->missing; dl; dl = dl->next)
mark_missing(super, dev, &dl->disk, dl->index);
array_blocks = new_size;
}
- /* round array size down to closest MB
- */
- array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+ array_blocks = round_size_to_mb(array_blocks, used_disks);
dev->size_low = __cpu_to_le32((__u32)array_blocks);
dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
array_blocks =
blocks_per_member(map) *
used_disks;
- /* round array size down to closest MB
- */
- array_blocks = (array_blocks
- >> SECT_PER_MB_SHIFT)
- << SECT_PER_MB_SHIFT;
+ array_blocks =
+ round_size_to_mb(array_blocks,
+ used_disks);
a->info.custom_array_size = array_blocks;
/* encourage manager to update array
* size
dev->vol.dirty = RAIDVOL_CLEAN;
} else {
dev->vol.dirty = RAIDVOL_DIRTY;
- if (dev->rwh_policy == RWH_DISTRIBUTED)
+ if (dev->rwh_policy == RWH_DISTRIBUTED ||
+ dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
dev->vol.dirty |= RAIDVOL_DSRECORD_VALID;
}
super->updates_pending++;
int failed;
int ord;
__u8 map_state;
+ int rebuild_done = 0;
+ int i;
- ord = imsm_disk_slot_to_ord(a, n);
+ ord = get_imsm_ord_tbl_ent(dev, n, MAP_X);
if (ord < 0)
return;
struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
+ rebuild_done = 1;
super->updates_pending++;
}
dprintf_cont(" Map state change");
end_migration(dev, super, map_state);
super->updates_pending++;
+ } else if (!rebuild_done) {
+ break;
}
+
+ /* check if recovery is really finished */
+ for (mdi = a->info.devs; mdi ; mdi = mdi->next)
+ if (mdi->recovery_start != MaxSector) {
+ recovery_not_finished = 1;
+ break;
+ }
+ if (recovery_not_finished) {
+ dprintf_cont("\n");
+ dprintf("Rebuild has not finished yet, state not changed");
+ if (a->last_checkpoint < mdi->recovery_start) {
+ a->last_checkpoint =
+ mdi->recovery_start;
+ super->updates_pending++;
+ }
+ break;
+ }
+
+ dprintf_cont(" Rebuild done, still degraded");
+ dev->vol.migr_state = 0;
+ set_migr_type(dev, 0);
+ dev->vol.curr_migr_unit = 0;
+
+ for (i = 0; i < map->num_members; i++) {
+ int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+
+ if (idx & IMSM_ORD_REBUILD)
+ map->failed_disk_num = i;
+ }
+ super->updates_pending++;
break;
}
if (is_gen_migration(dev)) {
if (dl->index == -1 && !activate_new)
continue;
+ if (!drive_validate_sector_size(super, dl))
+ continue;
+
/* Does this unused device have the requisite free space?
* It needs to be able to cover all member volumes
*/
di->component_size = a->info.component_size;
di->container_member = inst;
di->bb.supported = 1;
- if (dev->rwh_policy == RWH_DISTRIBUTED) {
- di->consistency_policy = CONSISTENCY_POLICY_PPL;
+ if (a->info.consistency_policy == CONSISTENCY_POLICY_PPL) {
di->ppl_sector = get_ppl_sector(super, inst);
- di->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+ di->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
}
super->random = random32();
di->next = rv;
}
case update_prealloc_badblocks_mem:
break;
+ case update_rwh_policy: {
+ struct imsm_update_rwh_policy *u = (void *)update->buf;
+ int target = u->dev_idx;
+ struct imsm_dev *dev = get_imsm_dev(super, target);
+ if (!dev) {
+ dprintf("could not find subarray-%d\n", target);
+ break;
+ }
+
+ if (dev->rwh_policy != u->new_policy) {
+ dev->rwh_policy = u->new_policy;
+ super->updates_pending++;
+ }
+ break;
+ }
default:
pr_err("error: unsuported process update type:(type: %d)\n", type);
}
super->extra_space += sizeof(struct bbm_log) -
get_imsm_bbm_log_size(super->bbm_log);
break;
+ case update_rwh_policy: {
+ if (update->len < (int)sizeof(struct imsm_update_rwh_policy))
+ return 0;
+ break;
+ }
default:
return 0;
}
struct imsm_dev *dev;
struct imsm_map *map;
unsigned int i, j, num_members;
- __u32 ord;
+ __u32 ord, ord_map0;
struct bbm_log *log = super->bbm_log;
dprintf("deleting device[%d] from imsm_super\n", index);
* ord-flags to the first map
*/
ord = get_imsm_ord_tbl_ent(dev, j, MAP_X);
+ ord_map0 = get_imsm_ord_tbl_ent(dev, j, MAP_0);
if (ord_to_idx(ord) <= index)
continue;
map = get_imsm_map(dev, MAP_0);
- set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
+ set_imsm_ord_tbl_ent(map, j, ord_map0 - 1);
map = get_imsm_map(dev, MAP_1);
if (map)
set_imsm_ord_tbl_ent(map, j, ord - 1);
__free_imsm_disk(dl);
}
}
-#endif /* MDASSEMBLE */
static void close_targets(int *targets, int new_disks)
{
return 0;
}
-#ifndef MDASSEMBLE
+
/*******************************************************************************
* Function: imsm_record_badblock
* Description: This routine stores new bad block record in BBM log
drv = "isci";
else if (hba && hba->type == SYS_DEV_SATA)
drv = "ahci";
+ else if (hba && hba->type == SYS_DEV_VMD)
+ drv = "vmd";
+ else if (hba && hba->type == SYS_DEV_NVME)
+ drv = "nvme";
else
drv = "unknown";
dprintf("path: %s hba: %s attached: %s\n",
*/
static struct mdinfo *get_spares_for_grow(struct supertype *st)
{
- unsigned long long min_size = min_acceptable_spare_size_imsm(st);
- return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
+ struct spare_criteria sc;
+
+ get_spare_criteria_imsm(st, &sc);
+ return container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
}
/******************************************************************************
*/
spares = get_spares_for_grow(st);
- if (spares == NULL
- || delta_disks > spares->array.spare_disks) {
+ if (spares == NULL || delta_disks > spares->array.spare_disks) {
pr_err("imsm: ERROR: Cannot get spare devices for %s.\n", geo->dev_name);
i = -1;
goto abort;
/* Find volume during the reshape */
for (dv = super->devlist; dv; dv = dv->next) {
- if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
- && dv->dev->vol.migr_state == 1) {
+ if (dv->dev->vol.migr_type == MIGR_GEN_MIGR &&
+ dv->dev->vol.migr_state == 1) {
dev = dv->dev;
migr_vol_qan++;
}
/* clear migr_rec on disks after successful migration */
struct dl *d;
- memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*sector_size);
+ memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
for (d = super->disks; d; d = d->next) {
if (d->index < 0 || is_failed(&d->disk))
continue;
return ret_val;
}
-#endif /* MDASSEMBLE */
-
struct superswitch super_imsm = {
-#ifndef MDASSEMBLE
.examine_super = examine_super_imsm,
.brief_examine_super = brief_examine_super_imsm,
.brief_examine_subarrays = brief_examine_subarrays_imsm,
.recover_backup = recover_backup_imsm,
.copy_metadata = copy_metadata_imsm,
.examine_badblocks = examine_badblocks_imsm,
-#endif
.match_home = match_home_imsm,
.uuid_from_super= uuid_from_super_imsm,
.getinfo_super = getinfo_super_imsm,
.update_super = update_super_imsm,
.avail_size = avail_size_imsm,
- .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
+ .get_spare_criteria = get_spare_criteria_imsm,
.compare_super = compare_super_imsm,
.external = 1,
.name = "imsm",
-#ifndef MDASSEMBLE
/* for mdmon */
.open_new = imsm_open_new,
.set_array_state= imsm_set_array_state,
.record_bad_block = imsm_record_badblock,
.clear_bad_block = imsm_clear_badblock,
.get_bad_blocks = imsm_get_badblocks,
-#endif /* MDASSEMBLE */
};