Add one sanity check for missing device
diff --git a/super-intel.c b/super-intel.c
index fbff215c0ec12521ea6fec407a4217a7c2eaf2df..a429940d48a51754a9fd672503a5a4b3590ed915 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -92,6 +92,9 @@
 #define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
 #define SECT_PER_MB_SHIFT 11
 #define MAX_SECTOR_SIZE 4096
+#define MULTIPLE_PPL_AREA_SIZE_IMSM (1024 * 1024) /* Size of the whole
+                                                  * multiple PPL area
+                                                  */
 
 /* Disk configuration info. */
 #define IMSM_MAX_DEVICES 255
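
For scale: the new define reserves a fixed 1 MiB multiple-PPL region per member, versus one header plus PPL_ENTRY_SPACE for the single-PPL layout used so far. A minimal sketch of the byte-to-sector conversion that the later getinfo/activate hunks perform with a >> 9 shift (the helper name is hypothetical; PPL_HEADER_SIZE is assumed to be the 4 KiB value from linux/raid/md_p.h):

/* Hedged sketch -- hypothetical helper, not part of the change.
 * PPL_ENTRY_SPACE and MULTIPLE_PPL_AREA_SIZE_IMSM are the defines above;
 * PPL_HEADER_SIZE (4096) is assumed to come from linux/raid/md_p.h.
 */
static unsigned int ppl_size_sectors(int multiple_ppls)
{
	if (multiple_ppls)
		return MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;	/* 1 MiB -> 2048 sectors */
	return (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;	/* 132 KiB -> 264 sectors */
}
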
@@ -207,6 +210,9 @@ struct imsm_dev {
 #define RWH_OFF 0
 #define RWH_DISTRIBUTED 1
 #define RWH_JOURNALING_DRIVE 2
+#define RWH_MULTIPLE_DISTRIBUTED 3
+#define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4
+#define RWH_MULTIPLE_OFF 5
        __u8  rwh_policy; /* Raid Write Hole Policy */
        __u8  jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
        __u8  filler1;
@@ -232,8 +238,13 @@ struct imsm_super {
        __u32 orig_family_num;          /* 0x40 - 0x43 original family num */
        __u32 pwr_cycle_count;          /* 0x44 - 0x47 simulated power cycle count for array */
        __u32 bbm_log_size;             /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
-#define IMSM_FILLERS 35
-       __u32 filler[IMSM_FILLERS];     /* 0x4C - 0xD7 RAID_MPB_FILLERS */
+       __u16 num_raid_devs_created;    /* 0x4C - 0x4D Used for generating unique
+                                        * volume IDs for raid_dev created in this array
+                                        * (starts at 1)
+                                        */
+       __u16 filler1;                  /* 0x4E - 0x4F */
+#define IMSM_FILLERS 34
+       __u32 filler[IMSM_FILLERS];     /* 0x50 - 0xD7 RAID_MPB_FILLERS */
        struct imsm_disk disk[1];       /* 0xD8 diskTbl[numDisks] */
        /* here comes imsm_dev[num_raid_devs] */
        /* here comes BBM logs */
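
Since imsm_super is an on-disk header, the new 16-bit counter plus the 16-bit filler must occupy exactly the 32-bit word freed by shrinking IMSM_FILLERS from 35 to 34, leaving every later offset (fillers at 0x50, disk table at 0xD8) untouched. A hedged compile-time check of that invariant (assuming C11 static_assert and the packed layout the 0x.. offset comments imply):

#include <stddef.h>
#include <assert.h>

/* Sketch only: verifies the repacked header keeps its on-disk offsets. */
static_assert(offsetof(struct imsm_super, num_raid_devs_created) == 0x4C,
	      "counter must reuse the first filler word");
static_assert(offsetof(struct imsm_super, filler) == 0x50,
	      "remaining fillers must still start at 0x50");
static_assert(offsetof(struct imsm_super, disk) == 0xD8,
	      "disk table must stay at 0xD8");
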
@@ -279,7 +290,7 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed"
                                 *  already been migrated and must
                                 *  be recovered from checkpoint area */
 
-#define PPL_ENTRY_SPACE (128 * 1024) /* Size of the PPL, without the header */
+#define PPL_ENTRY_SPACE (128 * 1024) /* Size of single PPL, without the header */
 
 struct migr_record {
        __u32 rec_status;           /* Status used to determine how to restart
@@ -1331,6 +1342,20 @@ static unsigned long long round_size_to_mb(unsigned long long size, unsigned int
        return size;
 }
 
+static int able_to_resync(int raid_level, int missing_disks)
+{
+       int max_missing_disks = 0;
+
+       switch (raid_level) {
+       case 10:
+               max_missing_disks = 1;
+               break;
+       default:
+               max_missing_disks = 0;
+       }
+       return missing_disks <= max_missing_disks;
+}
+
 /* try to determine how much space is reserved for metadata from
  * the last get_extents() entry on the smallest active disk,
  * otherwise fallback to the default
@@ -1378,37 +1403,46 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super)
        return  (remainder < rv) ? remainder : rv;
 }
 
-/* Return minimum size of a spare that can be used in this array*/
-static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
+/*
+ * Return minimum size of a spare and sector size
+ * that can be used in this array
+ */
+int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
 {
        struct intel_super *super = st->sb;
        struct dl *dl;
        struct extent *e;
        int i;
-       unsigned long long rv = 0;
+       unsigned long long size = 0;
+
+       c->min_size = 0;
+       c->sector_size = 0;
 
        if (!super)
-               return rv;
+               return -EINVAL;
        /* find first active disk in array */
        dl = super->disks;
        while (dl && (is_failed(&dl->disk) || dl->index == -1))
                dl = dl->next;
        if (!dl)
-               return rv;
+               return -EINVAL;
        /* find last lba used by subarrays */
        e = get_extents(super, dl);
        if (!e)
-               return rv;
+               return -EINVAL;
        for (i = 0; e[i].size; i++)
                continue;
        if (i > 0)
-               rv = e[i-1].start + e[i-1].size;
+               size = e[i-1].start + e[i-1].size;
        free(e);
 
        /* add the amount of space needed for metadata */
-       rv = rv + imsm_min_reserved_sectors(super);
+       size += imsm_min_reserved_sectors(super);
+
+       c->min_size = size * 512;
+       c->sector_size = super->sector_size;
 
-       return rv * 512;
+       return 0;
 }
 
 static int is_gen_migration(struct imsm_dev *dev);
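
The new interface reports two constraints instead of one: a minimum size in bytes and the members' logical sector size, both placed in struct spare_criteria. A hedged sketch of how a caller such as the spare-selection path might apply them (helper name and parameters are illustrative, not from the change):

/* Sketch: reject a candidate spare that is too small or whose logical
 * sector size differs from the array members'.
 */
static int spare_matches_criteria(const struct spare_criteria *c,
				  unsigned long long dev_size_bytes,
				  unsigned int dev_sector_size)
{
	if (c->min_size && dev_size_bytes < c->min_size)
		return 0;
	if (c->sector_size && c->sector_size != dev_sector_size)
		return 0;
	return 1;
}
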
@@ -1468,13 +1502,16 @@ static void print_imsm_dev(struct intel_super *super,
                       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
        } else
                printf("      This Slot : ?\n");
+       printf("    Sector Size : %u\n", super->sector_size);
        sz = __le32_to_cpu(dev->size_high);
        sz <<= 32;
        sz += __le32_to_cpu(dev->size_low);
-       printf("     Array Size : %llu%s\n", (unsigned long long)sz,
+       printf("     Array Size : %llu%s\n",
+                  (unsigned long long)sz * 512 / super->sector_size,
               human_size(sz * 512));
        sz = blocks_per_member(map);
-       printf("   Per Dev Size : %llu%s\n", (unsigned long long)sz,
+       printf("   Per Dev Size : %llu%s\n",
+                  (unsigned long long)sz * 512 / super->sector_size,
               human_size(sz * 512));
        printf("  Sector Offset : %llu\n",
                pba_of_lba0(map));
@@ -1522,12 +1559,16 @@ static void print_imsm_dev(struct intel_super *super,
        printf("    Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
                                         "dirty" : "clean");
        printf("     RWH Policy : ");
-       if (dev->rwh_policy == RWH_OFF)
+       if (dev->rwh_policy == RWH_OFF || dev->rwh_policy == RWH_MULTIPLE_OFF)
                printf("off\n");
        else if (dev->rwh_policy == RWH_DISTRIBUTED)
                printf("PPL distributed\n");
        else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
                printf("PPL journaling drive\n");
+       else if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+               printf("Multiple distributed PPLs\n");
+       else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE)
+               printf("Multiple PPLs on journaling drive\n");
        else
                printf("<unknown:%d>\n", dev->rwh_policy);
 }
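
The same old/new policy pairs recur in several hunks below (getinfo, dirty marking, update_subarray). A small hypothetical grouping, only to make the pairing explicit; the change itself keeps the explicit comparisons:

/* Sketch: how the printing and getinfo code groups the RWH values. */
static int rwh_is_off(__u8 policy)
{
	return policy == RWH_OFF || policy == RWH_MULTIPLE_OFF;
}

static int rwh_uses_distributed_ppl(__u8 policy)
{
	return policy == RWH_DISTRIBUTED ||
	       policy == RWH_MULTIPLE_DISTRIBUTED;
}
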
@@ -2328,6 +2369,8 @@ static void print_imsm_capability(const struct imsm_orom *orom)
        printf("       Platform : Intel(R) ");
        if (orom->capabilities == 0 && orom->driver_features == 0)
                printf("Matrix Storage Manager\n");
+       else if (imsm_orom_is_enterprise(orom) && orom->major_ver >= 6)
+               printf("Virtual RAID on CPU\n");
        else
                printf("Rapid Storage Technology%s\n",
                        imsm_orom_is_enterprise(orom) ? " enterprise" : "");
@@ -3277,10 +3320,16 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
        memset(info->uuid, 0, sizeof(info->uuid));
        info->recovery_start = MaxSector;
 
-       if (info->array.level == 5 && dev->rwh_policy == RWH_DISTRIBUTED) {
+       if (info->array.level == 5 &&
+           (dev->rwh_policy == RWH_DISTRIBUTED ||
+            dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)) {
                info->consistency_policy = CONSISTENCY_POLICY_PPL;
                info->ppl_sector = get_ppl_sector(super, super->current_vol);
-               info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+               if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+                       info->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
+               else
+                       info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE)
+                                         >> 9;
        } else if (info->array.level <= 0) {
                info->consistency_policy = CONSISTENCY_POLICY_NONE;
        } else {
@@ -3469,6 +3518,9 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
                        __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
                        __u32 idx = ord_to_idx(ord);
 
+                       if (super->disks && super->disks->index == (int)idx)
+                               info->disk.raid_disk = j;
+
                        if (!(ord & IMSM_ORD_REBUILD) &&
                            get_imsm_missing(super, idx)) {
                                missing = 1;
@@ -4006,7 +4058,7 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super,
 
        /* duplicate and then set the target end state in map[0] */
        memcpy(dest, src, sizeof_imsm_map(src));
-       if (migr_type == MIGR_REBUILD || migr_type ==  MIGR_GEN_MIGR) {
+       if (migr_type == MIGR_GEN_MIGR) {
                __u32 ord;
                int i;
 
@@ -4148,8 +4200,8 @@ int check_mpb_migr_compatibility(struct intel_super *super)
                        if (pba_of_lba0(map0) != pba_of_lba0(map1))
                                /* migration optimization area was used */
                                return -1;
-                       if (migr_rec->ascending_migr == 0
-                               && migr_rec->dest_depth_per_unit > 0)
+                       if (migr_rec->ascending_migr == 0 &&
+                           migr_rec->dest_depth_per_unit > 0)
                                /* descending reshape not supported yet */
                                return -1;
                }
@@ -4224,8 +4276,8 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
        sectors = mpb_sectors(anchor, sector_size) - 1;
        free(anchor);
 
-       if (posix_memalign(&super->migr_rec_buf, sector_size,
-           MIGR_REC_BUF_SECTORS*sector_size) != 0) {
+       if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
+           MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE) != 0) {
                pr_err("could not allocate migr_rec buffer\n");
                free(super->buf);
                return 2;
@@ -4464,8 +4516,7 @@ static int find_intel_hba_capability(int fd, struct intel_super *super, char *de
                                hba = hba->next;
                        }
                        fprintf(stderr, ").\n"
-                               "    Mixing devices attached to different %s is not allowed.\n",
-                               hba_name->type == SYS_DEV_VMD ? "VMD domains" : "controllers");
+                               "    Mixing devices attached to different controllers is not allowed.\n");
                }
                return 2;
        }
@@ -5253,8 +5304,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
                        pr_err("could not allocate new mpb\n");
                        return 0;
                }
-               if (posix_memalign(&super->migr_rec_buf, sector_size,
-                                  MIGR_REC_BUF_SECTORS*sector_size) != 0) {
+               if (posix_memalign(&super->migr_rec_buf, MAX_SECTOR_SIZE,
+                                  MIGR_REC_BUF_SECTORS*
+                                  MAX_SECTOR_SIZE) != 0) {
                        pr_err("could not allocate migr_rec buffer\n");
                        free(super->buf);
                        free(super);
@@ -5368,11 +5420,13 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
                set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
        }
        mpb->num_raid_devs++;
+       mpb->num_raid_devs_created++;
+       dev->my_vol_raid_dev_num = mpb->num_raid_devs_created;
 
        if (s->consistency_policy <= CONSISTENCY_POLICY_RESYNC) {
-               dev->rwh_policy = RWH_OFF;
+               dev->rwh_policy = RWH_MULTIPLE_OFF;
        } else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
-               dev->rwh_policy = RWH_DISTRIBUTED;
+               dev->rwh_policy = RWH_MULTIPLE_DISTRIBUTED;
        } else {
                free(dev);
                free(dv);
@@ -5460,6 +5514,22 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
        return 1;
 }
 
+static int drive_validate_sector_size(struct intel_super *super, struct dl *dl)
+{
+       unsigned int member_sector_size;
+
+       if (dl->fd < 0) {
+               pr_err("Invalid file descriptor for %s\n", dl->devname);
+               return 0;
+       }
+
+       if (!get_dev_sector_size(dl->fd, dl->devname, &member_sector_size))
+               return 0;
+       if (member_sector_size != super->sector_size)
+               return 0;
+       return 1;
+}
+
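
drive_validate_sector_size() relies on get_dev_sector_size() to report the drive's logical sector size; a hedged sketch of the kind of query such a helper performs on Linux (mdadm's real helper may differ in error handling and fallbacks):

#include <sys/ioctl.h>
#include <linux/fs.h>

/* Sketch: query the logical sector size of an open block device. */
static int query_logical_sector_size(int fd, unsigned int *sector_size)
{
	int ssz = 0;

	if (ioctl(fd, BLKSSZGET, &ssz) != 0 || ssz <= 0)
		return 0;
	*sector_size = (unsigned int)ssz;
	return 1;
}
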
 static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
                                     int fd, char *devname)
 {
@@ -5499,6 +5569,11 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
                return 1;
        }
 
+       if (!drive_validate_sector_size(super, dl)) {
+               pr_err("Combining drives of different sector size in one volume is not allowed\n");
+               return 1;
+       }
+
        /* add a pristine spare to the metadata */
        if (dl->index < 0) {
                dl->index = super->anchor->num_disks;
@@ -5696,7 +5771,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
                } else if (super->hba->type == SYS_DEV_VMD && super->orom &&
                    !imsm_orom_has_tpv_support(super->orom)) {
                        pr_err("\tPlatform configuration does not support non-Intel NVMe drives.\n"
-                              "\tPlease refer to Intel(R) RSTe user guide.\n");
+                              "\tPlease refer to Intel(R) RSTe/VROC user guide.\n");
                        free(dd->devname);
                        free(dd);
                        return 1;
@@ -5709,21 +5784,15 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
        if (super->sector_size == 0) {
                /* this a first device, so sector_size is not set yet */
                super->sector_size = member_sector_size;
-       } else if (member_sector_size != super->sector_size) {
-               pr_err("Mixing between different sector size is forbidden, aborting...\n");
-               if (dd->devname)
-                       free(dd->devname);
-               free(dd);
-               return 1;
        }
 
        /* clear migr_rec when adding disk to container */
-       memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*super->sector_size);
-       if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*super->sector_size,
+       memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
+       if (lseek64(fd, size - MIGR_REC_SECTOR_POSITION*member_sector_size,
            SEEK_SET) >= 0) {
                if ((unsigned int)write(fd, super->migr_rec_buf,
-                   MIGR_REC_BUF_SECTORS*super->sector_size) !=
-                   MIGR_REC_BUF_SECTORS*super->sector_size)
+                   MIGR_REC_BUF_SECTORS*member_sector_size) !=
+                   MIGR_REC_BUF_SECTORS*member_sector_size)
                        perror("Write migr_rec failed");
        }
 
@@ -5915,7 +5984,7 @@ static int write_super_imsm(struct supertype *st, int doclose)
        }
        if (clear_migration_record)
                memset(super->migr_rec_buf, 0,
-                   MIGR_REC_BUF_SECTORS*sector_size);
+                   MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
 
        if (sector_size == 4096)
                convert_to_4k(super);
@@ -6014,7 +6083,12 @@ static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd
        struct ppl_header *ppl_hdr;
        int ret;
 
-       ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+       /* first clear entire ppl space */
+       ret = zero_disk_range(fd, info->ppl_sector, info->ppl_size);
+       if (ret)
+               return ret;
+
+       ret = posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE);
        if (ret) {
                pr_err("Failed to allocate PPL header buffer\n");
                return ret;
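
zero_disk_range() is called with the PPL start and length expressed in 512-byte sectors, so the whole multiple-PPL area is cleared before the first header is written. A minimal sketch of what such a helper might do, under the assumption of sector-granular arguments (this is not mdadm's actual implementation):

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Sketch only: write zeros over `count` 512-byte sectors at `sector`. */
static int zero_range_sketch(int fd, unsigned long long sector,
			     unsigned long long count)
{
	void *buf;
	int ret = 0;

	if (posix_memalign(&buf, 4096, 4096))
		return 1;
	memset(buf, 0, 4096);

	while (count) {
		unsigned long long n = count > 8 ? 8 : count;	/* 8 sectors = 4 KiB */

		if (pwrite(fd, buf, n * 512, sector * 512) != (ssize_t)(n * 512)) {
			ret = 1;
			break;
		}
		sector += n;
		count -= n;
	}
	free(buf);
	return ret;
}
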
@@ -6024,6 +6098,16 @@ static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd
        ppl_hdr = buf;
        memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
        ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num);
+
+       if (info->mismatch_cnt) {
+               /*
+                * We are overwriting an invalid ppl. Make one entry with wrong
+                * checksum to prevent the kernel from skipping resync.
+                */
+               ppl_hdr->entries_count = __cpu_to_le32(1);
+               ppl_hdr->entries[0].checksum = ~0;
+       }
+
        ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
 
        if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
@@ -6053,60 +6137,122 @@ static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
        struct ppl_header *ppl_hdr;
        __u32 crc;
        struct imsm_dev *dev;
-       struct imsm_map *map;
        __u32 idx;
+       unsigned int i;
+       unsigned long long ppl_offset = 0;
+       unsigned long long prev_gen_num = 0;
 
        if (disk->disk.raid_disk < 0)
                return 0;
 
-       if (posix_memalign(&buf, 4096, PPL_HEADER_SIZE)) {
+       if (posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE)) {
                pr_err("Failed to allocate PPL header buffer\n");
                return -1;
        }
 
        dev = get_imsm_dev(super, info->container_member);
-       map = get_imsm_map(dev, MAP_X);
-       idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_X);
+       idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_0);
        d = get_imsm_dl_disk(super, idx);
 
        if (!d || d->index < 0 || is_failed(&d->disk))
                goto out;
 
-       if (lseek64(d->fd, info->ppl_sector * 512, SEEK_SET) < 0) {
-               perror("Failed to seek to PPL header location");
-               ret = -1;
-               goto out;
-       }
+       ret = 1;
+       while (ppl_offset < MULTIPLE_PPL_AREA_SIZE_IMSM) {
+               dprintf("Checking potential PPL at offset: %llu\n", ppl_offset);
 
-       if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
-               perror("Read PPL header failed");
-               ret = -1;
-               goto out;
-       }
+               if (lseek64(d->fd, info->ppl_sector * 512 + ppl_offset,
+                           SEEK_SET) < 0) {
+                       perror("Failed to seek to PPL header location");
+                       ret = -1;
+                       goto out;
+               }
 
-       ppl_hdr = buf;
+               if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+                       perror("Read PPL header failed");
+                       ret = -1;
+                       goto out;
+               }
 
-       crc = __le32_to_cpu(ppl_hdr->checksum);
-       ppl_hdr->checksum = 0;
+               ppl_hdr = buf;
 
-       if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
-               dprintf("Wrong PPL header checksum on %s\n",
-                       d->devname);
-               ret = 1;
-       }
+               crc = __le32_to_cpu(ppl_hdr->checksum);
+               ppl_hdr->checksum = 0;
+
+               if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
+                       dprintf("Wrong PPL header checksum on %s\n",
+                               d->devname);
+                       goto out;
+               }
+
+               if (prev_gen_num > __le64_to_cpu(ppl_hdr->generation)) {
+                       /* previous was newest, it was already checked */
+                       goto out;
+               }
+
+               if ((__le32_to_cpu(ppl_hdr->signature) !=
+                             super->anchor->orig_family_num)) {
+                       dprintf("Wrong PPL header signature on %s\n",
+                               d->devname);
+                       ret = 1;
+                       goto out;
+               }
+
+               ret = 0;
+               prev_gen_num = __le64_to_cpu(ppl_hdr->generation);
 
-       if (!ret && (__le32_to_cpu(ppl_hdr->signature) !=
-                     super->anchor->orig_family_num)) {
-               dprintf("Wrong PPL header signature on %s\n",
-                       d->devname);
-               ret = 1;
+               ppl_offset += PPL_HEADER_SIZE;
+               for (i = 0; i < __le32_to_cpu(ppl_hdr->entries_count); i++)
+                       ppl_offset +=
+                                  __le32_to_cpu(ppl_hdr->entries[i].pp_size);
        }
 
 out:
        free(buf);
 
-       if (ret == 1 && map->map_state == IMSM_T_STATE_UNINITIALIZED)
-               return st->ss->write_init_ppl(st, info, d->fd);
+       /*
+        * Update metadata to use multiple PPLs area (1MB).
+        * This is done once for all RAID members.
+        */
+       if (info->consistency_policy == CONSISTENCY_POLICY_PPL &&
+           info->ppl_size != (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9)) {
+               char subarray[20];
+               struct mdinfo *member_dev;
+
+               sprintf(subarray, "%d", info->container_member);
+
+               if (mdmon_running(st->container_devnm))
+                       st->update_tail = &st->updates;
+
+               if (st->ss->update_subarray(st, subarray, "ppl", NULL)) {
+                       pr_err("Failed to update subarray %s\n",
+                             subarray);
+               } else {
+                       if (st->update_tail)
+                               flush_metadata_updates(st);
+                       else
+                               st->ss->sync_metadata(st);
+                       info->ppl_size = (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
+                       for (member_dev = info->devs; member_dev;
+                            member_dev = member_dev->next)
+                               member_dev->ppl_size =
+                                   (MULTIPLE_PPL_AREA_SIZE_IMSM >> 9);
+               }
+       }
+
+       if (ret == 1) {
+               struct imsm_map *map = get_imsm_map(dev, MAP_X);
+
+               if (map->map_state == IMSM_T_STATE_UNINITIALIZED ||
+                  (map->map_state == IMSM_T_STATE_NORMAL &&
+                  !(dev->vol.dirty & RAIDVOL_DIRTY)) ||
+                  (dev->vol.migr_state == MIGR_REBUILD &&
+                   dev->vol.curr_migr_unit == 0 &&
+                   get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_1) != idx))
+                       ret = st->ss->write_init_ppl(st, info, d->fd);
+               else
+                       info->mismatch_cnt++;
+       }
 
        return ret;
 }
@@ -6407,7 +6553,7 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist,
 
        for (memb = mdstat ; memb ; memb = memb->next) {
                if (memb->metadata_version &&
-                   (strncmp(memb->metadata_version, "external:", 9) == 0)  &&
+                   (strncmp(memb->metadata_version, "external:", 9) == 0) &&
                    (strcmp(&memb->metadata_version[9], name) == 0) &&
                    !is_subarray(memb->metadata_version+9) &&
                    memb->members) {
@@ -6555,7 +6701,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
 
        for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
                char *devname = tmpdev->devname;
-               struct stat stb;
+               dev_t rdev;
                struct supertype *tst;
                int dfd;
                if (tmpdev->used > 1)
@@ -6571,14 +6717,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
                        dprintf("cannot open device %s: %s\n",
                                devname, strerror(errno));
                        tmpdev->used = 2;
-               } else if (fstat(dfd, &stb)< 0) {
-                       /* Impossible! */
-                       dprintf("fstat failed for %s: %s\n",
-                               devname, strerror(errno));
-                       tmpdev->used = 2;
-               } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
-                       dprintf("%s is not a block device.\n",
-                               devname);
+               } else if (!fstat_is_blkdev(dfd, devname, &rdev)) {
                        tmpdev->used = 2;
                } else if (must_be_container(dfd)) {
                        struct supertype *cst;
@@ -6600,7 +6739,7 @@ count_volumes_list(struct md_list *devlist, char *homehost,
                        if (cst)
                                cst->ss->free_super(cst);
                } else {
-                       tmpdev->st_rdev = stb.st_rdev;
+                       tmpdev->st_rdev = rdev;
                        if (tst->ss->load_super(tst,dfd, NULL)) {
                                dprintf("no RAID superblock on %s\n",
                                        devname);
@@ -6855,7 +6994,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
                                         unsigned long long *freesize,
                                         int verbose)
 {
-       struct stat stb;
+       dev_t rdev;
        struct intel_super *super = st->sb;
        struct imsm_super *mpb;
        struct dl *dl;
@@ -6920,13 +7059,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
        }
 
        /* This device must be a member of the set */
-       if (stat(dev, &stb) < 0)
-               return 0;
-       if ((S_IFMT & stb.st_mode) != S_IFBLK)
+       if (!stat_is_blkdev(dev, &rdev))
                return 0;
        for (dl = super->disks ; dl ; dl = dl->next) {
-               if (dl->major == (int)major(stb.st_rdev) &&
-                   dl->minor == (int)minor(stb.st_rdev))
+               if (dl->major == (int)major(rdev) &&
+                   dl->minor == (int)minor(rdev))
                        break;
        }
        if (!dl) {
@@ -7377,9 +7514,9 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
                        return 2;
 
                if (strcmp(update, "ppl") == 0)
-                       new_policy = RWH_DISTRIBUTED;
+                       new_policy = RWH_MULTIPLE_DISTRIBUTED;
                else
-                       new_policy = RWH_OFF;
+                       new_policy = RWH_MULTIPLE_OFF;
 
                if (st->update_tail) {
                        struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u));
@@ -7524,6 +7661,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                int slot;
                int chunk;
                char *ep;
+               int level;
 
                if (subarray &&
                    (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
@@ -7532,6 +7670,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                dev = get_imsm_dev(super, i);
                map = get_imsm_map(dev, MAP_0);
                map2 = get_imsm_map(dev, MAP_1);
+               level = get_imsm_raid_level(map);
 
                /* do not publish arrays that are in the middle of an
                 * unsupported migration
@@ -7554,8 +7693,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
                /* mdadm does not support all metadata features- set the bit in all arrays state */
                if (!validate_geometry_imsm_orom(super,
-                                                get_imsm_raid_level(map), /* RAID level */
-                                                imsm_level_to_layout(get_imsm_raid_level(map)),
+                                                level, /* RAID level */
+                                                imsm_level_to_layout(level),
                                                 map->num_members, /* raid disks */
                                                 &chunk, join_u32(dev->size_low, dev->size_high),
                                                 1 /* verbose */)) {
@@ -7579,6 +7718,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                        int idx;
                        int skip;
                        __u32 ord;
+                       int missing = 0;
 
                        skip = 0;
                        idx = get_imsm_disk_idx(dev, slot, MAP_0);
@@ -7592,19 +7732,27 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                                skip = 1;
                        if (d && is_failed(&d->disk))
                                skip = 1;
-                       if (ord & IMSM_ORD_REBUILD)
+                       if (!skip && (ord & IMSM_ORD_REBUILD))
                                recovery_start = 0;
 
                        /*
                         * if we skip some disks the array will be assembled degraded;
                         * reset resync start to avoid a dirty-degraded
                         * situation when performing the initial sync
-                        *
-                        * FIXME handle dirty degraded
                         */
-                       if ((skip || recovery_start == 0) &&
-                           !(dev->vol.dirty & RAIDVOL_DIRTY))
-                               this->resync_start = MaxSector;
+                       if (skip)
+                               missing++;
+
+                       if (!(dev->vol.dirty & RAIDVOL_DIRTY)) {
+                               if ((!able_to_resync(level, missing) ||
+                                    recovery_start == 0))
+                                       this->resync_start = MaxSector;
+                       } else {
+                               /*
+                                * FIXME handle dirty degraded
+                                */
+                       }
+
                        if (skip)
                                continue;
 
@@ -7640,10 +7788,12 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                                                map->blocks_per_strip;
                                info_d->ppl_sector = this->ppl_sector;
                                info_d->ppl_size = this->ppl_size;
+                               if (this->consistency_policy == CONSISTENCY_POLICY_PPL &&
+                                   recovery_start == 0)
+                                       this->resync_start = 0;
                        } else {
                                info_d->component_size = blocks_per_member(map);
                        }
-                       info_d->consistency_policy = this->consistency_policy;
 
                        info_d->bb.supported = 1;
                        get_volume_badblocks(super->bbm_log, ord_to_idx(ord),
@@ -7910,14 +8060,35 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
        /* end process for initialization and rebuild only
         */
        if (is_gen_migration(dev) == 0) {
-               __u8 map_state;
-               int failed;
+               int failed = imsm_count_failed(super, dev, MAP_0);
 
-               failed = imsm_count_failed(super, dev, MAP_0);
-               map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+               if (failed) {
+                       __u8 map_state;
+                       struct imsm_map *map = get_imsm_map(dev, MAP_0);
+                       struct imsm_map *map1;
+                       int i, ord, ord_map1;
+                       int rebuilt = 1;
 
-               if (failed)
-                       end_migration(dev, super, map_state);
+                       for (i = 0; i < map->num_members; i++) {
+                               ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+                               if (!(ord & IMSM_ORD_REBUILD))
+                                       continue;
+
+                               map1 = get_imsm_map(dev, MAP_1);
+                               if (!map1)
+                                       continue;
+
+                               ord_map1 = __le32_to_cpu(map1->disk_ord_tbl[i]);
+                               if (ord_map1 & IMSM_ORD_REBUILD)
+                                       rebuilt = 0;
+                       }
+
+                       if (rebuilt) {
+                               map_state = imsm_check_degraded(super, dev,
+                                                               failed, MAP_0);
+                               end_migration(dev, super, map_state);
+                       }
+               }
        }
        for (dl = super->missing; dl; dl = dl->next)
                mark_missing(super, dev, &dl->disk, dl->index);
@@ -8159,7 +8330,8 @@ skip_mark_checkpoint:
                        dev->vol.dirty = RAIDVOL_CLEAN;
                } else {
                        dev->vol.dirty = RAIDVOL_DIRTY;
-                       if (dev->rwh_policy == RWH_DISTRIBUTED)
+                       if (dev->rwh_policy == RWH_DISTRIBUTED ||
+                           dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
                                dev->vol.dirty |= RAIDVOL_DSRECORD_VALID;
                }
                super->updates_pending++;
@@ -8199,8 +8371,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
        int failed;
        int ord;
        __u8 map_state;
+       int rebuild_done = 0;
+       int i;
 
-       ord = imsm_disk_slot_to_ord(a, n);
+       ord = get_imsm_ord_tbl_ent(dev, n, MAP_X);
        if (ord < 0)
                return;
 
@@ -8218,6 +8392,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
                struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
 
                set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
+               rebuild_done = 1;
                super->updates_pending++;
        }
 
@@ -8280,7 +8455,39 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
                                dprintf_cont(" Map state change");
                                end_migration(dev, super, map_state);
                                super->updates_pending++;
+                       } else if (!rebuild_done) {
+                               break;
+                       }
+
+                       /* check if recovery is really finished */
+                       for (mdi = a->info.devs; mdi ; mdi = mdi->next)
+                               if (mdi->recovery_start != MaxSector) {
+                                       recovery_not_finished = 1;
+                                       break;
+                               }
+                       if (recovery_not_finished) {
+                               dprintf_cont("\n");
+                               dprintf("Rebuild has not finished yet, state not changed");
+                               if (a->last_checkpoint < mdi->recovery_start) {
+                                       a->last_checkpoint =
+                                               mdi->recovery_start;
+                                       super->updates_pending++;
+                               }
+                               break;
                        }
+
+                       dprintf_cont(" Rebuild done, still degraded");
+                       dev->vol.migr_state = 0;
+                       set_migr_type(dev, 0);
+                       dev->vol.curr_migr_unit = 0;
+
+                       for (i = 0; i < map->num_members; i++) {
+                               int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+
+                               if (idx & IMSM_ORD_REBUILD)
+                                       map->failed_disk_num = i;
+                       }
+                       super->updates_pending++;
                        break;
                }
                if (is_gen_migration(dev)) {
@@ -8447,6 +8654,9 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
                if (dl->index == -1 && !activate_new)
                        continue;
 
+               if (!drive_validate_sector_size(super, dl))
+                       continue;
+
                /* Does this unused device have the requisite free space?
                 * It needs to be able to cover all member volumes
                 */
@@ -8673,10 +8883,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
                di->component_size = a->info.component_size;
                di->container_member = inst;
                di->bb.supported = 1;
-               if (dev->rwh_policy == RWH_DISTRIBUTED) {
-                       di->consistency_policy = CONSISTENCY_POLICY_PPL;
+               if (a->info.consistency_policy == CONSISTENCY_POLICY_PPL) {
                        di->ppl_sector = get_ppl_sector(super, inst);
-                       di->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+                       di->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
                }
                super->random = random32();
                di->next = rv;
@@ -9907,7 +10116,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
        struct imsm_dev *dev;
        struct imsm_map *map;
        unsigned int i, j, num_members;
-       __u32 ord;
+       __u32 ord, ord_map0;
        struct bbm_log *log = super->bbm_log;
 
        dprintf("deleting device[%d] from imsm_super\n", index);
@@ -9929,12 +10138,13 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
                         * ord-flags to the first map
                         */
                        ord = get_imsm_ord_tbl_ent(dev, j, MAP_X);
+                       ord_map0 = get_imsm_ord_tbl_ent(dev, j, MAP_0);
 
                        if (ord_to_idx(ord) <= index)
                                continue;
 
                        map = get_imsm_map(dev, MAP_0);
-                       set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
+                       set_imsm_ord_tbl_ent(map, j, ord_map0 - 1);
                        map = get_imsm_map(dev, MAP_1);
                        if (map)
                                set_imsm_ord_tbl_ent(map, j, ord - 1);
@@ -10663,6 +10873,10 @@ static const char *imsm_get_disk_controller_domain(const char *path)
                        drv = "isci";
                else if (hba && hba->type == SYS_DEV_SATA)
                        drv = "ahci";
+               else if (hba && hba->type == SYS_DEV_VMD)
+                       drv = "vmd";
+               else if (hba && hba->type == SYS_DEV_NVME)
+                       drv = "nvme";
                else
                        drv = "unknown";
                dprintf("path: %s hba: %s attached: %s\n",
@@ -10800,8 +11014,10 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st,
  */
 static struct mdinfo *get_spares_for_grow(struct supertype *st)
 {
-       unsigned long long min_size = min_acceptable_spare_size_imsm(st);
-       return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
+       struct spare_criteria sc;
+
+       get_spare_criteria_imsm(st, &sc);
+       return container_choose_spares(st, &sc, NULL, NULL, NULL, 0);
 }
 
 /******************************************************************************
@@ -10843,8 +11059,7 @@ static int imsm_create_metadata_update_for_reshape(
         */
        spares = get_spares_for_grow(st);
 
-       if (spares == NULL
-           || delta_disks > spares->array.spare_disks) {
+       if (spares == NULL || delta_disks > spares->array.spare_disks) {
                pr_err("imsm: ERROR: Cannot get spare devices for %s.\n", geo->dev_name);
                i = -1;
                goto abort;
@@ -11609,8 +11824,8 @@ static int imsm_manage_reshape(
 
        /* Find volume during the reshape */
        for (dv = super->devlist; dv; dv = dv->next) {
-               if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
-                   && dv->dev->vol.migr_state == 1) {
+               if (dv->dev->vol.migr_type == MIGR_GEN_MIGR &&
+                   dv->dev->vol.migr_state == 1) {
                        dev = dv->dev;
                        migr_vol_qan++;
                }
@@ -11778,7 +11993,7 @@ static int imsm_manage_reshape(
        /* clear migr_rec on disks after successful migration */
        struct dl *d;
 
-       memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*sector_size);
+       memset(super->migr_rec_buf, 0, MIGR_REC_BUF_SECTORS*MAX_SECTOR_SIZE);
        for (d = super->disks; d; d = d->next) {
                if (d->index < 0 || is_failed(&d->disk))
                        continue;
@@ -11836,7 +12051,7 @@ struct superswitch super_imsm = {
        .update_super   = update_super_imsm,
 
        .avail_size     = avail_size_imsm,
-       .min_acceptable_spare_size = min_acceptable_spare_size_imsm,
+       .get_spare_criteria = get_spare_criteria_imsm,
 
        .compare_super  = compare_super_imsm,