]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super-intel.c
Retire mdassemble
[thirdparty/mdadm.git] / super-intel.c
index 433bb6d3df2c59be3c5f8c5c94ecba294e030356..0aed57c8e2c5b431d5b7fb873e2da4ecb6f6bc97 100644 (file)
@@ -102,6 +102,7 @@ struct imsm_disk {
 #define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
 #define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
 #define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
+#define JOURNAL_DISK    __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */
        __u32 status;                    /* 0xF0 - 0xF3 */
        __u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
        __u32 total_blocks_hi;           /* 0xF4 - 0xF5 total blocks hi */
@@ -155,6 +156,9 @@ struct imsm_vol {
 #define MIGR_STATE_CHANGE 4
 #define MIGR_REPAIR 5
        __u8  migr_type;        /* Initializing, Rebuilding, ... */
+#define RAIDVOL_CLEAN          0
+#define RAIDVOL_DIRTY          1
+#define RAIDVOL_DSRECORD_VALID 2
        __u8  dirty;
        __u8  fs_state;         /* fast-sync state for CnG (0xff == disabled) */
        __u16 verify_errors;    /* number of mismatches */
@@ -190,7 +194,24 @@ struct imsm_dev {
        __u16 cache_policy;
        __u8  cng_state;
        __u8  cng_sub_state;
-#define IMSM_DEV_FILLERS 10
+       __u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */
+
+       /* NVM_EN */
+       __u8 nv_cache_mode;
+       __u8 nv_cache_flags;
+
+       /* Unique Volume Id of the NvCache Volume associated with this volume */
+       __u32 nvc_vol_orig_family_num;
+       __u16 nvc_vol_raid_dev_num;
+
+#define RWH_OFF 0
+#define RWH_DISTRIBUTED 1
+#define RWH_JOURNALING_DRIVE 2
+       __u8  rwh_policy; /* Raid Write Hole Policy */
+       __u8  jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
+       __u8  filler1;
+
+#define IMSM_DEV_FILLERS 3
        __u32 filler[IMSM_DEV_FILLERS];
        struct imsm_vol vol;
 } __attribute__ ((packed));
@@ -239,9 +260,9 @@ struct bbm_log {
        struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
 } __attribute__ ((__packed__));
 
-#ifndef MDASSEMBLE
 static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
-#endif
+
+#define BLOCKS_PER_KB  (1024/512)
 
 #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
 
@@ -257,6 +278,9 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed"
 #define UNIT_SRC_IN_CP_AREA 1   /* Source data for curr_migr_unit has
                                 *  already been migrated and must
                                 *  be recovered from checkpoint area */
+
+#define PPL_ENTRY_SPACE (128 * 1024) /* Size of the PPL, without the header */
+
 struct migr_record {
        __u32 rec_status;           /* Status used to determine how to restart
                                     * migration in case it aborts
@@ -427,6 +451,7 @@ enum imsm_update_type {
        update_general_migration_checkpoint,
        update_size_change,
        update_prealloc_badblocks_mem,
+       update_rwh_policy,
 };
 
 struct imsm_update_activate_spare {
@@ -519,6 +544,12 @@ struct imsm_update_prealloc_bb_mem {
        enum imsm_update_type type;
 };
 
+struct imsm_update_rwh_policy {
+       enum imsm_update_type type;
+       int new_policy;
+       int dev_idx;
+};
+
 static const char *_sys_dev_type[] = {
        [SYS_DEV_UNKNOWN] = "Unknown",
        [SYS_DEV_SAS] = "SAS",
@@ -639,12 +670,10 @@ static struct supertype *match_metadata_desc_imsm(char *arg)
        return st;
 }
 
-#ifndef MDASSEMBLE
 static __u8 *get_imsm_version(struct imsm_super *mpb)
 {
        return &mpb->sig[MPB_SIG_LEN];
 }
-#endif
 
 /* retrieve a disk directly from the anchor when the anchor is known to be
  * up-to-date, currently only at load time
@@ -751,7 +780,6 @@ static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
        return size;
 }
 
-#ifndef MDASSEMBLE
 /* retrieve disk serial number list from a metadata update */
 static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
 {
@@ -763,7 +791,6 @@ static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
 
        return inf;
 }
-#endif
 
 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
 {
@@ -814,7 +841,6 @@ static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec)
        return addr;
 }
 
-#ifndef MDASSEMBLE
 /* get size of the bbm log */
 static __u32 get_imsm_bbm_log_size(struct bbm_log *log)
 {
@@ -945,7 +971,6 @@ static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned
 
        return 1;
 }
-#endif /* MDASSEMBLE */
 
 /* allocate and load BBM log from metadata */
 static int load_bbm_log(struct intel_super *super)
@@ -1288,6 +1313,24 @@ static int is_failed(struct imsm_disk *disk)
        return (disk->status & FAILED_DISK) == FAILED_DISK;
 }
 
+static int is_journal(struct imsm_disk *disk)
+{
+       return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
+}
+
+/* round array size down to closest MB and ensure it splits evenly
+ * between members
+ */
+static unsigned long long round_size_to_mb(unsigned long long size, unsigned int
+                                          disk_count)
+{
+       size /= disk_count;
+       size = (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+       size *= disk_count;
+
+       return size;
+}
+
 /* try to determine how much space is reserved for metadata from
  * the last get_extents() entry on the smallest active disk,
  * otherwise fallback to the default
@@ -1372,7 +1415,6 @@ static int is_gen_migration(struct imsm_dev *dev);
 
 #define IMSM_4K_DIV 8
 
-#ifndef MDASSEMBLE
 static __u64 blocks_per_migr_unit(struct intel_super *super,
                                  struct imsm_dev *dev);
 
@@ -1477,7 +1519,17 @@ static void print_imsm_dev(struct intel_super *super,
                                   blocks_per_migr_unit(super, dev));
        }
        printf("\n");
-       printf("    Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
+       printf("    Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
+                                        "dirty" : "clean");
+       printf("     RWH Policy : ");
+       if (dev->rwh_policy == RWH_OFF)
+               printf("off\n");
+       else if (dev->rwh_policy == RWH_DISTRIBUTED)
+               printf("PPL distributed\n");
+       else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
+               printf("PPL journaling drive\n");
+       else
+               printf("<unknown:%d>\n", dev->rwh_policy);
 }
 
 static void print_imsm_disk(struct imsm_disk *disk,
@@ -1496,9 +1548,10 @@ static void print_imsm_disk(struct imsm_disk *disk,
                printf("  Disk%02d Serial : %s\n", index, str);
        else
                printf("    Disk Serial : %s\n", str);
-       printf("          State :%s%s%s\n", is_spare(disk) ? " spare" : "",
-                                           is_configured(disk) ? " active" : "",
-                                           is_failed(disk) ? " failed" : "");
+       printf("          State :%s%s%s%s\n", is_spare(disk) ? " spare" : "",
+                                             is_configured(disk) ? " active" : "",
+                                             is_failed(disk) ? " failed" : "",
+                                             is_journal(disk) ? " journal" : "");
        printf("             Id : %08x\n", __le32_to_cpu(disk->scsi_id));
        sz = total_blocks(disk) - reserved;
        printf("    Usable Size : %llu%s\n",
@@ -1636,7 +1689,6 @@ void examine_migr_rec_imsm(struct intel_super *super)
                break;
        }
 }
-#endif /* MDASSEMBLE */
 
 void convert_from_4k_imsm_migr_rec(struct intel_super *super)
 {
@@ -1796,7 +1848,6 @@ static int imsm_check_attributes(__u32 attributes)
        return ret_val;
 }
 
-#ifndef MDASSEMBLE
 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
 
 static void examine_super_imsm(struct supertype *st, char *homehost)
@@ -1811,7 +1862,8 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
        __u32 reserved = imsm_reserved_sectors(super, super->disks);
        struct dl *dl;
 
-       snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
+       strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
+       str[MPB_SIG_LEN-1] = '\0';
        printf("          Magic : %s\n", str);
        snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
        printf("        Version : %s\n", get_imsm_version(mpb));
@@ -1986,7 +2038,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost)
 
        getinfo_super_imsm(st, &info, NULL);
        fname_from_uuid(st, &info, nbuf, ':');
-       printf("\n           UUID : %s\n", nbuf + 5);
+       printf("\n              UUID : %s\n", nbuf + 5);
 }
 
 static void brief_detail_super_imsm(struct supertype *st)
@@ -2509,8 +2561,6 @@ static int export_detail_platform_imsm(int verbose, char *controller_path)
        return result;
 }
 
-#endif
-
 static int match_home_imsm(struct supertype *st, char *homehost)
 {
        /* the imsm metadata format does not specify any host
@@ -2921,7 +2971,6 @@ out:
        return retval;
 }
 
-#ifndef MDASSEMBLE
 /*******************************************************************************
  * function: imsm_create_metadata_checkpoint_update
  * Description: It creates update for checkpoint change.
@@ -3062,7 +3111,6 @@ static int write_imsm_migr_rec(struct supertype *st)
                close(fd);
        return retval;
 }
-#endif /* MDASSEMBLE */
 
 /* spare/missing disks activations are not allowe when
  * array/container performs reshape operation, because
@@ -3113,6 +3161,15 @@ static unsigned long long imsm_component_size_aligment_check(int level,
        return component_size;
 }
 
+static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx)
+{
+       struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
+       struct imsm_map *map = get_imsm_map(dev, MAP_0);
+
+       return pba_of_lba0(map) +
+              (num_data_stripes(map) * map->blocks_per_strip);
+}
+
 static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
 {
        struct intel_super *super = st->sb;
@@ -3139,7 +3196,7 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
        info->array.utime         = 0;
        info->array.chunk_size    =
                __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
-       info->array.state         = !dev->vol.dirty;
+       info->array.state         = !(dev->vol.dirty & RAIDVOL_DIRTY);
        info->custom_array_size   = __le32_to_cpu(dev->size_high);
        info->custom_array_size   <<= 32;
        info->custom_array_size   |= __le32_to_cpu(dev->size_low);
@@ -3220,10 +3277,20 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
        memset(info->uuid, 0, sizeof(info->uuid));
        info->recovery_start = MaxSector;
 
+       if (info->array.level == 5 && dev->rwh_policy == RWH_DISTRIBUTED) {
+               info->consistency_policy = CONSISTENCY_POLICY_PPL;
+               info->ppl_sector = get_ppl_sector(super, super->current_vol);
+               info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+       } else if (info->array.level <= 0) {
+               info->consistency_policy = CONSISTENCY_POLICY_NONE;
+       } else {
+               info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
+       }
+
        info->reshape_progress = 0;
        info->resync_start = MaxSector;
        if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
-           dev->vol.dirty) &&
+           !(info->array.state & 1)) &&
            imsm_reshape_blocks_arrays_changes(super) == 0) {
                info->resync_start = 0;
        }
@@ -3263,11 +3330,10 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
                        if (used_disks > 0) {
                                array_blocks = blocks_per_member(map) *
                                        used_disks;
-                               /* round array size down to closest MB
-                                */
-                               info->custom_array_size = (array_blocks
-                                               >> SECT_PER_MB_SHIFT)
-                                               << SECT_PER_MB_SHIFT;
+                               info->custom_array_size =
+                                       round_size_to_mb(array_blocks,
+                                                        used_disks);
+
                        }
                }
                case MIGR_VERIFY:
@@ -3315,7 +3381,6 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
                             int look_in_map);
 
-#ifndef MDASSEMBLE
 static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
 {
        if (is_gen_migration(dev)) {
@@ -3331,7 +3396,6 @@ static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
                }
        }
 }
-#endif
 
 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
 {
@@ -3450,7 +3514,8 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
                 * found the 'most fresh' version of the metadata
                 */
                info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
-               info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
+               info->disk.state |= (is_spare(disk) || is_journal(disk)) ?
+                                   0 : (1 << MD_DISK_SYNC);
        }
 
        /* only call uuid_from_super_imsm when this disk is part of a populated container,
@@ -3905,14 +3970,13 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
                 */
                if (is_failed(&dl->disk))
                        dl->index = -2;
-               else if (is_spare(&dl->disk))
+               else if (is_spare(&dl->disk) || is_journal(&dl->disk))
                        dl->index = -1;
        }
 
        return 0;
 }
 
-#ifndef MDASSEMBLE
 /* When migrating map0 contains the 'destination' state while map1
  * contains the current state.  When not migrating map0 contains the
  * current state.  This routine assumes that map[0].map_state is set to
@@ -4000,7 +4064,6 @@ static void end_migration(struct imsm_dev *dev, struct intel_super *super,
        dev->vol.curr_migr_unit = 0;
        map->map_state = map_state;
 }
-#endif
 
 static int parse_raid_devices(struct intel_super *super)
 {
@@ -4446,7 +4509,6 @@ static int find_missing(struct intel_super *super)
        return 0;
 }
 
-#ifndef MDASSEMBLE
 static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
 {
        struct intel_disk *idisk = disk_list;
@@ -4984,7 +5046,6 @@ static int load_container_imsm(struct supertype *st, int fd, char *devname)
 {
        return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1);
 }
-#endif
 
 static int load_super_imsm(struct supertype *st, int fd, char *devname)
 {
@@ -5154,7 +5215,7 @@ static int check_name(struct intel_super *super, char *name, int quiet)
 }
 
 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
-                                 unsigned long long size, char *name,
+                                 struct shape *s, char *name,
                                  char *homehost, int *uuid,
                                  long long data_offset)
 {
@@ -5173,6 +5234,8 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
        unsigned long long array_blocks;
        size_t size_old, size_new;
        unsigned long long num_data_stripes;
+       unsigned int data_disks;
+       unsigned long long size_per_member;
 
        if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
                pr_err("This imsm-container already has the maximum of %d volumes\n", super->orom->vpa);
@@ -5227,7 +5290,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
                        disk->status = CONFIGURED_DISK | FAILED_DISK;
                        disk->scsi_id = __cpu_to_le32(~(__u32)0);
                        snprintf((char *) disk->serial, MAX_RAID_SERIAL_LEN,
-                                "missing:%d", i);
+                                "missing:%d", (__u8)i);
                }
                find_missing(super);
        } else {
@@ -5249,9 +5312,11 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
        strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
        array_blocks = calc_array_size(info->level, info->raid_disks,
                                               info->layout, info->chunk_size,
-                                              size * 2);
-       /* round array size down to closest MB */
-       array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+                                              s->size * BLOCKS_PER_KB);
+       data_disks = get_data_disks(info->level, info->layout,
+                                   info->raid_disks);
+       array_blocks = round_size_to_mb(array_blocks, data_disks);
+       size_per_member = array_blocks / data_disks;
 
        dev->size_low = __cpu_to_le32((__u32) array_blocks);
        dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
@@ -5263,7 +5328,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
        vol->curr_migr_unit = 0;
        map = get_imsm_map(dev, MAP_0);
        set_pba_of_lba0(map, super->create_offset);
-       set_blocks_per_member(map, info_to_blocks_per_member(info, size));
+       set_blocks_per_member(map, info_to_blocks_per_member(info,
+                                                            size_per_member /
+                                                            BLOCKS_PER_KB));
        map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
        map->failed_disk_num = ~0;
        if (info->level > 0)
@@ -5291,7 +5358,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
                map->num_domains = 1;
 
        /* info->size is only int so use the 'size' parameter instead */
-       num_data_stripes = (size * 2) / info_to_blocks_per_strip(info);
+       num_data_stripes = size_per_member / info_to_blocks_per_strip(info);
        num_data_stripes /= map->num_domains;
        set_num_data_stripes(map, num_data_stripes);
 
@@ -5302,6 +5369,20 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
        }
        mpb->num_raid_devs++;
 
+       if (s->consistency_policy == UnSet ||
+           s->consistency_policy == CONSISTENCY_POLICY_RESYNC ||
+           s->consistency_policy == CONSISTENCY_POLICY_NONE) {
+               dev->rwh_policy = RWH_OFF;
+       } else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
+               dev->rwh_policy = RWH_DISTRIBUTED;
+       } else {
+               free(dev);
+               free(dv);
+               pr_err("imsm does not support consistency policy %s\n",
+                      map_num(consistency_policies, s->consistency_policy));
+               return 0;
+       }
+
        dv->dev = dev;
        dv->index = super->current_vol;
        dv->next = super->devlist;
@@ -5313,7 +5394,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
 }
 
 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
-                          unsigned long long size, char *name,
+                          struct shape *s, char *name,
                           char *homehost, int *uuid,
                           unsigned long long data_offset)
 {
@@ -5336,7 +5417,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
        }
 
        if (st->sb)
-               return init_super_imsm_volume(st, info, size, name, homehost, uuid,
+               return init_super_imsm_volume(st, info, s, name, homehost, uuid,
                                              data_offset);
 
        if (info)
@@ -5381,7 +5462,6 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
        return 1;
 }
 
-#ifndef MDASSEMBLE
 static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
                                     int fd, char *devname)
 {
@@ -5927,10 +6007,142 @@ static int mgmt_disk(struct supertype *st)
        return 0;
 }
 
+__u32 crc32c_le(__u32 crc, unsigned char const *p, size_t len);
+
+static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd)
+{
+       struct intel_super *super = st->sb;
+       void *buf;
+       struct ppl_header *ppl_hdr;
+       int ret;
+
+       ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+       if (ret) {
+               pr_err("Failed to allocate PPL header buffer\n");
+               return ret;
+       }
+
+       memset(buf, 0, PPL_HEADER_SIZE);
+       ppl_hdr = buf;
+       memset(ppl_hdr->reserved, 0xff, PPL_HDR_RESERVED);
+       ppl_hdr->signature = __cpu_to_le32(super->anchor->orig_family_num);
+       ppl_hdr->checksum = __cpu_to_le32(~crc32c_le(~0, buf, PPL_HEADER_SIZE));
+
+       if (lseek64(fd, info->ppl_sector * 512, SEEK_SET) < 0) {
+               ret = errno;
+               perror("Failed to seek to PPL header location");
+       }
+
+       if (!ret && write(fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+               ret = errno;
+               perror("Write PPL header failed");
+       }
+
+       if (!ret)
+               fsync(fd);
+
+       free(buf);
+       return ret;
+}
+
+static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
+                            struct mdinfo *disk)
+{
+       struct intel_super *super = st->sb;
+       struct dl *d;
+       void *buf;
+       int ret = 0;
+       struct ppl_header *ppl_hdr;
+       __u32 crc;
+       struct imsm_dev *dev;
+       struct imsm_map *map;
+       __u32 idx;
+
+       if (disk->disk.raid_disk < 0)
+               return 0;
+
+       if (posix_memalign(&buf, 4096, PPL_HEADER_SIZE)) {
+               pr_err("Failed to allocate PPL header buffer\n");
+               return -1;
+       }
+
+       dev = get_imsm_dev(super, info->container_member);
+       map = get_imsm_map(dev, MAP_X);
+       idx = get_imsm_disk_idx(dev, disk->disk.raid_disk, MAP_X);
+       d = get_imsm_dl_disk(super, idx);
+
+       if (!d || d->index < 0 || is_failed(&d->disk))
+               goto out;
+
+       if (lseek64(d->fd, info->ppl_sector * 512, SEEK_SET) < 0) {
+               perror("Failed to seek to PPL header location");
+               ret = -1;
+               goto out;
+       }
+
+       if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+               perror("Read PPL header failed");
+               ret = -1;
+               goto out;
+       }
+
+       ppl_hdr = buf;
+
+       crc = __le32_to_cpu(ppl_hdr->checksum);
+       ppl_hdr->checksum = 0;
+
+       if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
+               dprintf("Wrong PPL header checksum on %s\n",
+                       d->devname);
+               ret = 1;
+       }
+
+       if (!ret && (__le32_to_cpu(ppl_hdr->signature) !=
+                     super->anchor->orig_family_num)) {
+               dprintf("Wrong PPL header signature on %s\n",
+                       d->devname);
+               ret = 1;
+       }
+
+out:
+       free(buf);
+
+       if (ret == 1 && map->map_state == IMSM_T_STATE_UNINITIALIZED)
+               return st->ss->write_init_ppl(st, info, d->fd);
+
+       return ret;
+}
+
+static int write_init_ppl_imsm_all(struct supertype *st, struct mdinfo *info)
+{
+       struct intel_super *super = st->sb;
+       struct dl *d;
+       int ret = 0;
+
+       if (info->consistency_policy != CONSISTENCY_POLICY_PPL ||
+           info->array.level != 5)
+               return 0;
+
+       for (d = super->disks; d ; d = d->next) {
+               if (d->index < 0 || is_failed(&d->disk))
+                       continue;
+
+               ret = st->ss->write_init_ppl(st, info, d->fd);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
 static int write_init_super_imsm(struct supertype *st)
 {
        struct intel_super *super = st->sb;
        int current_vol = super->current_vol;
+       int rv = 0;
+       struct mdinfo info;
+
+       getinfo_super_imsm(st, &info, NULL);
 
        /* we are done with current_vol reset it to point st at the container */
        super->current_vol = -1;
@@ -5938,26 +6150,30 @@ static int write_init_super_imsm(struct supertype *st)
        if (st->update_tail) {
                /* queue the recently created array / added disk
                 * as a metadata update */
-               int rv;
 
                /* determine if we are creating a volume or adding a disk */
                if (current_vol < 0) {
                        /* in the mgmt (add/remove) disk case we are running
                         * in mdmon context, so don't close fd's
                         */
-                       return mgmt_disk(st);
-               } else
-                       rv = create_array(st, current_vol);
-
-               return rv;
+                       rv = mgmt_disk(st);
+               } else {
+                       rv = write_init_ppl_imsm_all(st, &info);
+                       if (!rv)
+                               rv = create_array(st, current_vol);
+               }
        } else {
                struct dl *d;
                for (d = super->disks; d; d = d->next)
                        Kill(d->devname, NULL, 0, -1, 1);
-               return write_super_imsm(st, 1);
+               if (current_vol >= 0)
+                       rv = write_init_ppl_imsm_all(st, &info);
+               if (!rv)
+                       rv = write_super_imsm(st, 1);
        }
+
+       return rv;
 }
-#endif
 
 static int store_super_imsm(struct supertype *st, int fd)
 {
@@ -5967,16 +6183,11 @@ static int store_super_imsm(struct supertype *st, int fd)
        if (!mpb)
                return 1;
 
-#ifndef MDASSEMBLE
        if (super->sector_size == 4096)
                convert_to_4k(super);
        return store_imsm_mpb(fd, mpb);
-#else
-       return 1;
-#endif
 }
 
-#ifndef MDASSEMBLE
 static int validate_geometry_imsm_container(struct supertype *st, int level,
                                            int layout, int raiddisks, int chunk,
                                            unsigned long long size,
@@ -6913,7 +7124,7 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
                                  int raiddisks, int *chunk, unsigned long long size,
                                  unsigned long long data_offset,
                                  char *dev, unsigned long long *freesize,
-                                 int verbose)
+                                 int consistency_policy, int verbose)
 {
        int fd, cfd;
        struct mdinfo *sra;
@@ -7142,26 +7353,55 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
 
                        u->type = update_rename_array;
                        u->dev_idx = vol;
-                       snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
+                       strncpy((char *) u->name, name, MAX_RAID_SERIAL_LEN);
+                       u->name[MAX_RAID_SERIAL_LEN-1] = '\0';
                        append_metadata_update(st, u, sizeof(*u));
                } else {
                        struct imsm_dev *dev;
                        int i;
 
                        dev = get_imsm_dev(super, vol);
-                       snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
+                       strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
+                       dev->volume[MAX_RAID_SERIAL_LEN-1] = '\0';
                        for (i = 0; i < mpb->num_raid_devs; i++) {
                                dev = get_imsm_dev(super, i);
                                handle_missing(super, dev);
                        }
                        super->updates_pending++;
                }
+       } else if (strcmp(update, "ppl") == 0 ||
+                  strcmp(update, "no-ppl") == 0) {
+               int new_policy;
+               char *ep;
+               int vol = strtoul(subarray, &ep, 10);
+
+               if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
+                       return 2;
+
+               if (strcmp(update, "ppl") == 0)
+                       new_policy = RWH_DISTRIBUTED;
+               else
+                       new_policy = RWH_OFF;
+
+               if (st->update_tail) {
+                       struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u));
+
+                       u->type = update_rwh_policy;
+                       u->dev_idx = vol;
+                       u->new_policy = new_policy;
+                       append_metadata_update(st, u, sizeof(*u));
+               } else {
+                       struct imsm_dev *dev;
+
+                       dev = get_imsm_dev(super, vol);
+                       dev->rwh_policy = new_policy;
+                       super->updates_pending++;
+               }
        } else
                return 2;
 
        return 0;
 }
-#endif /* MDASSEMBLE */
 
 static int is_gen_migration(struct imsm_dev *dev)
 {
@@ -7195,7 +7435,6 @@ static int is_rebuilding(struct imsm_dev *dev)
                return 0;
 }
 
-#ifndef MDASSEMBLE
 static int is_initializing(struct imsm_dev *dev)
 {
        struct imsm_map *migr_map;
@@ -7213,7 +7452,6 @@ static int is_initializing(struct imsm_dev *dev)
 
        return 0;
 }
-#endif
 
 static void update_recovery_start(struct intel_super *super,
                                        struct imsm_dev *dev,
@@ -7247,9 +7485,7 @@ static void update_recovery_start(struct intel_super *super,
        rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
 }
 
-#ifndef MDASSEMBLE
 static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
-#endif
 
 static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
 {
@@ -7288,9 +7524,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                struct imsm_map *map2;
                struct mdinfo *this;
                int slot;
-#ifndef MDASSEMBLE
                int chunk;
-#endif
                char *ep;
 
                if (subarray &&
@@ -7319,7 +7553,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                super->current_vol = i;
                getinfo_super_imsm_volume(st, this, NULL);
                this->next = rest;
-#ifndef MDASSEMBLE
                chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
                /* mdadm does not support all metadata features- set the bit in all arrays state */
                if (!validate_geometry_imsm_orom(super,
@@ -7334,7 +7567,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                          (1<<MD_SB_BLOCK_CONTAINER_RESHAPE) |
                          (1<<MD_SB_BLOCK_VOLUME);
                }
-#endif
 
                /* if array has bad blocks, set suitable bit in all arrays state */
                if (sb_errors)
@@ -7372,7 +7604,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                         *
                         * FIXME handle dirty degraded
                         */
-                       if ((skip || recovery_start == 0) && !dev->vol.dirty)
+                       if ((skip || recovery_start == 0) &&
+                           !(dev->vol.dirty & RAIDVOL_DIRTY))
                                this->resync_start = MaxSector;
                        if (skip)
                                continue;
@@ -7407,9 +7640,12 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                                info_d->component_size =
                                                num_data_stripes(map) *
                                                map->blocks_per_strip;
+                               info_d->ppl_sector = this->ppl_sector;
+                               info_d->ppl_size = this->ppl_size;
                        } else {
                                info_d->component_size = blocks_per_member(map);
                        }
+                       info_d->consistency_policy = this->consistency_policy;
 
                        info_d->bb.supported = 1;
                        get_volume_badblocks(super->bbm_log, ord_to_idx(ord),
@@ -7421,11 +7657,9 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                update_recovery_start(super, dev, this);
                this->array.spare_disks += spare_disks;
 
-#ifndef MDASSEMBLE
                /* check for reshape */
                if (this->reshape_active == 1)
                        recover_backup_imsm(st, this);
-#endif
                rest = this;
        }
 
@@ -7555,7 +7789,6 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
        return failed;
 }
 
-#ifndef MDASSEMBLE
 static int imsm_open_new(struct supertype *c, struct active_array *a,
                         char *inst)
 {
@@ -7725,9 +7958,7 @@ static unsigned long long imsm_set_array_size(struct imsm_dev *dev,
                array_blocks = new_size;
        }
 
-       /* round array size down to closest MB
-        */
-       array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
+       array_blocks = round_size_to_mb(array_blocks, used_disks);
        dev->size_low = __cpu_to_le32((__u32)array_blocks);
        dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
 
@@ -7840,11 +8071,9 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
                                        array_blocks =
                                                blocks_per_member(map) *
                                                used_disks;
-                                       /* round array size down to closest MB
-                                        */
-                                       array_blocks = (array_blocks
-                                                       >> SECT_PER_MB_SHIFT)
-                                               << SECT_PER_MB_SHIFT;
+                                       array_blocks =
+                                               round_size_to_mb(array_blocks,
+                                                                used_disks);
                                        a->info.custom_array_size = array_blocks;
                                        /* encourage manager to update array
                                         * size
@@ -7925,12 +8154,16 @@ mark_checkpoint:
 
 skip_mark_checkpoint:
        /* mark dirty / clean */
-       if (dev->vol.dirty != !consistent) {
+       if (((dev->vol.dirty & RAIDVOL_DIRTY) && consistent) ||
+           (!(dev->vol.dirty & RAIDVOL_DIRTY) && !consistent)) {
                dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
-               if (consistent)
-                       dev->vol.dirty = 0;
-               else
-                       dev->vol.dirty = 1;
+               if (consistent) {
+                       dev->vol.dirty = RAIDVOL_CLEAN;
+               } else {
+                       dev->vol.dirty = RAIDVOL_DIRTY;
+                       if (dev->rwh_policy == RWH_DISTRIBUTED)
+                               dev->vol.dirty |= RAIDVOL_DSRECORD_VALID;
+               }
                super->updates_pending++;
        }
 
@@ -8442,6 +8675,11 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
                di->component_size = a->info.component_size;
                di->container_member = inst;
                di->bb.supported = 1;
+               if (dev->rwh_policy == RWH_DISTRIBUTED) {
+                       di->consistency_policy = CONSISTENCY_POLICY_PPL;
+                       di->ppl_sector = get_ppl_sector(super, inst);
+                       di->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+               }
                super->random = random32();
                di->next = rv;
                rv = di;
@@ -9369,6 +9607,21 @@ static void imsm_process_update(struct supertype *st,
        }
        case update_prealloc_badblocks_mem:
                break;
+       case update_rwh_policy: {
+               struct imsm_update_rwh_policy *u = (void *)update->buf;
+               int target = u->dev_idx;
+               struct imsm_dev *dev = get_imsm_dev(super, target);
+               if (!dev) {
+                       dprintf("could not find subarray-%d\n", target);
+                       break;
+               }
+
+               if (dev->rwh_policy != u->new_policy) {
+                       dev->rwh_policy = u->new_policy;
+                       super->updates_pending++;
+               }
+               break;
+       }
        default:
                pr_err("error: unsuported process update type:(type: %d)\n",    type);
        }
@@ -9614,6 +9867,11 @@ static int imsm_prepare_update(struct supertype *st,
                super->extra_space += sizeof(struct bbm_log) -
                        get_imsm_bbm_log_size(super->bbm_log);
                break;
+       case update_rwh_policy: {
+               if (update->len < (int)sizeof(struct imsm_update_rwh_policy))
+                       return 0;
+               break;
+       }
        default:
                return 0;
        }
@@ -9702,7 +9960,6 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
                __free_imsm_disk(dl);
        }
 }
-#endif /* MDASSEMBLE */
 
 static void close_targets(int *targets, int new_disks)
 {
@@ -9905,7 +10162,7 @@ int validate_container_imsm(struct mdinfo *info)
 
        return 0;
 }
-#ifndef MDASSEMBLE
+
 /*******************************************************************************
 * Function:   imsm_record_badblock
 * Description: This routine stores new bad block record in BBM log
@@ -10718,7 +10975,7 @@ static int imsm_create_metadata_update_for_migration(
                        free(u);
                        sysfs_free(spares);
                        update_memory_size = 0;
-                       dprintf("error: cannot get spare device for requested migration");
+                       pr_err("cannot get spare device for requested migration\n");
                        return 0;
                }
                sysfs_free(spares);
@@ -10950,7 +11207,7 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
                                    geo->raid_disks + devNumChange,
                                    &chunk,
                                    geo->size, INVALID_SECTORS,
-                                   0, 0, 1))
+                                   0, 0, info.consistency_policy, 1))
                change = -1;
 
        if (check_devs) {
@@ -11551,10 +11808,7 @@ abort:
        return ret_val;
 }
 
-#endif /* MDASSEMBLE */
-
 struct superswitch super_imsm = {
-#ifndef        MDASSEMBLE
        .examine_super  = examine_super_imsm,
        .brief_examine_super = brief_examine_super_imsm,
        .brief_examine_subarrays = brief_examine_subarrays_imsm,
@@ -11577,7 +11831,6 @@ struct superswitch super_imsm = {
        .recover_backup = recover_backup_imsm,
        .copy_metadata = copy_metadata_imsm,
        .examine_badblocks = examine_badblocks_imsm,
-#endif
        .match_home     = match_home_imsm,
        .uuid_from_super= uuid_from_super_imsm,
        .getinfo_super  = getinfo_super_imsm,
@@ -11597,10 +11850,12 @@ struct superswitch super_imsm = {
        .container_content = container_content_imsm,
        .validate_container = validate_container_imsm,
 
+       .write_init_ppl = write_init_ppl_imsm,
+       .validate_ppl   = validate_ppl_imsm,
+
        .external       = 1,
        .name = "imsm",
 
-#ifndef MDASSEMBLE
 /* for mdmon */
        .open_new       = imsm_open_new,
        .set_array_state= imsm_set_array_state,
@@ -11612,5 +11867,4 @@ struct superswitch super_imsm = {
        .record_bad_block = imsm_record_badblock,
        .clear_bad_block  = imsm_clear_badblock,
        .get_bad_blocks   = imsm_get_badblocks,
-#endif /* MDASSEMBLE */
 };